git subrepo pull --force deps/lightrec
[pcsx_rearmed.git] / deps / lightrec / constprop.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "constprop.h"
7 #include "disassembler.h"
8 #include "lightrec-private.h"
9
10 #include <stdbool.h>
11 #include <string.h>
12
13 static u32 get_min_value(const struct constprop_data *d)
14 {
15         /* Min value: all sign bits to 1, all unknown bits but MSB to 0 */
16         return (d->value & d->known) | d->sign | (~d->known & BIT(31));
17 }
18
19 static u32 get_max_value(const struct constprop_data *d)
20 {
21         /* Max value: all sign bits to 0, all unknown bits to 1 */
22         return ((d->value & d->known) | ~d->known) & ~d->sign;
23 }
24
25 static u32 lightrec_same_sign(const struct constprop_data *d1,
26                               const struct constprop_data *d2)
27 {
28         u32 min1, min2, max1, max2, a, b, c, d;
29
30         min1 = get_min_value(d1);
31         max1 = get_max_value(d1);
32         min2 = get_min_value(d2);
33         max2 = get_max_value(d2);
34
35         a = min1 + min2;
36         b = min1 + max2;
37         c = max1 + min2;
38         d = max1 + max2;
39
40         return ((a & b & c & d) | (~a & ~b & ~c & ~d)) & BIT(31);
41 }
42
43 static u32 lightrec_get_sign_mask(const struct constprop_data *d)
44 {
45         u32 imm;
46
47         if (d->sign)
48                 return d->sign;
49
50         imm = (d->value & BIT(31)) ? d->value : ~d->value;
51         imm = ~(imm & d->known);
52         if (imm)
53                 imm = 32 - clz32(imm);
54
55         return imm < 32 ? GENMASK(31, imm) : 0;
56 }
57
58 static void lightrec_propagate_addi(u32 rs, u32 rd,
59                                     const struct constprop_data *d,
60                                     struct constprop_data *v)
61 {
62         u32 end, bit, sum, min, mask, imm, value;
63         struct constprop_data result = {
64                 .value = v[rd].value,
65                 .known = v[rd].known,
66                 .sign = v[rd].sign,
67         };
68         bool carry = false;
69
70         /* clear unknown bits to ease processing */
71         v[rs].value &= v[rs].known;
72         value = d->value & d->known;
73
74         mask = ~(lightrec_get_sign_mask(d) & lightrec_get_sign_mask(&v[rs]));
75         end = mask ? 32 - clz32(mask) : 0;
76
77         for (bit = 0; bit < 32; bit++) {
78                 if (v[rs].known & d->known & BIT(bit)) {
79                         /* the bits are known - compute the resulting bit and
80                          * the carry */
81                         sum = ((u32)carry << bit) + (v[rs].value & BIT(bit))
82                                 + (value & BIT(bit));
83
84                         if (sum & BIT(bit))
85                                 result.value |= BIT(bit);
86                         else
87                                 result.value &= ~BIT(bit);
88
89                         result.known |= BIT(bit);
90                         result.sign &= ~BIT(bit);
91                         carry = sum & BIT(bit + 1);
92                         continue;
93                 }
94
95                 if (bit >= end) {
96                         /* We're past the last significant bits of the values
97                          * (extra sign bits excepted).
98                          * The destination register will be sign-extended
99                          * starting from here (if no carry) or from the next
100                          * bit (if carry).
101                          * If the source registers are not sign-extended and we
102                          * have no carry, the algorithm is done here. */
103
104                         if ((v[rs].sign | d->sign) & BIT(bit)) {
105                                 mask = GENMASK(31, bit);
106
107                                 if (lightrec_same_sign(&v[rs], d)) {
108                                         /* Theorical minimum and maximum values
109                                          * have the same sign; therefore the
110                                          * sign bits are known. */
111                                         min = get_min_value(&v[rs])
112                                                 + get_min_value(d);
113                                         result.value = (min & mask)
114                                                 | (result.value & ~mask);
115                                         result.known |= mask << carry;
116                                         result.sign = 0;
117                                 } else {
118                                         /* min/max have different signs. */
119                                         result.sign = mask << 1;
120                                         result.known &= ~mask;
121                                 }
122                                 break;
123                         } else if (!carry) {
124                                 /* Past end bit, no carry; we're done here. */
125                                 break;
126                         }
127                 }
128
129                 result.known &= ~BIT(bit);
130                 result.sign &= ~BIT(bit);
131
132                 /* Found an unknown bit in one of the registers.
133                  * If the carry and the bit in the other register are both zero,
134                  * we can continue the algorithm. */
135                 if (!carry && (((d->known & ~value)
136                                 | (v[rs].known & ~v[rs].value)) & BIT(bit)))
137                         continue;
138
139                 /* We have an unknown bit in one of the source registers, and we
140                  * may generate a carry: there's nothing to do. Everything from
141                  * this bit till the next known 0 bit or sign bit will be marked
142                  * as unknown. The algorithm can then restart at the following
143                  * bit. */
144
145                 imm = (v[rs].known & d->known & ~v[rs].value & ~value)
146                         | v[rs].sign | d->sign;
147
148                 imm &= GENMASK(31, bit);
149                 imm = imm ? ctz32(imm) : 31;
150                 mask = GENMASK(imm, bit);
151                 result.known &= ~mask;
152                 result.sign &= ~mask;
153
154                 bit = imm;
155                 carry = false;
156         }
157
158         v[rd] = result;
159 }
160
161 static void lightrec_propagate_sub(u32 rs, u32 rt, u32 rd,
162                                    struct constprop_data *v)
163 {
164         struct constprop_data d = {
165                 .value = ~v[rt].value,
166                 .known = v[rt].known,
167                 .sign = v[rt].sign,
168         };
169         u32 imm, mask, bit;
170
171         /* Negate the known Rt value, then propagate as a regular ADD. */
172
173         for (bit = 0; bit < 32; bit++) {
174                 if (!(d.known & BIT(bit))) {
175                         /* Unknown bit - mark bits unknown up to the next known 0 */
176
177                         imm = (d.known & ~d.value) | d.sign;
178                         imm &= GENMASK(31, bit);
179                         imm = imm ? ctz32(imm) : 31;
180                         mask = GENMASK(imm, bit);
181                         d.known &= ~mask;
182                         d.sign &= ~mask;
183                         break;
184                 }
185
186                 if (!(d.value & BIT(bit))) {
187                         /* Bit is 0: we can set our carry, and the algorithm is done. */
188                         d.value |= BIT(bit);
189                         break;
190                 }
191
192                 /* Bit is 1 - set to 0 and continue algorithm */
193                 d.value &= ~BIT(bit);
194         }
195
196         lightrec_propagate_addi(rs, rd, &d, v);
197 }
198
199 static void lightrec_propagate_slt(u32 rs, u32 rd, bool is_signed,
200                                    const struct constprop_data *d,
201                                    struct constprop_data *v)
202 {
203         unsigned int bit;
204
205         if (is_signed && (v[rs].known & d->known
206                           & (v[rs].value ^ d->value) & BIT(31))) {
207                 /* If doing a signed comparison and the two bits 31 are known
208                  * to be opposite, we can deduce the value. */
209                 v[rd].value = v[rs].value >> 31;
210                 v[rd].known = 0xffffffff;
211                 v[rd].sign = 0;
212                 return;
213         }
214
215         for (bit = 32; bit > 0; bit--) {
216                 if (!(v[rs].known & d->known & BIT(bit - 1))) {
217                         /* One bit is unknown and we cannot figure out which
218                          * value is smaller. We still know that the upper 31
219                          * bits are zero. */
220                         v[rd].value = 0;
221                         v[rd].known = 0xfffffffe;
222                         v[rd].sign = 0;
223                         break;
224                 }
225
226                 /* The two bits are equal - continue to the next bit. */
227                 if (~(v[rs].value ^ d->value) & BIT(bit - 1))
228                         continue;
229
230                 /* The two bits aren't equal; we can therefore deduce which
231                  * value is smaller. */
232                 v[rd].value = !(v[rs].value & BIT(bit - 1));
233                 v[rd].known = 0xffffffff;
234                 v[rd].sign = 0;
235                 break;
236         }
237
238         if (bit == 0) {
239                 /* rs == rt and all bits are known */
240                 v[rd].value = 0;
241                 v[rd].known = 0xffffffff;
242                 v[rd].sign = 0;
243         }
244 }
245
246 void lightrec_consts_propagate(const struct block *block,
247                                unsigned int idx,
248                                struct constprop_data *v)
249 {
250         const struct opcode *list = block->opcode_list;
251         union code c;
252         u32 imm, flags;
253
254         if (idx == 0)
255                 return;
256
257         /* Register $zero is always, well, zero */
258         v[0].value = 0;
259         v[0].sign = 0;
260         v[0].known = 0xffffffff;
261
262         if (op_flag_sync(list[idx].flags)) {
263                 memset(&v[1], 0, sizeof(*v) * 31);
264                 return;
265         }
266
267         flags = list[idx - 1].flags;
268
269         if (idx > 1 && !op_flag_sync(flags)) {
270                 if (op_flag_no_ds(flags))
271                         c = list[idx - 1].c;
272                 else
273                         c = list[idx - 2].c;
274
275                 switch (c.i.op) {
276                 case OP_BNE:
277                         /* After a BNE $zero + delay slot, we know that the
278                          * branch wasn't taken, and therefore the other register
279                          * is zero. */
280                         if (c.i.rs == 0) {
281                                 v[c.i.rt].value = 0;
282                                 v[c.i.rt].sign = 0;
283                                 v[c.i.rt].known = 0xffffffff;
284                         } else if (c.i.rt == 0) {
285                                 v[c.i.rs].value = 0;
286                                 v[c.i.rs].sign = 0;
287                                 v[c.i.rs].known = 0xffffffff;
288                         }
289                         break;
290                 case OP_BLEZ:
291                         v[c.i.rs].value &= ~BIT(31);
292                         v[c.i.rs].known |= BIT(31);
293                         fallthrough;
294                 case OP_BEQ:
295                         /* TODO: handle non-zero? */
296                         break;
297                 case OP_REGIMM:
298                         switch (c.r.rt) {
299                         case OP_REGIMM_BLTZ:
300                         case OP_REGIMM_BLTZAL:
301                                 v[c.i.rs].value &= ~BIT(31);
302                                 v[c.i.rs].known |= BIT(31);
303                                 break;
304                         case OP_REGIMM_BGEZ:
305                         case OP_REGIMM_BGEZAL:
306                                 v[c.i.rs].value |= BIT(31);
307                                 v[c.i.rs].known |= BIT(31);
308                                 /* TODO: handle non-zero? */
309                                 break;
310                         }
311                         break;
312                 default:
313                         break;
314                 }
315         }
316
317         c = list[idx - 1].c;
318
319         switch (c.i.op) {
320         case OP_SPECIAL:
321                 switch (c.r.op) {
322                 case OP_SPECIAL_SLL:
323                         v[c.r.rd].value = v[c.r.rt].value << c.r.imm;
324                         v[c.r.rd].known = (v[c.r.rt].known << c.r.imm)
325                                 | (BIT(c.r.imm) - 1);
326                         v[c.r.rd].sign = v[c.r.rt].sign << c.r.imm;
327                         break;
328
329                 case OP_SPECIAL_SRL:
330                         v[c.r.rd].value = v[c.r.rt].value >> c.r.imm;
331                         v[c.r.rd].known = (v[c.r.rt].known >> c.r.imm)
332                                 | ((BIT(c.r.imm) - 1) << (32 - c.r.imm));
333                         v[c.r.rd].sign = c.r.imm ? 0 : v[c.r.rt].sign;
334                         break;
335
336                 case OP_SPECIAL_SRA:
337                         v[c.r.rd].value = (s32)v[c.r.rt].value >> c.r.imm;
338                         v[c.r.rd].sign = (s32)(v[c.r.rt].sign
339                                                | (~v[c.r.rt].known & 0x80000000)) >> c.r.imm;
340                         v[c.r.rd].known = (s32)v[c.r.rt].known >> c.r.imm;
341                         break;
342
343                 case OP_SPECIAL_SLLV:
344                         if ((v[c.r.rs].known & 0x1f) == 0x1f) {
345                                 imm = v[c.r.rs].value & 0x1f;
346                                 v[c.r.rd].value = v[c.r.rt].value << imm;
347                                 v[c.r.rd].known = (v[c.r.rt].known << imm)
348                                         | (BIT(imm) - 1);
349                                 v[c.r.rd].sign = v[c.r.rt].sign << imm;
350                         } else {
351                                 v[c.r.rd].known = 0;
352                                 v[c.r.rd].sign = 0;
353                         }
354                         break;
355
356                 case OP_SPECIAL_SRLV:
357                         if ((v[c.r.rs].known & 0x1f) == 0x1f) {
358                                 imm = v[c.r.rs].value & 0x1f;
359                                 v[c.r.rd].value = v[c.r.rt].value >> imm;
360                                 v[c.r.rd].known = (v[c.r.rt].known >> imm)
361                                         | ((BIT(imm) - 1) << (32 - imm));
362                                 if (imm)
363                                         v[c.r.rd].sign = 0;
364                         } else {
365                                 v[c.r.rd].known = 0;
366                                 v[c.r.rd].sign = 0;
367                         }
368                         break;
369
370                 case OP_SPECIAL_SRAV:
371                         if ((v[c.r.rs].known & 0x1f) == 0x1f) {
372                                 imm = v[c.r.rs].value & 0x1f;
373                                 v[c.r.rd].value = (s32)v[c.r.rt].value >> imm;
374                                 v[c.r.rd].sign = (s32)(v[c.r.rt].sign
375                                                        | (~v[c.r.rt].known & 0x80000000)) >> imm;
376                                 v[c.r.rd].known = (s32)v[c.r.rt].known >> imm;
377                         } else {
378                                 v[c.r.rd].known = 0;
379                                 v[c.r.rd].sign = 0;
380                         }
381                         break;
382
383                 case OP_SPECIAL_ADD:
384                 case OP_SPECIAL_ADDU:
385                         if (is_known_zero(v, c.r.rs))
386                                 v[c.r.rd] = v[c.r.rt];
387                         else if (is_known_zero(v, c.r.rt))
388                                 v[c.r.rd] = v[c.r.rs];
389                         else
390                                 lightrec_propagate_addi(c.r.rs, c.r.rd, &v[c.r.rt], v);
391                         break;
392
393                 case OP_SPECIAL_SUB:
394                 case OP_SPECIAL_SUBU:
395                         if (c.r.rs == c.r.rt) {
396                                 v[c.r.rd].value = 0;
397                                 v[c.r.rd].known = 0xffffffff;
398                                 v[c.r.rd].sign = 0;
399                         } else {
400                                 lightrec_propagate_sub(c.r.rs, c.r.rt, c.r.rd, v);
401                         }
402                         break;
403
404                 case OP_SPECIAL_AND:
405                         v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
406                                 | (~v[c.r.rt].value & v[c.r.rt].known)
407                                 | (~v[c.r.rs].value & v[c.r.rs].known);
408                         v[c.r.rd].value = v[c.r.rt].value & v[c.r.rs].value & v[c.r.rd].known;
409                         v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
410                         break;
411
412                 case OP_SPECIAL_OR:
413                         v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
414                                 | (v[c.r.rt].value & v[c.r.rt].known)
415                                 | (v[c.r.rs].value & v[c.r.rs].known);
416                         v[c.r.rd].value = (v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known;
417                         v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
418                         break;
419
420                 case OP_SPECIAL_XOR:
421                         v[c.r.rd].value = v[c.r.rt].value ^ v[c.r.rs].value;
422                         v[c.r.rd].known = v[c.r.rt].known & v[c.r.rs].known;
423                         v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
424                         break;
425
426                 case OP_SPECIAL_NOR:
427                         v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
428                                 | (v[c.r.rt].value & v[c.r.rt].known)
429                                 | (v[c.r.rs].value & v[c.r.rs].known);
430                         v[c.r.rd].value = ~(v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known;
431                         v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
432                         break;
433
434                 case OP_SPECIAL_SLT:
435                 case OP_SPECIAL_SLTU:
436                         lightrec_propagate_slt(c.r.rs, c.r.rd,
437                                                c.r.op ==  OP_SPECIAL_SLT,
438                                                &v[c.r.rt], v);
439                         break;
440
441                 case OP_SPECIAL_MULT:
442                 case OP_SPECIAL_MULTU:
443                 case OP_SPECIAL_DIV:
444                 case OP_SPECIAL_DIVU:
445                         if (OPT_FLAG_MULT_DIV && c.r.rd) {
446                                 v[c.r.rd].known = 0;
447                                 v[c.r.rd].sign = 0;
448                         }
449                         if (OPT_FLAG_MULT_DIV && c.r.imm) {
450                                 v[c.r.imm].known = 0;
451                                 v[c.r.imm].sign = 0;
452                         }
453                         break;
454
455                 case OP_SPECIAL_MFLO:
456                 case OP_SPECIAL_MFHI:
457                         v[c.r.rd].known = 0;
458                         v[c.r.rd].sign = 0;
459                         break;
460
461                 case OP_SPECIAL_JALR:
462                         v[c.r.rd].known = 0xffffffff;
463                         v[c.r.rd].sign = 0;
464                         v[c.r.rd].value = block->pc + ((idx + 2) << 2);
465                         break;
466
467                 default:
468                         break;
469                 }
470                 break;
471
472         case OP_META_MULT2:
473         case OP_META_MULTU2:
474                 if (OPT_FLAG_MULT_DIV && c.r.rd) {
475                         if (c.r.op < 32) {
476                                 v[c.r.rd].value = v[c.r.rs].value << c.r.op;
477                                 v[c.r.rd].known = (v[c.r.rs].known << c.r.op)
478                                         | (BIT(c.r.op) - 1);
479                                 v[c.r.rd].sign = v[c.r.rs].sign << c.r.op;
480                         } else {
481                                 v[c.r.rd].value = 0;
482                                 v[c.r.rd].known = 0xffffffff;
483                                 v[c.r.rd].sign = 0;
484                         }
485                 }
486
487                 if (OPT_FLAG_MULT_DIV && c.r.imm) {
488                         if (c.r.op >= 32) {
489                                 v[c.r.imm].value = v[c.r.rs].value << (c.r.op - 32);
490                                 v[c.r.imm].known = (v[c.r.rs].known << (c.r.op - 32))
491                                         | (BIT(c.r.op - 32) - 1);
492                                 v[c.r.imm].sign = v[c.r.rs].sign << (c.r.op - 32);
493                         } else if (c.i.op == OP_META_MULT2) {
494                                 v[c.r.imm].value = (s32)v[c.r.rs].value >> (32 - c.r.op);
495                                 v[c.r.imm].known = (s32)v[c.r.rs].known >> (32 - c.r.op);
496                                 v[c.r.imm].sign = (s32)v[c.r.rs].sign >> (32 - c.r.op);
497                         } else {
498                                 v[c.r.imm].value = v[c.r.rs].value >> (32 - c.r.op);
499                                 v[c.r.imm].known = v[c.r.rs].known >> (32 - c.r.op);
500                                 v[c.r.imm].sign = v[c.r.rs].sign >> (32 - c.r.op);
501                         }
502                 }
503                 break;
504
505         case OP_REGIMM:
506                 break;
507
508         case OP_ADDI:
509         case OP_ADDIU:
510                 if (c.i.imm) {
511                         struct constprop_data d = {
512                                 .value = (s32)(s16)c.i.imm,
513                                 .known = 0xffffffff,
514                                 .sign = 0,
515                         };
516
517                         lightrec_propagate_addi(c.i.rs, c.i.rt, &d, v);
518                 } else {
519                         /* immediate is zero - that's just a register copy. */
520                         v[c.i.rt] = v[c.i.rs];
521                 }
522                 break;
523
524         case OP_SLTI:
525         case OP_SLTIU:
526                 {
527                         struct constprop_data d = {
528                                 .value = (s32)(s16)c.i.imm,
529                                 .known = 0xffffffff,
530                                 .sign = 0,
531                         };
532
533                         lightrec_propagate_slt(c.i.rs, c.i.rt,
534                                                c.i.op == OP_SLTI, &d, v);
535                 }
536                 break;
537
538         case OP_ANDI:
539                 v[c.i.rt].value = v[c.i.rs].value & c.i.imm;
540                 v[c.i.rt].known = v[c.i.rs].known | ~c.i.imm;
541                 v[c.i.rt].sign = 0;
542                 break;
543
544         case OP_ORI:
545                 v[c.i.rt].value = v[c.i.rs].value | c.i.imm;
546                 v[c.i.rt].known = v[c.i.rs].known | c.i.imm;
547                 v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign;
548                 break;
549
550         case OP_XORI:
551                 v[c.i.rt].value = v[c.i.rs].value ^ c.i.imm;
552                 v[c.i.rt].known = v[c.i.rs].known;
553                 v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign;
554                 break;
555
556         case OP_LUI:
557                 v[c.i.rt].value = c.i.imm << 16;
558                 v[c.i.rt].known = 0xffffffff;
559                 v[c.i.rt].sign = 0;
560                 break;
561
562         case OP_CP0:
563                 switch (c.r.rs) {
564                 case OP_CP0_MFC0:
565                 case OP_CP0_CFC0:
566                         v[c.r.rt].known = 0;
567                         v[c.r.rt].sign = 0;
568                         break;
569                 default:
570                         break;
571                 }
572                 break;
573
574         case OP_CP2:
575                 if (c.r.op == OP_CP2_BASIC) {
576                         switch (c.r.rs) {
577                         case OP_CP2_BASIC_MFC2:
578                                 switch (c.r.rd) {
579                                 case 1:
580                                 case 3:
581                                 case 5:
582                                 case 8:
583                                 case 9:
584                                 case 10:
585                                 case 11:
586                                         /* Signed 16-bit */
587                                         v[c.r.rt].known = 0;
588                                         v[c.r.rt].sign = 0xffff8000;
589                                         break;
590                                 case 7:
591                                 case 16:
592                                 case 17:
593                                 case 18:
594                                 case 19:
595                                         /* Unsigned 16-bit */
596                                         v[c.r.rt].value = 0;
597                                         v[c.r.rt].known = 0xffff0000;
598                                         v[c.r.rt].sign = 0;
599                                         break;
600                                 default:
601                                         /* 32-bit */
602                                         v[c.r.rt].known = 0;
603                                         v[c.r.rt].sign = 0;
604                                         break;
605                                 }
606                                 break;
607                         case OP_CP2_BASIC_CFC2:
608                                 switch (c.r.rd) {
609                                 case 4:
610                                 case 12:
611                                 case 20:
612                                 case 26:
613                                 case 27:
614                                 case 29:
615                                 case 30:
616                                         /* Signed 16-bit */
617                                         v[c.r.rt].known = 0;
618                                         v[c.r.rt].sign = 0xffff8000;
619                                         break;
620                                 default:
621                                         /* 32-bit */
622                                         v[c.r.rt].known = 0;
623                                         v[c.r.rt].sign = 0;
624                                         break;
625                                 }
626                                 break;
627                         }
628                 }
629                 break;
630         case OP_LB:
631                 v[c.i.rt].known = 0;
632                 v[c.i.rt].sign = 0xffffff80;
633                 break;
634         case OP_LH:
635                 v[c.i.rt].known = 0;
636                 v[c.i.rt].sign = 0xffff8000;
637                 break;
638         case OP_LBU:
639                 v[c.i.rt].value = 0;
640                 v[c.i.rt].known = 0xffffff00;
641                 v[c.i.rt].sign = 0;
642                 break;
643         case OP_LHU:
644                 v[c.i.rt].value = 0;
645                 v[c.i.rt].known = 0xffff0000;
646                 v[c.i.rt].sign = 0;
647                 break;
648         case OP_LWL:
649         case OP_LWR:
650                 /* LWL/LWR don't write the full register if the address is
651                  * unaligned, so we only need to know the low 2 bits */
652                 if (v[c.i.rs].known & 0x3) {
653                         imm = (v[c.i.rs].value & 0x3) * 8;
654
655                         if (c.i.op == OP_LWL) {
656                                 imm = BIT(24 - imm) - 1;
657                                 v[c.i.rt].sign &= ~imm;
658                         } else {
659                                 imm = imm ? GENMASK(31, 32 - imm) : 0;
660                                 v[c.i.rt].sign = 0;
661                         }
662                         v[c.i.rt].known &= imm;
663                         break;
664                 }
665                 fallthrough;
666         case OP_LW:
667         case OP_META_LWU:
668                 v[c.i.rt].known = 0;
669                 v[c.i.rt].sign = 0;
670                 break;
671         case OP_META:
672                 switch (c.m.op) {
673                 case OP_META_MOV:
674                         v[c.m.rd] = v[c.m.rs];
675                         break;
676
677                 case OP_META_EXTC:
678                         v[c.m.rd].value = (s32)(s8)v[c.m.rs].value;
679                         if (v[c.m.rs].known & BIT(7)) {
680                                 v[c.m.rd].known = v[c.m.rs].known | 0xffffff00;
681                                 v[c.m.rd].sign = 0;
682                         } else {
683                                 v[c.m.rd].known = v[c.m.rs].known & 0x7f;
684                                 v[c.m.rd].sign = 0xffffff80;
685                         }
686                         break;
687
688                 case OP_META_EXTS:
689                         v[c.m.rd].value = (s32)(s16)v[c.m.rs].value;
690                         if (v[c.m.rs].known & BIT(15)) {
691                                 v[c.m.rd].known = v[c.m.rs].known | 0xffff0000;
692                                 v[c.m.rd].sign = 0;
693                         } else {
694                                 v[c.m.rd].known = v[c.m.rs].known & 0x7fff;
695                                 v[c.m.rd].sign = 0xffff8000;
696                         }
697                         break;
698
699                 case OP_META_COM:
700                         v[c.m.rd].known = v[c.m.rs].known;
701                         v[c.m.rd].value = ~v[c.m.rs].value;
702                         v[c.m.rd].sign = v[c.m.rs].sign;
703                         break;
704                 default:
705                         break;
706                 }
707                 break;
708         case OP_JAL:
709                 v[31].known = 0xffffffff;
710                 v[31].sign = 0;
711                 v[31].value = block->pc + ((idx + 2) << 2);
712                 break;
713
714         default:
715                 break;
716         }
717
718         /* Reset register 0 which may have been used as a target */
719         v[0].value = 0;
720         v[0].sign = 0;
721         v[0].known = 0xffffffff;
722 }
723
724 enum psx_map
725 lightrec_get_constprop_map(const struct lightrec_state *state,
726                            const struct constprop_data *v, u8 reg, s16 imm)
727 {
728         const struct lightrec_mem_map *map;
729         unsigned int i;
730         u32 min, max;
731
732         min = get_min_value(&v[reg]) + imm;
733         max = get_max_value(&v[reg]) + imm;
734
735         /* Handle the case where max + imm overflows */
736         if ((min & 0xe0000000) != (max & 0xe0000000))
737                 return PSX_MAP_UNKNOWN;
738
739         pr_debug("Min: "X32_FMT" max: "X32_FMT" Known: "X32_FMT" Sign: "X32_FMT"\n",
740                  min, max, v[reg].known, v[reg].sign);
741
742         min = kunseg(min);
743         max = kunseg(max);
744
745         for (i = 0; i < state->nb_maps; i++) {
746                 map = &state->maps[i];
747
748                 if (min >= map->pc && min < map->pc + map->length
749                     && max >= map->pc && max < map->pc + map->length)
750                         return (enum psx_map) i;
751         }
752
753         return PSX_MAP_UNKNOWN;
754 }