1 // SPDX-License-Identifier: LGPL-2.1-or-later
3 * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
7 #include "disassembler.h"
8 #include "lightrec-private.h"
13 static u32 get_min_value(const struct constprop_data *d)
15 /* Min value: all sign bits to 1, all unknown bits but MSB to 0 */
16 return (d->value & d->known) | d->sign | (~d->known & BIT(31));
19 static u32 get_max_value(const struct constprop_data *d)
21 /* Max value: all sign bits to 0, all unknown bits to 1 */
22 return ((d->value & d->known) | ~d->known) & ~d->sign;
25 static u32 lightrec_same_sign(const struct constprop_data *d1,
26 const struct constprop_data *d2)
28 u32 min1, min2, max1, max2, a, b, c, d;
30 min1 = get_min_value(d1);
31 max1 = get_max_value(d1);
32 min2 = get_min_value(d2);
33 max2 = get_max_value(d2);
40 return ((a & b & c & d) | (~a & ~b & ~c & ~d)) & BIT(31);
43 static u32 lightrec_get_sign_mask(const struct constprop_data *d)
50 imm = (d->value & BIT(31)) ? d->value : ~d->value;
51 imm = ~(imm & d->known);
53 imm = 32 - clz32(imm);
55 return imm < 32 ? GENMASK(31, imm) : 0;
58 static void lightrec_propagate_addi(u32 rs, u32 rd,
59 const struct constprop_data *d,
60 struct constprop_data *v)
62 u32 end, bit, sum, min, mask, imm, value;
63 struct constprop_data result = {
70 /* clear unknown bits to ease processing */
71 v[rs].value &= v[rs].known;
72 value = d->value & d->known;
74 mask = ~(lightrec_get_sign_mask(d) & lightrec_get_sign_mask(&v[rs]));
75 end = mask ? 32 - clz32(mask) : 0;
77 for (bit = 0; bit < 32; bit++) {
78 if (v[rs].known & d->known & BIT(bit)) {
79 /* the bits are known - compute the resulting bit and
81 sum = ((u32)carry << bit) + (v[rs].value & BIT(bit))
85 result.value |= BIT(bit);
87 result.value &= ~BIT(bit);
89 result.known |= BIT(bit);
90 result.sign &= ~BIT(bit);
91 carry = sum & BIT(bit + 1);
96 /* We're past the last significant bits of the values
97 * (extra sign bits excepted).
98 * The destination register will be sign-extended
99 * starting from here (if no carry) or from the next
101 * If the source registers are not sign-extended and we
102 * have no carry, the algorithm is done here. */
104 if ((v[rs].sign | d->sign) & BIT(bit)) {
105 mask = GENMASK(31, bit);
107 if (lightrec_same_sign(&v[rs], d)) {
108 /* Theorical minimum and maximum values
109 * have the same sign; therefore the
110 * sign bits are known. */
111 min = get_min_value(&v[rs])
113 result.value = (min & mask)
114 | (result.value & ~mask);
115 result.known |= mask << carry;
118 /* min/max have different signs. */
119 result.sign = mask << 1;
120 result.known &= ~mask;
124 /* Past end bit, no carry; we're done here. */
129 result.known &= ~BIT(bit);
130 result.sign &= ~BIT(bit);
132 /* Found an unknown bit in one of the registers.
133 * If the carry and the bit in the other register are both zero,
134 * we can continue the algorithm. */
135 if (!carry && (((d->known & ~value)
136 | (v[rs].known & ~v[rs].value)) & BIT(bit)))
139 /* We have an unknown bit in one of the source registers, and we
140 * may generate a carry: there's nothing to do. Everything from
141 * this bit till the next known 0 bit or sign bit will be marked
142 * as unknown. The algorithm can then restart at the following
145 imm = (v[rs].known & d->known & ~v[rs].value & ~value)
146 | v[rs].sign | d->sign;
148 imm &= GENMASK(31, bit);
149 imm = imm ? ctz32(imm) : 31;
150 mask = GENMASK(imm, bit);
151 result.known &= ~mask;
152 result.sign &= ~mask;
161 static void lightrec_propagate_sub(u32 rs, u32 rt, u32 rd,
162 struct constprop_data *v)
164 struct constprop_data d = {
165 .value = ~v[rt].value,
166 .known = v[rt].known,
171 /* Negate the known Rt value, then propagate as a regular ADD. */
173 for (bit = 0; bit < 32; bit++) {
174 if (!(d.known & BIT(bit))) {
175 /* Unknown bit - mark bits unknown up to the next known 0 */
177 imm = (d.known & ~d.value) | d.sign;
178 imm &= GENMASK(31, bit);
179 imm = imm ? ctz32(imm) : 31;
180 mask = GENMASK(imm, bit);
186 if (!(d.value & BIT(bit))) {
187 /* Bit is 0: we can set our carry, and the algorithm is done. */
192 /* Bit is 1 - set to 0 and continue algorithm */
193 d.value &= ~BIT(bit);
196 lightrec_propagate_addi(rs, rd, &d, v);
199 static void lightrec_propagate_slt(u32 rs, u32 rd, bool is_signed,
200 const struct constprop_data *d,
201 struct constprop_data *v)
205 if (is_signed && (v[rs].known & d->known
206 & (v[rs].value ^ d->value) & BIT(31))) {
207 /* If doing a signed comparison and the two bits 31 are known
208 * to be opposite, we can deduce the value. */
209 v[rd].value = v[rs].value >> 31;
210 v[rd].known = 0xffffffff;
215 for (bit = 32; bit > 0; bit--) {
216 if (!(v[rs].known & d->known & BIT(bit - 1))) {
217 /* One bit is unknown and we cannot figure out which
218 * value is smaller. We still know that the upper 31
221 v[rd].known = 0xfffffffe;
226 /* The two bits are equal - continue to the next bit. */
227 if (~(v[rs].value ^ d->value) & BIT(bit - 1))
230 /* The two bits aren't equal; we can therefore deduce which
231 * value is smaller. */
232 v[rd].value = !(v[rs].value & BIT(bit - 1));
233 v[rd].known = 0xffffffff;
239 /* rs == rt and all bits are known */
241 v[rd].known = 0xffffffff;
246 void lightrec_consts_propagate(const struct opcode *list,
248 struct constprop_data *v)
256 /* Register $zero is always, well, zero */
259 v[0].known = 0xffffffff;
261 if (op_flag_sync(list[idx].flags)) {
262 memset(&v[1], 0, sizeof(*v) * 31);
266 if (idx > 1 && !op_flag_sync(list[idx - 1].flags)) {
271 /* After a BNE $zero + delay slot, we know that the
272 * branch wasn't taken, and therefore the other register
277 v[c.i.rt].known = 0xffffffff;
278 } else if (c.i.rt == 0) {
281 v[c.i.rs].known = 0xffffffff;
285 v[c.i.rs].value &= ~BIT(31);
286 v[c.i.rs].known |= BIT(31);
289 /* TODO: handle non-zero? */
294 case OP_REGIMM_BLTZAL:
295 v[c.i.rs].value &= ~BIT(31);
296 v[c.i.rs].known |= BIT(31);
299 case OP_REGIMM_BGEZAL:
300 v[c.i.rs].value |= BIT(31);
301 v[c.i.rs].known |= BIT(31);
302 /* TODO: handle non-zero? */
317 v[c.r.rd].value = v[c.r.rt].value << c.r.imm;
318 v[c.r.rd].known = (v[c.r.rt].known << c.r.imm)
319 | (BIT(c.r.imm) - 1);
320 v[c.r.rd].sign = v[c.r.rt].sign << c.r.imm;
324 v[c.r.rd].value = v[c.r.rt].value >> c.r.imm;
325 v[c.r.rd].known = (v[c.r.rt].known >> c.r.imm)
326 | (BIT(c.r.imm) - 1 << 32 - c.r.imm);
327 v[c.r.rd].sign = c.r.imm ? 0 : v[c.r.rt].sign;
331 v[c.r.rd].value = (s32)v[c.r.rt].value >> c.r.imm;
332 v[c.r.rd].known = (s32)v[c.r.rt].known >> c.r.imm;
333 v[c.r.rd].sign = (s32)v[c.r.rt].sign >> c.r.imm;
336 case OP_SPECIAL_SLLV:
337 if ((v[c.r.rs].known & 0x1f) == 0x1f) {
338 imm = v[c.r.rs].value & 0x1f;
339 v[c.r.rd].value = v[c.r.rt].value << imm;
340 v[c.r.rd].known = (v[c.r.rt].known << imm)
342 v[c.r.rd].sign = v[c.r.rt].sign << imm;
349 case OP_SPECIAL_SRLV:
350 if ((v[c.r.rs].known & 0x1f) == 0x1f) {
351 imm = v[c.r.rs].value & 0x1f;
352 v[c.r.rd].value = v[c.r.rt].value >> imm;
353 v[c.r.rd].known = (v[c.r.rt].known >> imm)
354 | (BIT(imm) - 1 << 32 - imm);
363 case OP_SPECIAL_SRAV:
364 if ((v[c.r.rs].known & 0x1f) == 0x1f) {
365 imm = v[c.r.rs].value & 0x1f;
366 v[c.r.rd].value = (s32)v[c.r.rt].value >> imm;
367 v[c.r.rd].known = (s32)v[c.r.rt].known >> imm;
368 v[c.r.rd].sign = (s32)v[c.r.rt].sign >> imm;
376 case OP_SPECIAL_ADDU:
377 if (is_known_zero(v, c.r.rs))
378 v[c.r.rd] = v[c.r.rt];
379 else if (is_known_zero(v, c.r.rt))
380 v[c.r.rd] = v[c.r.rs];
382 lightrec_propagate_addi(c.r.rs, c.r.rd, &v[c.r.rt], v);
386 case OP_SPECIAL_SUBU:
387 if (c.r.rs == c.r.rt) {
389 v[c.r.rd].known = 0xffffffff;
392 lightrec_propagate_sub(c.r.rs, c.r.rt, c.r.rd, v);
397 v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
398 | (~v[c.r.rt].value & v[c.r.rt].known)
399 | (~v[c.r.rs].value & v[c.r.rs].known);
400 v[c.r.rd].value = v[c.r.rt].value & v[c.r.rs].value & v[c.r.rd].known;
401 v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
405 v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
406 | (v[c.r.rt].value & v[c.r.rt].known)
407 | (v[c.r.rs].value & v[c.r.rs].known);
408 v[c.r.rd].value = (v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known;
409 v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
413 v[c.r.rd].value = v[c.r.rt].value ^ v[c.r.rs].value;
414 v[c.r.rd].known = v[c.r.rt].known & v[c.r.rs].known;
415 v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
419 v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
420 | (v[c.r.rt].value & v[c.r.rt].known)
421 | (v[c.r.rs].value & v[c.r.rs].known);
422 v[c.r.rd].value = ~(v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known;
423 v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
427 case OP_SPECIAL_SLTU:
428 lightrec_propagate_slt(c.r.rs, c.r.rd,
429 c.r.op == OP_SPECIAL_SLT,
433 case OP_SPECIAL_MULT:
434 case OP_SPECIAL_MULTU:
436 case OP_SPECIAL_DIVU:
437 if (OPT_FLAG_MULT_DIV && c.r.rd) {
441 if (OPT_FLAG_MULT_DIV && c.r.imm) {
442 v[c.r.imm].known = 0;
447 case OP_SPECIAL_MFLO:
448 case OP_SPECIAL_MFHI:
459 if (OPT_FLAG_MULT_DIV && c.r.rd) {
461 v[c.r.rd].value = v[c.r.rs].value << c.r.op;
462 v[c.r.rd].known = (v[c.r.rs].known << c.r.op)
464 v[c.r.rd].sign = v[c.r.rs].sign << c.r.op;
467 v[c.r.rd].known = 0xffffffff;
472 if (OPT_FLAG_MULT_DIV && c.r.imm) {
474 v[c.r.imm].value = v[c.r.rs].value << c.r.op - 32;
475 v[c.r.imm].known = (v[c.r.rs].known << c.r.op - 32)
476 | (BIT(c.r.op - 32) - 1);
477 v[c.r.imm].sign = v[c.r.rs].sign << c.r.op - 32;
478 } else if (c.i.op == OP_META_MULT2) {
479 v[c.r.imm].value = (s32)v[c.r.rs].value >> 32 - c.r.op;
480 v[c.r.imm].known = (s32)v[c.r.rs].known >> 32 - c.r.op;
481 v[c.r.imm].sign = (s32)v[c.r.rs].sign >> 32 - c.r.op;
483 v[c.r.imm].value = v[c.r.rs].value >> 32 - c.r.op;
484 v[c.r.imm].known = v[c.r.rs].known >> 32 - c.r.op;
485 v[c.r.imm].sign = v[c.r.rs].sign >> 32 - c.r.op;
496 struct constprop_data d = {
497 .value = (s32)(s16)c.i.imm,
502 lightrec_propagate_addi(c.i.rs, c.i.rt, &d, v);
504 /* immediate is zero - that's just a register copy. */
505 v[c.i.rt] = v[c.i.rs];
512 struct constprop_data d = {
513 .value = (s32)(s16)c.i.imm,
518 lightrec_propagate_slt(c.i.rs, c.i.rt,
519 c.i.op == OP_SLTI, &d, v);
524 v[c.i.rt].value = v[c.i.rs].value & c.i.imm;
525 v[c.i.rt].known = v[c.i.rs].known | ~c.i.imm;
530 v[c.i.rt].value = v[c.i.rs].value | c.i.imm;
531 v[c.i.rt].known = v[c.i.rs].known | c.i.imm;
532 v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign;
536 v[c.i.rt].value = v[c.i.rs].value ^ c.i.imm;
537 v[c.i.rt].known = v[c.i.rs].known;
538 v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign;
542 v[c.i.rt].value = c.i.imm << 16;
543 v[c.i.rt].known = 0xffffffff;
560 if (c.r.op == OP_CP2_BASIC) {
562 case OP_CP2_BASIC_MFC2:
573 v[c.r.rt].sign = 0xffff8000;
580 /* Unsigned 16-bit */
582 v[c.r.rt].known = 0xffff0000;
592 case OP_CP2_BASIC_CFC2:
603 v[c.r.rt].sign = 0xffff8000;
617 v[c.i.rt].sign = 0xffffff80;
621 v[c.i.rt].sign = 0xffff8000;
625 v[c.i.rt].known = 0xffffff00;
630 v[c.i.rt].known = 0xffff0000;
635 /* LWL/LWR don't write the full register if the address is
636 * unaligned, so we only need to know the low 2 bits */
637 if (v[c.i.rs].known & 0x3) {
638 imm = (v[c.i.rs].value & 0x3) * 8;
640 if (c.i.op == OP_LWL) {
641 imm = BIT(24 - imm) - 1;
642 v[c.i.rt].sign &= ~imm;
644 imm = imm ? GENMASK(31, 32 - imm) : 0;
647 v[c.i.rt].known &= ~imm;
656 v[c.r.rd] = v[c.r.rs];
659 v[c.i.rt].value = (s32)(s8)v[c.i.rs].value;
660 if (v[c.i.rs].known & BIT(7)) {
661 v[c.i.rt].known = v[c.i.rs].known | 0xffffff00;
664 v[c.i.rt].known = v[c.i.rs].known & 0x7f;
665 v[c.i.rt].sign = 0xffffff80;
670 v[c.i.rt].value = (s32)(s16)v[c.i.rs].value;
671 if (v[c.i.rs].known & BIT(15)) {
672 v[c.i.rt].known = v[c.i.rs].known | 0xffff0000;
675 v[c.i.rt].known = v[c.i.rs].known & 0x7fff;
676 v[c.i.rt].sign = 0xffff8000;
684 /* Reset register 0 which may have been used as a target */
687 v[0].known = 0xffffffff;
691 lightrec_get_constprop_map(const struct lightrec_state *state,
692 const struct constprop_data *v, u8 reg, s16 imm)
694 const struct lightrec_mem_map *map;
698 min = get_min_value(&v[reg]) + imm;
699 max = get_max_value(&v[reg]) + imm;
701 /* Handle the case where max + imm overflows */
702 if ((min & 0xe0000000) != (max & 0xe0000000))
703 return PSX_MAP_UNKNOWN;
705 pr_debug("Min: 0x%08x max: 0x%08x Known: 0x%08x Sign: 0x%08x\n",
706 min, max, v[reg].known, v[reg].sign);
711 for (i = 0; i < state->nb_maps; i++) {
712 map = &state->maps[i];
714 if (min >= map->pc && min < map->pc + map->length
715 && max >= map->pc && max < map->pc + map->length)
716 return (enum psx_map) i;
719 return PSX_MAP_UNKNOWN;