Commit | Line | Data |
---|---|---|
9259d748 PC |
1 | // SPDX-License-Identifier: LGPL-2.1-or-later |
2 | /* | |
3 | * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net> | |
4 | */ | |
5 | ||
6 | #include "constprop.h" | |
7 | #include "disassembler.h" | |
8 | #include "lightrec-private.h" | |
9 | ||
10 | #include <stdbool.h> | |
11 | #include <string.h> | |
12 | ||
13 | static u32 get_min_value(const struct constprop_data *d) | |
14 | { | |
15 | /* Min value: all sign bits to 1, all unknown bits but MSB to 0 */ | |
16 | return (d->value & d->known) | d->sign | (~d->known & BIT(31)); | |
17 | } | |
18 | ||
19 | static u32 get_max_value(const struct constprop_data *d) | |
20 | { | |
21 | /* Max value: all sign bits to 0, all unknown bits to 1 */ | |
22 | return ((d->value & d->known) | ~d->known) & ~d->sign; | |
23 | } | |
24 | ||
25 | static u32 lightrec_same_sign(const struct constprop_data *d1, | |
26 | const struct constprop_data *d2) | |
27 | { | |
28 | u32 min1, min2, max1, max2, a, b, c, d; | |
29 | ||
30 | min1 = get_min_value(d1); | |
31 | max1 = get_max_value(d1); | |
32 | min2 = get_min_value(d2); | |
33 | max2 = get_max_value(d2); | |
34 | ||
35 | a = min1 + min2; | |
36 | b = min1 + max2; | |
37 | c = max1 + min2; | |
38 | d = max1 + max2; | |
39 | ||
40 | return ((a & b & c & d) | (~a & ~b & ~c & ~d)) & BIT(31); | |
41 | } | |
42 | ||
43 | static u32 lightrec_get_sign_mask(const struct constprop_data *d) | |
44 | { | |
45 | u32 imm; | |
46 | ||
47 | if (d->sign) | |
48 | return d->sign; | |
49 | ||
50 | imm = (d->value & BIT(31)) ? d->value : ~d->value; | |
51 | imm = ~(imm & d->known); | |
52 | if (imm) | |
53 | imm = 32 - clz32(imm); | |
54 | ||
55 | return imm < 32 ? GENMASK(31, imm) : 0; | |
56 | } | |
57 | ||
58 | static void lightrec_propagate_addi(u32 rs, u32 rd, | |
59 | const struct constprop_data *d, | |
60 | struct constprop_data *v) | |
61 | { | |
62 | u32 end, bit, sum, min, mask, imm, value; | |
63 | struct constprop_data result = { | |
64 | .value = v[rd].value, | |
65 | .known = v[rd].known, | |
66 | .sign = v[rd].sign, | |
67 | }; | |
68 | bool carry = false; | |
69 | ||
70 | /* clear unknown bits to ease processing */ | |
71 | v[rs].value &= v[rs].known; | |
72 | value = d->value & d->known; | |
73 | ||
74 | mask = ~(lightrec_get_sign_mask(d) & lightrec_get_sign_mask(&v[rs])); | |
75 | end = mask ? 32 - clz32(mask) : 0; | |
76 | ||
77 | for (bit = 0; bit < 32; bit++) { | |
78 | if (v[rs].known & d->known & BIT(bit)) { | |
79 | /* the bits are known - compute the resulting bit and | |
80 | * the carry */ | |
81 | sum = ((u32)carry << bit) + (v[rs].value & BIT(bit)) | |
82 | + (value & BIT(bit)); | |
83 | ||
84 | if (sum & BIT(bit)) | |
85 | result.value |= BIT(bit); | |
86 | else | |
87 | result.value &= ~BIT(bit); | |
88 | ||
89 | result.known |= BIT(bit); | |
90 | result.sign &= ~BIT(bit); | |
91 | carry = sum & BIT(bit + 1); | |
92 | continue; | |
93 | } | |
94 | ||
95 | if (bit >= end) { | |
96 | /* We're past the last significant bits of the values | |
97 | * (extra sign bits excepted). | |
98 | * The destination register will be sign-extended | |
99 | * starting from here (if no carry) or from the next | |
100 | * bit (if carry). | |
101 | * If the source registers are not sign-extended and we | |
102 | * have no carry, the algorithm is done here. */ | |
103 | ||
104 | if ((v[rs].sign | d->sign) & BIT(bit)) { | |
105 | mask = GENMASK(31, bit); | |
106 | ||
107 | if (lightrec_same_sign(&v[rs], d)) { | |
108 | /* Theorical minimum and maximum values | |
109 | * have the same sign; therefore the | |
110 | * sign bits are known. */ | |
111 | min = get_min_value(&v[rs]) | |
112 | + get_min_value(d); | |
113 | result.value = (min & mask) | |
114 | | (result.value & ~mask); | |
115 | result.known |= mask << carry; | |
116 | result.sign = 0; | |
117 | } else { | |
118 | /* min/max have different signs. */ | |
119 | result.sign = mask << 1; | |
120 | result.known &= ~mask; | |
121 | } | |
122 | break; | |
123 | } else if (!carry) { | |
124 | /* Past end bit, no carry; we're done here. */ | |
125 | break; | |
126 | } | |
127 | } | |
128 | ||
129 | result.known &= ~BIT(bit); | |
130 | result.sign &= ~BIT(bit); | |
131 | ||
132 | /* Found an unknown bit in one of the registers. | |
133 | * If the carry and the bit in the other register are both zero, | |
134 | * we can continue the algorithm. */ | |
135 | if (!carry && (((d->known & ~value) | |
136 | | (v[rs].known & ~v[rs].value)) & BIT(bit))) | |
137 | continue; | |
138 | ||
139 | /* We have an unknown bit in one of the source registers, and we | |
140 | * may generate a carry: there's nothing to do. Everything from | |
141 | * this bit till the next known 0 bit or sign bit will be marked | |
142 | * as unknown. The algorithm can then restart at the following | |
143 | * bit. */ | |
144 | ||
145 | imm = (v[rs].known & d->known & ~v[rs].value & ~value) | |
146 | | v[rs].sign | d->sign; | |
147 | ||
148 | imm &= GENMASK(31, bit); | |
149 | imm = imm ? ctz32(imm) : 31; | |
150 | mask = GENMASK(imm, bit); | |
151 | result.known &= ~mask; | |
152 | result.sign &= ~mask; | |
153 | ||
154 | bit = imm; | |
155 | carry = false; | |
156 | } | |
157 | ||
158 | v[rd] = result; | |
159 | } | |
160 | ||
161 | static void lightrec_propagate_sub(u32 rs, u32 rt, u32 rd, | |
162 | struct constprop_data *v) | |
163 | { | |
164 | struct constprop_data d = { | |
165 | .value = ~v[rt].value, | |
166 | .known = v[rt].known, | |
167 | .sign = v[rt].sign, | |
168 | }; | |
169 | u32 imm, mask, bit; | |
170 | ||
171 | /* Negate the known Rt value, then propagate as a regular ADD. */ | |
172 | ||
173 | for (bit = 0; bit < 32; bit++) { | |
174 | if (!(d.known & BIT(bit))) { | |
175 | /* Unknown bit - mark bits unknown up to the next known 0 */ | |
176 | ||
177 | imm = (d.known & ~d.value) | d.sign; | |
178 | imm &= GENMASK(31, bit); | |
179 | imm = imm ? ctz32(imm) : 31; | |
180 | mask = GENMASK(imm, bit); | |
181 | d.known &= ~mask; | |
182 | d.sign &= ~mask; | |
183 | break; | |
184 | } | |
185 | ||
186 | if (!(d.value & BIT(bit))) { | |
187 | /* Bit is 0: we can set our carry, and the algorithm is done. */ | |
188 | d.value |= BIT(bit); | |
189 | break; | |
190 | } | |
191 | ||
192 | /* Bit is 1 - set to 0 and continue algorithm */ | |
193 | d.value &= ~BIT(bit); | |
194 | } | |
195 | ||
196 | lightrec_propagate_addi(rs, rd, &d, v); | |
197 | } | |
198 | ||
199 | static void lightrec_propagate_slt(u32 rs, u32 rd, bool is_signed, | |
200 | const struct constprop_data *d, | |
201 | struct constprop_data *v) | |
202 | { | |
203 | unsigned int bit; | |
204 | ||
205 | if (is_signed && (v[rs].known & d->known | |
206 | & (v[rs].value ^ d->value) & BIT(31))) { | |
207 | /* If doing a signed comparison and the two bits 31 are known | |
208 | * to be opposite, we can deduce the value. */ | |
209 | v[rd].value = v[rs].value >> 31; | |
210 | v[rd].known = 0xffffffff; | |
211 | v[rd].sign = 0; | |
212 | return; | |
213 | } | |
214 | ||
215 | for (bit = 32; bit > 0; bit--) { | |
216 | if (!(v[rs].known & d->known & BIT(bit - 1))) { | |
217 | /* One bit is unknown and we cannot figure out which | |
218 | * value is smaller. We still know that the upper 31 | |
219 | * bits are zero. */ | |
220 | v[rd].value = 0; | |
221 | v[rd].known = 0xfffffffe; | |
222 | v[rd].sign = 0; | |
223 | break; | |
224 | } | |
225 | ||
226 | /* The two bits are equal - continue to the next bit. */ | |
227 | if (~(v[rs].value ^ d->value) & BIT(bit - 1)) | |
228 | continue; | |
229 | ||
230 | /* The two bits aren't equal; we can therefore deduce which | |
231 | * value is smaller. */ | |
232 | v[rd].value = !(v[rs].value & BIT(bit - 1)); | |
233 | v[rd].known = 0xffffffff; | |
234 | v[rd].sign = 0; | |
235 | break; | |
236 | } | |
237 | ||
238 | if (bit == 0) { | |
239 | /* rs == rt and all bits are known */ | |
240 | v[rd].value = 0; | |
241 | v[rd].known = 0xffffffff; | |
242 | v[rd].sign = 0; | |
243 | } | |
244 | } | |
245 | ||
cb72ea13 | 246 | void lightrec_consts_propagate(const struct block *block, |
9259d748 PC |
247 | unsigned int idx, |
248 | struct constprop_data *v) | |
249 | { | |
cb72ea13 | 250 | const struct opcode *list = block->opcode_list; |
9259d748 | 251 | union code c; |
cb72ea13 | 252 | u32 imm, flags; |
9259d748 PC |
253 | |
254 | if (idx == 0) | |
255 | return; | |
256 | ||
257 | /* Register $zero is always, well, zero */ | |
258 | v[0].value = 0; | |
259 | v[0].sign = 0; | |
260 | v[0].known = 0xffffffff; | |
261 | ||
262 | if (op_flag_sync(list[idx].flags)) { | |
263 | memset(&v[1], 0, sizeof(*v) * 31); | |
264 | return; | |
265 | } | |
266 | ||
cb72ea13 PC |
267 | flags = list[idx - 1].flags; |
268 | ||
269 | if (idx > 1 && !op_flag_sync(flags)) { | |
270 | if (op_flag_no_ds(flags)) | |
271 | c = list[idx - 1].c; | |
272 | else | |
273 | c = list[idx - 2].c; | |
9259d748 PC |
274 | |
275 | switch (c.i.op) { | |
276 | case OP_BNE: | |
277 | /* After a BNE $zero + delay slot, we know that the | |
278 | * branch wasn't taken, and therefore the other register | |
279 | * is zero. */ | |
280 | if (c.i.rs == 0) { | |
281 | v[c.i.rt].value = 0; | |
282 | v[c.i.rt].sign = 0; | |
283 | v[c.i.rt].known = 0xffffffff; | |
284 | } else if (c.i.rt == 0) { | |
285 | v[c.i.rs].value = 0; | |
286 | v[c.i.rs].sign = 0; | |
287 | v[c.i.rs].known = 0xffffffff; | |
288 | } | |
289 | break; | |
290 | case OP_BLEZ: | |
291 | v[c.i.rs].value &= ~BIT(31); | |
292 | v[c.i.rs].known |= BIT(31); | |
293 | fallthrough; | |
294 | case OP_BEQ: | |
295 | /* TODO: handle non-zero? */ | |
296 | break; | |
297 | case OP_REGIMM: | |
298 | switch (c.r.rt) { | |
299 | case OP_REGIMM_BLTZ: | |
300 | case OP_REGIMM_BLTZAL: | |
301 | v[c.i.rs].value &= ~BIT(31); | |
302 | v[c.i.rs].known |= BIT(31); | |
303 | break; | |
304 | case OP_REGIMM_BGEZ: | |
305 | case OP_REGIMM_BGEZAL: | |
306 | v[c.i.rs].value |= BIT(31); | |
307 | v[c.i.rs].known |= BIT(31); | |
308 | /* TODO: handle non-zero? */ | |
309 | break; | |
310 | } | |
311 | break; | |
312 | default: | |
313 | break; | |
314 | } | |
315 | } | |
316 | ||
317 | c = list[idx - 1].c; | |
318 | ||
319 | switch (c.i.op) { | |
320 | case OP_SPECIAL: | |
321 | switch (c.r.op) { | |
322 | case OP_SPECIAL_SLL: | |
323 | v[c.r.rd].value = v[c.r.rt].value << c.r.imm; | |
324 | v[c.r.rd].known = (v[c.r.rt].known << c.r.imm) | |
325 | | (BIT(c.r.imm) - 1); | |
326 | v[c.r.rd].sign = v[c.r.rt].sign << c.r.imm; | |
327 | break; | |
328 | ||
329 | case OP_SPECIAL_SRL: | |
330 | v[c.r.rd].value = v[c.r.rt].value >> c.r.imm; | |
331 | v[c.r.rd].known = (v[c.r.rt].known >> c.r.imm) | |
332 | | (BIT(c.r.imm) - 1 << 32 - c.r.imm); | |
333 | v[c.r.rd].sign = c.r.imm ? 0 : v[c.r.rt].sign; | |
334 | break; | |
335 | ||
336 | case OP_SPECIAL_SRA: | |
337 | v[c.r.rd].value = (s32)v[c.r.rt].value >> c.r.imm; | |
338 | v[c.r.rd].known = (s32)v[c.r.rt].known >> c.r.imm; | |
339 | v[c.r.rd].sign = (s32)v[c.r.rt].sign >> c.r.imm; | |
340 | break; | |
341 | ||
342 | case OP_SPECIAL_SLLV: | |
343 | if ((v[c.r.rs].known & 0x1f) == 0x1f) { | |
344 | imm = v[c.r.rs].value & 0x1f; | |
345 | v[c.r.rd].value = v[c.r.rt].value << imm; | |
346 | v[c.r.rd].known = (v[c.r.rt].known << imm) | |
347 | | (BIT(imm) - 1); | |
348 | v[c.r.rd].sign = v[c.r.rt].sign << imm; | |
349 | } else { | |
350 | v[c.r.rd].known = 0; | |
351 | v[c.r.rd].sign = 0; | |
352 | } | |
353 | break; | |
354 | ||
355 | case OP_SPECIAL_SRLV: | |
356 | if ((v[c.r.rs].known & 0x1f) == 0x1f) { | |
357 | imm = v[c.r.rs].value & 0x1f; | |
358 | v[c.r.rd].value = v[c.r.rt].value >> imm; | |
359 | v[c.r.rd].known = (v[c.r.rt].known >> imm) | |
360 | | (BIT(imm) - 1 << 32 - imm); | |
361 | if (imm) | |
362 | v[c.r.rd].sign = 0; | |
363 | } else { | |
364 | v[c.r.rd].known = 0; | |
365 | v[c.r.rd].sign = 0; | |
366 | } | |
367 | break; | |
368 | ||
369 | case OP_SPECIAL_SRAV: | |
370 | if ((v[c.r.rs].known & 0x1f) == 0x1f) { | |
371 | imm = v[c.r.rs].value & 0x1f; | |
372 | v[c.r.rd].value = (s32)v[c.r.rt].value >> imm; | |
373 | v[c.r.rd].known = (s32)v[c.r.rt].known >> imm; | |
374 | v[c.r.rd].sign = (s32)v[c.r.rt].sign >> imm; | |
375 | } else { | |
376 | v[c.r.rd].known = 0; | |
377 | v[c.r.rd].sign = 0; | |
378 | } | |
379 | break; | |
380 | ||
381 | case OP_SPECIAL_ADD: | |
382 | case OP_SPECIAL_ADDU: | |
383 | if (is_known_zero(v, c.r.rs)) | |
384 | v[c.r.rd] = v[c.r.rt]; | |
385 | else if (is_known_zero(v, c.r.rt)) | |
386 | v[c.r.rd] = v[c.r.rs]; | |
387 | else | |
388 | lightrec_propagate_addi(c.r.rs, c.r.rd, &v[c.r.rt], v); | |
389 | break; | |
390 | ||
391 | case OP_SPECIAL_SUB: | |
392 | case OP_SPECIAL_SUBU: | |
393 | if (c.r.rs == c.r.rt) { | |
394 | v[c.r.rd].value = 0; | |
395 | v[c.r.rd].known = 0xffffffff; | |
396 | v[c.r.rd].sign = 0; | |
397 | } else { | |
398 | lightrec_propagate_sub(c.r.rs, c.r.rt, c.r.rd, v); | |
399 | } | |
400 | break; | |
401 | ||
402 | case OP_SPECIAL_AND: | |
403 | v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known) | |
404 | | (~v[c.r.rt].value & v[c.r.rt].known) | |
405 | | (~v[c.r.rs].value & v[c.r.rs].known); | |
406 | v[c.r.rd].value = v[c.r.rt].value & v[c.r.rs].value & v[c.r.rd].known; | |
407 | v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign; | |
408 | break; | |
409 | ||
410 | case OP_SPECIAL_OR: | |
411 | v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known) | |
412 | | (v[c.r.rt].value & v[c.r.rt].known) | |
413 | | (v[c.r.rs].value & v[c.r.rs].known); | |
414 | v[c.r.rd].value = (v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known; | |
415 | v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign; | |
416 | break; | |
417 | ||
418 | case OP_SPECIAL_XOR: | |
419 | v[c.r.rd].value = v[c.r.rt].value ^ v[c.r.rs].value; | |
420 | v[c.r.rd].known = v[c.r.rt].known & v[c.r.rs].known; | |
421 | v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign; | |
422 | break; | |
423 | ||
424 | case OP_SPECIAL_NOR: | |
425 | v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known) | |
426 | | (v[c.r.rt].value & v[c.r.rt].known) | |
427 | | (v[c.r.rs].value & v[c.r.rs].known); | |
428 | v[c.r.rd].value = ~(v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known; | |
429 | v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign; | |
430 | break; | |
431 | ||
432 | case OP_SPECIAL_SLT: | |
433 | case OP_SPECIAL_SLTU: | |
434 | lightrec_propagate_slt(c.r.rs, c.r.rd, | |
435 | c.r.op == OP_SPECIAL_SLT, | |
436 | &v[c.r.rt], v); | |
437 | break; | |
438 | ||
439 | case OP_SPECIAL_MULT: | |
440 | case OP_SPECIAL_MULTU: | |
441 | case OP_SPECIAL_DIV: | |
442 | case OP_SPECIAL_DIVU: | |
443 | if (OPT_FLAG_MULT_DIV && c.r.rd) { | |
444 | v[c.r.rd].known = 0; | |
445 | v[c.r.rd].sign = 0; | |
446 | } | |
447 | if (OPT_FLAG_MULT_DIV && c.r.imm) { | |
448 | v[c.r.imm].known = 0; | |
449 | v[c.r.imm].sign = 0; | |
450 | } | |
451 | break; | |
452 | ||
453 | case OP_SPECIAL_MFLO: | |
454 | case OP_SPECIAL_MFHI: | |
455 | v[c.r.rd].known = 0; | |
456 | v[c.r.rd].sign = 0; | |
457 | break; | |
cb72ea13 PC |
458 | |
459 | case OP_SPECIAL_JALR: | |
460 | v[c.r.rd].known = 0xffffffff; | |
461 | v[c.r.rd].sign = 0; | |
462 | v[c.r.rd].value = block->pc + (idx + 2 << 2); | |
463 | break; | |
464 | ||
9259d748 PC |
465 | default: |
466 | break; | |
467 | } | |
468 | break; | |
469 | ||
470 | case OP_META_MULT2: | |
471 | case OP_META_MULTU2: | |
472 | if (OPT_FLAG_MULT_DIV && c.r.rd) { | |
473 | if (c.r.op < 32) { | |
474 | v[c.r.rd].value = v[c.r.rs].value << c.r.op; | |
475 | v[c.r.rd].known = (v[c.r.rs].known << c.r.op) | |
476 | | (BIT(c.r.op) - 1); | |
477 | v[c.r.rd].sign = v[c.r.rs].sign << c.r.op; | |
478 | } else { | |
479 | v[c.r.rd].value = 0; | |
480 | v[c.r.rd].known = 0xffffffff; | |
481 | v[c.r.rd].sign = 0; | |
482 | } | |
483 | } | |
484 | ||
485 | if (OPT_FLAG_MULT_DIV && c.r.imm) { | |
486 | if (c.r.op >= 32) { | |
487 | v[c.r.imm].value = v[c.r.rs].value << c.r.op - 32; | |
488 | v[c.r.imm].known = (v[c.r.rs].known << c.r.op - 32) | |
489 | | (BIT(c.r.op - 32) - 1); | |
490 | v[c.r.imm].sign = v[c.r.rs].sign << c.r.op - 32; | |
491 | } else if (c.i.op == OP_META_MULT2) { | |
492 | v[c.r.imm].value = (s32)v[c.r.rs].value >> 32 - c.r.op; | |
493 | v[c.r.imm].known = (s32)v[c.r.rs].known >> 32 - c.r.op; | |
494 | v[c.r.imm].sign = (s32)v[c.r.rs].sign >> 32 - c.r.op; | |
495 | } else { | |
496 | v[c.r.imm].value = v[c.r.rs].value >> 32 - c.r.op; | |
497 | v[c.r.imm].known = v[c.r.rs].known >> 32 - c.r.op; | |
498 | v[c.r.imm].sign = v[c.r.rs].sign >> 32 - c.r.op; | |
499 | } | |
500 | } | |
501 | break; | |
502 | ||
503 | case OP_REGIMM: | |
504 | break; | |
505 | ||
506 | case OP_ADDI: | |
507 | case OP_ADDIU: | |
508 | if (c.i.imm) { | |
509 | struct constprop_data d = { | |
510 | .value = (s32)(s16)c.i.imm, | |
511 | .known = 0xffffffff, | |
512 | .sign = 0, | |
513 | }; | |
514 | ||
515 | lightrec_propagate_addi(c.i.rs, c.i.rt, &d, v); | |
516 | } else { | |
517 | /* immediate is zero - that's just a register copy. */ | |
518 | v[c.i.rt] = v[c.i.rs]; | |
519 | } | |
520 | break; | |
521 | ||
522 | case OP_SLTI: | |
523 | case OP_SLTIU: | |
524 | { | |
525 | struct constprop_data d = { | |
526 | .value = (s32)(s16)c.i.imm, | |
527 | .known = 0xffffffff, | |
528 | .sign = 0, | |
529 | }; | |
530 | ||
531 | lightrec_propagate_slt(c.i.rs, c.i.rt, | |
532 | c.i.op == OP_SLTI, &d, v); | |
533 | } | |
534 | break; | |
535 | ||
536 | case OP_ANDI: | |
537 | v[c.i.rt].value = v[c.i.rs].value & c.i.imm; | |
538 | v[c.i.rt].known = v[c.i.rs].known | ~c.i.imm; | |
539 | v[c.i.rt].sign = 0; | |
540 | break; | |
541 | ||
542 | case OP_ORI: | |
543 | v[c.i.rt].value = v[c.i.rs].value | c.i.imm; | |
544 | v[c.i.rt].known = v[c.i.rs].known | c.i.imm; | |
545 | v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign; | |
546 | break; | |
547 | ||
548 | case OP_XORI: | |
549 | v[c.i.rt].value = v[c.i.rs].value ^ c.i.imm; | |
550 | v[c.i.rt].known = v[c.i.rs].known; | |
551 | v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign; | |
552 | break; | |
553 | ||
554 | case OP_LUI: | |
555 | v[c.i.rt].value = c.i.imm << 16; | |
556 | v[c.i.rt].known = 0xffffffff; | |
557 | v[c.i.rt].sign = 0; | |
558 | break; | |
559 | ||
560 | case OP_CP0: | |
561 | switch (c.r.rs) { | |
562 | case OP_CP0_MFC0: | |
563 | case OP_CP0_CFC0: | |
564 | v[c.r.rt].known = 0; | |
565 | v[c.r.rt].sign = 0; | |
566 | break; | |
567 | default: | |
568 | break; | |
569 | } | |
570 | break; | |
571 | ||
572 | case OP_CP2: | |
573 | if (c.r.op == OP_CP2_BASIC) { | |
574 | switch (c.r.rs) { | |
575 | case OP_CP2_BASIC_MFC2: | |
576 | switch (c.r.rd) { | |
577 | case 1: | |
578 | case 3: | |
579 | case 5: | |
580 | case 8: | |
581 | case 9: | |
582 | case 10: | |
583 | case 11: | |
584 | /* Signed 16-bit */ | |
585 | v[c.r.rt].known = 0; | |
586 | v[c.r.rt].sign = 0xffff8000; | |
587 | break; | |
588 | case 7: | |
589 | case 16: | |
590 | case 17: | |
591 | case 18: | |
592 | case 19: | |
593 | /* Unsigned 16-bit */ | |
594 | v[c.r.rt].value = 0; | |
595 | v[c.r.rt].known = 0xffff0000; | |
596 | v[c.r.rt].sign = 0; | |
597 | break; | |
598 | default: | |
599 | /* 32-bit */ | |
600 | v[c.r.rt].known = 0; | |
601 | v[c.r.rt].sign = 0; | |
602 | break; | |
603 | } | |
604 | break; | |
605 | case OP_CP2_BASIC_CFC2: | |
606 | switch (c.r.rd) { | |
607 | case 4: | |
608 | case 12: | |
609 | case 20: | |
610 | case 26: | |
611 | case 27: | |
612 | case 29: | |
613 | case 30: | |
614 | /* Signed 16-bit */ | |
615 | v[c.r.rt].known = 0; | |
616 | v[c.r.rt].sign = 0xffff8000; | |
617 | break; | |
618 | default: | |
619 | /* 32-bit */ | |
620 | v[c.r.rt].known = 0; | |
621 | v[c.r.rt].sign = 0; | |
622 | break; | |
623 | } | |
624 | break; | |
625 | } | |
626 | } | |
627 | break; | |
628 | case OP_LB: | |
629 | v[c.i.rt].known = 0; | |
630 | v[c.i.rt].sign = 0xffffff80; | |
631 | break; | |
632 | case OP_LH: | |
633 | v[c.i.rt].known = 0; | |
634 | v[c.i.rt].sign = 0xffff8000; | |
635 | break; | |
636 | case OP_LBU: | |
637 | v[c.i.rt].value = 0; | |
638 | v[c.i.rt].known = 0xffffff00; | |
639 | v[c.i.rt].sign = 0; | |
640 | break; | |
641 | case OP_LHU: | |
642 | v[c.i.rt].value = 0; | |
643 | v[c.i.rt].known = 0xffff0000; | |
644 | v[c.i.rt].sign = 0; | |
645 | break; | |
646 | case OP_LWL: | |
647 | case OP_LWR: | |
648 | /* LWL/LWR don't write the full register if the address is | |
649 | * unaligned, so we only need to know the low 2 bits */ | |
650 | if (v[c.i.rs].known & 0x3) { | |
651 | imm = (v[c.i.rs].value & 0x3) * 8; | |
652 | ||
653 | if (c.i.op == OP_LWL) { | |
654 | imm = BIT(24 - imm) - 1; | |
655 | v[c.i.rt].sign &= ~imm; | |
656 | } else { | |
657 | imm = imm ? GENMASK(31, 32 - imm) : 0; | |
658 | v[c.i.rt].sign = 0; | |
659 | } | |
cb72ea13 | 660 | v[c.i.rt].known &= imm; |
9259d748 PC |
661 | break; |
662 | } | |
663 | fallthrough; | |
664 | case OP_LW: | |
665 | v[c.i.rt].known = 0; | |
666 | v[c.i.rt].sign = 0; | |
667 | break; | |
cb72ea13 PC |
668 | case OP_META: |
669 | switch (c.m.op) { | |
670 | case OP_META_MOV: | |
671 | v[c.m.rd] = v[c.m.rs]; | |
672 | break; | |
9259d748 | 673 | |
cb72ea13 PC |
674 | case OP_META_EXTC: |
675 | v[c.m.rd].value = (s32)(s8)v[c.m.rs].value; | |
676 | if (v[c.m.rs].known & BIT(7)) { | |
677 | v[c.m.rd].known = v[c.m.rs].known | 0xffffff00; | |
678 | v[c.m.rd].sign = 0; | |
679 | } else { | |
680 | v[c.m.rd].known = v[c.m.rs].known & 0x7f; | |
681 | v[c.m.rd].sign = 0xffffff80; | |
682 | } | |
683 | break; | |
684 | ||
685 | case OP_META_EXTS: | |
686 | v[c.m.rd].value = (s32)(s16)v[c.m.rs].value; | |
687 | if (v[c.m.rs].known & BIT(15)) { | |
688 | v[c.m.rd].known = v[c.m.rs].known | 0xffff0000; | |
689 | v[c.m.rd].sign = 0; | |
690 | } else { | |
691 | v[c.m.rd].known = v[c.m.rs].known & 0x7fff; | |
692 | v[c.m.rd].sign = 0xffff8000; | |
693 | } | |
694 | break; | |
695 | ||
696 | case OP_META_COM: | |
697 | v[c.m.rd].known = v[c.m.rs].known; | |
698 | v[c.m.rd].value = ~v[c.m.rs].value; | |
699 | v[c.m.rd].sign = v[c.m.rs].sign; | |
700 | break; | |
701 | default: | |
702 | break; | |
9259d748 PC |
703 | } |
704 | break; | |
cb72ea13 PC |
705 | case OP_JAL: |
706 | v[31].known = 0xffffffff; | |
707 | v[31].sign = 0; | |
708 | v[31].value = block->pc + (idx + 2 << 2); | |
709 | break; | |
9259d748 PC |
710 | |
711 | default: | |
712 | break; | |
713 | } | |
714 | ||
715 | /* Reset register 0 which may have been used as a target */ | |
716 | v[0].value = 0; | |
717 | v[0].sign = 0; | |
718 | v[0].known = 0xffffffff; | |
719 | } | |
720 | ||
721 | enum psx_map | |
722 | lightrec_get_constprop_map(const struct lightrec_state *state, | |
723 | const struct constprop_data *v, u8 reg, s16 imm) | |
724 | { | |
725 | const struct lightrec_mem_map *map; | |
726 | unsigned int i; | |
727 | u32 min, max; | |
728 | ||
729 | min = get_min_value(&v[reg]) + imm; | |
730 | max = get_max_value(&v[reg]) + imm; | |
731 | ||
732 | /* Handle the case where max + imm overflows */ | |
733 | if ((min & 0xe0000000) != (max & 0xe0000000)) | |
734 | return PSX_MAP_UNKNOWN; | |
735 | ||
736 | pr_debug("Min: 0x%08x max: 0x%08x Known: 0x%08x Sign: 0x%08x\n", | |
737 | min, max, v[reg].known, v[reg].sign); | |
738 | ||
739 | min = kunseg(min); | |
740 | max = kunseg(max); | |
741 | ||
742 | for (i = 0; i < state->nb_maps; i++) { | |
743 | map = &state->maps[i]; | |
744 | ||
745 | if (min >= map->pc && min < map->pc + map->length | |
746 | && max >= map->pc && max < map->pc + map->length) | |
747 | return (enum psx_map) i; | |
748 | } | |
749 | ||
750 | return PSX_MAP_UNKNOWN; | |
751 | } |