f535537f |
1 | /* Bra.c -- Branch converters for RISC code |
2 | 2024-01-20 : Igor Pavlov : Public domain */ |
3 | |
4 | #include "Precomp.h" |
5 | |
6 | #include "Bra.h" |
7 | #include "RotateDefs.h" |
8 | #include "CpuArch.h" |
9 | |
/* Helper macros shared by the branch converters below.
   Each converter scans the buffer for branch/call instructions and converts
   their target fields between relative and absolute form:
   encoding adds the current program counter (pc), decoding subtracts it. */

/* When the pointer size is a known 4 or 8 bytes, the data pointer can be
   folded into (pc) once, so computing the current pc costs one addition. */
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
// (pc) holds (pc - start); BR_PC_GET re-adds the current pointer
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p;
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
// (pc) holds the pc of the end of the block; derive current pc via (lim - p)
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif

// direction selector: add (c) when encoding, subtract when decoding
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

#define Z7_BRANCH_CONV(name) z7_ ## name

// shared (static, force-inlined) converter body; (encoding) picks the direction
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)

// exported thin wrapper instantiating the shared body with a fixed (encoding)
#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
  { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \

#ifdef Z7_EXTRACT_ONLY
// extract-only builds need only the decoder
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
#endif

// loop-shape helpers: clang compiles better with a single flat loop,
// other compilers with an explicit outer for(;;)
#if defined(__clang__)
  #define BR_EXTERNAL_FOR
  #define BR_NEXT_ITERATION  continue;
#else
  #define BR_EXTERNAL_FOR    for (;;)
  #define BR_NEXT_ITERATION  break;
#endif

#if defined(__clang__) && (__clang_major__ >= 8) \
  || defined(__GNUC__) && (__GNUC__ >= 1000) \
    // GCC is not good for __builtin_expect() here
    /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
  // #define Z7_unlikely [[unlikely]]
  // #define Z7_LIKELY(x) (__builtin_expect((x), 1))
  #define Z7_UNLIKELY(x)  (__builtin_expect((x), 0))
  // #define Z7_likely [[likely]]
#else
  // #define Z7_LIKELY(x) (x)
  #define Z7_UNLIKELY(x)  (x)
  // #define Z7_likely
#endif
73 | |
74 | |
/* ARM64 (AArch64) converter.
   Converts the targets of BL instructions (26-bit word offset,
   opcode 0x94000000) and of ADRP instructions (21-bit page offset,
   opcode 0x90000000 pattern) within the supported offset range.
   Scans 4-byte aligned 32-bit little-endian instructions. */
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
{
  // Byte *p = data;
  const Byte *lim;
  const UInt32 flag = (UInt32)1 << (24 - 4);           // sign position of the supported ADRP offset range
  const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); // out-of-range bits for the ADRP offset check
  size &= ~(SizeT)3;  // whole 4-byte instructions only
  // if (size == 0) return p;
  lim = p + size;
  BR_PC_INIT
  pc -= 4; // because (p) will point to next instruction
  
  BR_EXTERNAL_FOR
  {
    // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
    for (;;)
    {
      UInt32 v;
      if Z7_UNLIKELY(p == lim)
        return p;
      v = GetUi32a(p);
      p += 4;
      // BL: top 6 bits == 100101 (0x94000000)
      if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
      {
        UInt32 c = BR_PC_GET >> 2;  // offset field counts 4-byte words
        BR_CONVERT_VAL(v, c)
        v &= 0x03ffffff;  // keep the 26-bit offset
        v |= 0x94000000;  // restore the BL opcode
        SetUi32a(p - 4, v)
        BR_NEXT_ITERATION
      }
      // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
      // ADRP: after subtracting 0x90000000, bits {31, 28..24} must be zero
      v -= 0x90000000;  if Z7_UNLIKELY((v & 0x9f000000) == 0)
      {
        UInt32 z, c;
        // v = rotrFixed(v, 8);
        // skip ADRP whose page offset is outside the supported +/- range
        v += flag;  if Z7_UNLIKELY(v & mask) continue;
        z = (v & 0xffffffe0) | (v >> 26);  // gather the split immediate into a linear offset
        c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;  // page index (pc >> 12), pre-aligned by 3 bits
        BR_CONVERT_VAL(z, c)
        v &= 0x1f;        // keep the destination register field
        v |= 0x90000000;  // restore the ADRP opcode pattern
        v |= z << 26;     // low offset bits back into the instruction
        v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);  // high offset bits (with sign handling)
        SetUi32a(p - 4, v)
      }
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
125 | |
126 | |
127 | Z7_BRANCH_FUNC_MAIN(BranchConv_ARM) |
128 | { |
129 | // Byte *p = data; |
130 | const Byte *lim; |
131 | size &= ~(SizeT)3; |
132 | lim = p + size; |
133 | BR_PC_INIT |
134 | /* in ARM: branch offset is relative to the +2 instructions from current instruction. |
135 | (p) will point to next instruction */ |
136 | pc += 8 - 4; |
137 | |
138 | for (;;) |
139 | { |
140 | for (;;) |
141 | { |
142 | if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; |
143 | if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; |
144 | } |
145 | { |
146 | UInt32 v = GetUi32a(p - 4); |
147 | UInt32 c = BR_PC_GET >> 2; |
148 | BR_CONVERT_VAL(v, c) |
149 | v &= 0x00ffffff; |
150 | v |= 0xeb000000; |
151 | SetUi32a(p - 4, v) |
152 | } |
153 | } |
154 | } |
155 | Z7_BRANCH_FUNCS_IMP(BranchConv_ARM) |
156 | |
157 | |
/* PowerPC (big-endian) converter.
   Converts the 24-bit offset of branch instructions matching
   (instr & 0xfc000003) == 0x48000001, i.e. opcode 18 with AA=0, LK=1 ("bl"). */
Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
{
  // Byte *p = data;
  const Byte *lim;
  size &= ~(SizeT)3;  // whole 4-byte instructions only
  lim = p + size;
  BR_PC_INIT
  pc -= 4; // because (p) will point to next instruction
  
  for (;;)
  {
    UInt32 v;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      // v = GetBe32a(p);
      // native-endian aligned load: the match constants below are converted
      // to native order instead, so the test works on any endianness
      v = *(UInt32 *)(void *)p;
      p += 4;
      // if ((v & 0xfc000003) == 0x48000001) break;
      // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
      if Z7_UNLIKELY(
          ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
             & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
    }
    {
      v = Z7_CONV_NATIVE_TO_BE_32(v);  // switch to the big-endian instruction value
      {
        UInt32 c = BR_PC_GET;  // byte offset; low 2 bits of the field are the AA|LK flags
        BR_CONVERT_VAL(v, c)
      }
      v &= 0x03ffffff;  // keep offset field (and LK bit)
      v |= 0x48000000;  // restore the branch opcode
      SetBe32a(p - 4, v)
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
196 | |
197 | |
// use the rotate-based variant where the CPU has fast rotate instructions
#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
#define BR_SPARC_USE_ROTATE
#endif

/* SPARC (big-endian) converter.
   Converts the 30-bit word displacement of CALL instructions
   (top 2 bits == 01), but only when the displacement fits the
   supported +/- range defined by (flag).
   Two equivalent detection/conversion paths exist: a rotate-based
   one and a shift/xor-based one. */
Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
{
  // Byte *p = data;
  const Byte *lim;
  const UInt32 flag = (UInt32)1 << 22;  // sign position of the supported displacement range
  size &= ~(SizeT)3;  // whole 4-byte instructions only
  lim = p + size;
  BR_PC_INIT
  pc -= 4; // because (p) will point to next instruction
  for (;;)
  {
    UInt32 v;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      /* // the code without GetBe32a():
      { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
      */
      v = GetBe32a(p);
      p += 4;
      #ifdef BR_SPARC_USE_ROTATE
      // rotate the opcode bits down; the bias folds the range check
      // and the "CALL opcode" check into one masked test
      v = rotlFixed(v, 2);
      v += (flag << 2) - 1;
      if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
      #else
      // equivalent check without rotates
      v += (UInt32)5 << 29;
      v ^= (UInt32)7 << 29;
      v += flag;
      if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
      #endif
        break;
    }
    {
      // UInt32 v = GetBe32a(p - 4);
      #ifndef BR_SPARC_USE_ROTATE
      v <<= 2;  // displacement counts words; align with the byte pc
      #endif
      {
        UInt32 c = BR_PC_GET;
        BR_CONVERT_VAL(v, c)
      }
      v &= (flag << 3) - 1;  // keep the converted offset bits
      #ifdef BR_SPARC_USE_ROTATE
      v -= (flag << 2) - 1;  // remove the detection bias
      v = rotrFixed(v, 2);   // restore the CALL opcode in the top bits
      #else
      v -= (flag << 2);
      v >>= 2;
      v |= (UInt32)1 << 30;  // restore the CALL opcode (01 in the top bits)
      #endif
      SetBe32a(p - 4, v)
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
258 | |
259 | |
/* ARM Thumb converter.
   Converts the 22-bit offset of Thumb BL instruction pairs: two
   consecutive 16-bit halfwords whose high bytes are 0xf0..0xf7
   (first half, offset high part) and 0xf8..0xff (second half,
   offset low part). The scan is manually unrolled two halfwords
   per iteration to reduce (p >= lim) checks. */
Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
{
  // Byte *p = data;
  Byte *lim;
  size &= ~(SizeT)1;  // whole 2-byte halfwords only
  // if (size == 0) return p;
  if (size <= 2) return p;  // a BL pair needs at least 4 bytes
  size -= 2;
  lim = p + size;
  BR_PC_INIT
  /* in ARM: branch offset is relative to the +2 instructions from current instruction.
     (p) will point to the +2 instructions from current instruction */
  // pc += 4 - 4;
  // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
  // #define ARMT_TAIL_PROC { goto armt_tail; }
  #define ARMT_TAIL_PROC { return p; }
  
  do
  {
    /* in MSVC 32-bit x86 compilers:
       UInt32 version : it loads value from memory with movzx
       Byte version : it loads value to 8-bit register (AL/CL)
       movzx version is slightly faster in some cpus
    */
    unsigned b1;
    // Byte / unsigned
    b1 = p[1];  // high byte of the current halfword
    // optimized version to reduce one (p >= lim) check:
    // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
    for (;;)
    {
      unsigned b3; // Byte / UInt32
      /* (Byte)(b3) normalization can use low byte computations in MSVC.
         It gives smaller code, and no loss of speed in some compilers/cpus.
         But new MSVC 32-bit x86 compilers use more slow load
         from memory to low byte register in that case.
         So we try to use full 32-bit computations for faster code.
      */
      // the combined test (next & (cur ^ 8)) >= 0xf8 matches exactly a
      // 0xf0..0xf7 halfword followed by a 0xf8..0xff halfword (a BL pair)
      // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
    }
    {
      /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
         But gcc/clang for arm64 can use bfi instruction for full code here */
      // combine the two 11-bit halves into a single 22-bit offset value
      UInt32 v =
          ((UInt32)GetUi16a(p - 2) << 11) |
          ((UInt32)GetUi16a(p) & 0x7FF);
      /*
      UInt32 v =
            ((UInt32)p[1 - 2] << 19)
          + (((UInt32)p[1] & 0x7) << 8)
          + (((UInt32)p[-2] << 11))
          + (p[0]);
      */
      p += 2;
      {
        UInt32 c = BR_PC_GET >> 1;  // offset counts 2-byte halfwords
        BR_CONVERT_VAL(v, c)
      }
      // split the converted offset back into the two halfwords and
      // restore the 0xf000 / 0xf800 marker bits
      SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
      SetUi16a(p - 2, (UInt16)(v | 0xf800))
      /*
      p[-4] = (Byte)(v >> 11);
      p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
      p[-2] = (Byte)v;
      p[-1] = (Byte)(0xf8 | (v >> 8));
      */
    }
  }
  while (p < lim);
  return p;
  // armt_tail:
  // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
  // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
  // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
  // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
339 | |
340 | |
// #define BR_IA64_NO_INLINE

/* IA-64 (Itanium) converter.
   Walks 16-byte instruction bundles. The bundle template (low 5 bits of
   the first byte) is looked up in the packed table constant 0x334b0000 to
   get a 2-bit mask (m) of which 41-bit slots may hold branch instructions.
   For each candidate slot, the branch-call immediate is converted.
   NOTE: unlike the other converters, this one updates (pc) incrementally
   and works in 8-byte units (pc is pre-shifted by 3, then advanced by 2
   per 16-byte bundle). */
Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
{
  // Byte *p = data;
  const Byte *lim;
  size &= ~(SizeT)15;  // whole 16-byte bundles only
  lim = p + size;
  pc -= 1 << 4;        // compensate for the first (pc += 1 << 1) below
  pc >>= 4 - 1;        // pc in 8-byte units (bundle index << 1)
  // pc -= 1 << 1;
  
  for (;;)
  {
    unsigned m;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      // template lookup: 2-bit slot mask for this bundle
      m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
      p += 16;
      pc += 1 << 1;
      if (m &= 3)
        break;
    }
    {
      // seek to the first candidate slot (slots are 41 bits; the byte
      // position of slot m starts at offset m * 5 inside the bundle)
      p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
      do
      {
        const UInt32 t =
          #if defined(MY_CPU_X86_OR_AMD64)
            // we use 32-bit load here to reduce code size on x86:
            GetUi32(p);
          #else
            GetUi16(p);
          #endif
        UInt32 z = GetUi32(p + 1) >> m;  // the slot's immediate area, aligned
        p += 5;
        // check the opcode bits that identify a convertible branch-call
        if (((t >> m) & (0x70 << 1)) == 0
            && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
        {
          UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;  // extract imm20 + sign bit
          z ^= v;  // clear the immediate field in the slot image
          #ifdef BR_IA64_NO_INLINE
          v |= (v & ((UInt32)1 << (23 + 1))) >> 3;  // replicate the sign bit
          {
            UInt32 c = pc;
            BR_CONVERT_VAL(v, c)
          }
          v &= (0x1fffff << 1) | 1;
          #else
          // inlined variant: bias (pc) itself so that add/sub give the
          // correctly wrapped 21-bit result without a separate mask step
          {
            if (encoding)
            {
              // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
              pc &= (0x1fffff << 1) | 1;
              v += pc;
            }
            else
            {
              // pc |= 0xc00000 << 1; // we need to set at least 2 bits
              pc |= ~(UInt32)((0x1fffff << 1) | 1);
              v -= pc;
            }
          }
          v &= ~(UInt32)(0x600000 << 1);
          #endif
          // re-insert the sign bit at its encoded position and merge back
          v += (0x700000 << 1);
          v &= (0x8fffff << 1) | 1;
          z |= v;
          z <<= m;
          SetUi32(p + 1 - 5, z)
        }
        m++;
      }
      while (m &= 3); // while (m < 4);
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
421 | |
422 | |
/* RISC-V converter support.
   The RISC-V encoder/decoder are written as two separate functions
   (not via the (encoding) flag), so they use dedicated add/sub macros. */
#define BR_CONVERT_VAL_ENC(v)  v += BR_PC_GET;
#define BR_CONVERT_VAL_DEC(v)  v -= BR_PC_GET;

// instructions are only 2-byte aligned, so 32-bit accesses may be unaligned
#if 1 && defined(MY_CPU_LE_UNALIGN)
  #define RISCV_USE_UNALIGNED_LOAD
#endif

#ifdef RISCV_USE_UNALIGNED_LOAD
#define RISCV_GET_UI32(p)     GetUi32(p)
#define RISCV_SET_UI32(p, v)  { SetUi32(p, v) }
#else
// compose/decompose the 32-bit value from two aligned 16-bit halves
#define RISCV_GET_UI32(p) \
  ((UInt32)GetUi16a(p) + \
  ((UInt32)GetUi16a((p) + 2) << 16))
#define RISCV_SET_UI32(p, v) { \
  SetUi16a(p, (UInt16)(v)) \
  SetUi16a((p) + 2, (UInt16)(v >> 16)) }
#endif

// on little-endian CPUs a 16-bit load gives both scan bytes at once
#if 1 && defined(MY_CPU_LE)
  #define RISCV_USE_16BIT_LOAD
#endif

#ifdef RISCV_USE_16BIT_LOAD
  #define RISCV_LOAD_VAL(p)  GetUi16a(p)
#else
  #define RISCV_LOAD_VAL(p)  (*(p))
#endif

#define RISCV_INSTR_SIZE  2   // minimal (compressed) instruction size
#define RISCV_STEP_1      (4 + RISCV_INSTR_SIZE)  // advance after a failed AUIPC pair check
#define RISCV_STEP_2      4                       // advance after a failed x0/x2 pair check
#define RISCV_REG_VAL     (2 << 7)  // register x2 in the rd field
#define RISCV_CMD_VAL     3
#if 1
  // for code size optimization:
  #define RISCV_DELTA_7F  0x7f
#else
  #define RISCV_DELTA_7F  0
#endif

// pairing check for the AUIPC (rd != x0/x2) + following-instruction case
#define RISCV_CHECK_1(v, b) \
  (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)

#if 1
// pairing check for the AUIPC (rd == x0/x2) marker case
#define RISCV_CHECK_2(v, r) \
  ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
      << 18) \
      < ((r) & 0x1d))
#else
// this branch gives larger code, because
// compilers generate larger code for big constants.
#define RISCV_CHECK_2(v, r) \
  ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
      & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
      < ((r) & 0x1d))
#endif


/* shared scan loop opening for both RISC-V converters.
   Finds the next JAL or AUIPC opcode byte (see the table below);
   on exit from the inner loop, (a) holds (first_byte ^ 0x10) + 1
   (plus the rest of the 16-bit halfword with RISCV_USE_16BIT_LOAD)
   and (p) points at the matched instruction. */
#define RISCV_SCAN_LOOP \
  Byte *lim; \
  size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
  if (size <= 6) return p; \
  size -= 6; \
  lim = p + size; \
  BR_PC_INIT \
  for (;;) \
  { \
    UInt32 a, v; \
    /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
    for (;;) \
    { \
      if Z7_UNLIKELY(p >= lim) { return p; } \
      a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
      if ((a & 0x77) == 0) break; \
      a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
      p += RISCV_INSTR_SIZE * 2; \
      if ((a & 0x77) == 0) \
      { \
        p -= RISCV_INSTR_SIZE; \
        if Z7_UNLIKELY(p >= lim) { return p; } \
        break; \
      } \
    }

// how the (a & 0x77) == 0 test matches the opcode bytes:
// (xx6f ^ 10) + 1 = xx7f + 1 = xx80          : JAL
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100    : JAL
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08          : AUIPC
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88          : AUIPC
511 | |
/* RISC-V encoder.
   JAL: converts the 20-bit scrambled offset to an absolute target.
   AUIPC: converts AUIPC+12-bit-offset instruction pairs; a converted
   pair is re-written in a special marker form (AUIPC with rd = x2 and
   a big-endian absolute address) that the decoder recognizes via
   RISCV_CHECK_2. See RISCV_CHECK_1/RISCV_CHECK_2 above. */
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
  RISCV_SCAN_LOOP
  v = a;
  a = RISCV_GET_UI32(p);  // full 32-bit instruction
#ifndef RISCV_USE_16BIT_LOAD
  v += (UInt32)p[1] << 8;  // complete the 16-bit value from the second byte
#endif

  if ((v & 8) == 0) // JAL
  {
    // only convert JAL with link register rd in a supported set
    if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
    {
      p += RISCV_INSTR_SIZE;
      continue;
    }
    {
      // descramble the J-type immediate: {imm20, imm10:1, imm11, imm19:12}
      v = ((a & 1u << 31) >> 11)
        | ((a & 0x3ff << 21) >> 20)
        | ((a & 1 << 20) >> 9)
        |  (a & 0xff << 12);
      BR_CONVERT_VAL_ENC(v)
      // ((v & 1) == 0)
      // v: bits [1 : 20] contain offset bits
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
      a &= 0xfff;
      a |= ((UInt32)(v << 23))
        | ((UInt32)(v <<  7) & ((UInt32)0xff << 16))
        | ((UInt32)(v >>  5) & ((UInt32)0xf0 << 8));
      RISCV_SET_UI32(p, a)
#else // aligned
#if 0
      SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
#else
      // store the converted value in a byte-swapped layout for compressibility
      p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
#endif

#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
      v <<= 15;
      v = Z7_BSWAP32(v);
      SetUi16a(p + 2, (UInt16)v)
#else
      p[2] = (Byte)(v >> 9);
      p[3] = (Byte)(v >> 1);
#endif
#endif // aligned
    }
    p += 4;
    continue;
  } // JAL

  {
    // AUIPC
    if (v & 0xe80) // (not x0) and (not x2)
    {
      // regular AUIPC: pair with the next instruction if it matches
      const UInt32 b = RISCV_GET_UI32(p + 4);
      if (RISCV_CHECK_1(v, b))
      {
        {
          // write the marker instruction (AUIPC rd=x2) carrying (b)'s fields
          const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
          RISCV_SET_UI32(p, temp)
        }
        a &= 0xfffff000;  // AUIPC's 20-bit upper immediate
        {
#if 1
          // constant-folded probe: does this compiler's signed >> sign-extend?
          const int t = -1 >> 1;
          if (t != -1)
            a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
          else
#endif
            a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
        }
        BR_CONVERT_VAL_ENC(a)
        // store the absolute address big-endian (decoder reads it back with GetBe32)
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
        a = Z7_BSWAP32(a);
        RISCV_SET_UI32(p + 4, a)
#else
        SetBe32(p + 4, a)
#endif
        p += 8;
      }
      else
        p += RISCV_STEP_1;
    }
    else
    {
      // AUIPC with rd = x0/x2: can collide with the marker form, so it is
      // transformed reversibly when RISCV_CHECK_2 matches
      UInt32 r = a >> 27;
      if (RISCV_CHECK_2(v, r))
      {
        v = RISCV_GET_UI32(p + 4);
        r = (r << 7) + 0x17 + (v & 0xfffff000);
        a = (a >> 12) | (v << 20);
        RISCV_SET_UI32(p, r)
        RISCV_SET_UI32(p + 4, a)
        p += 8;
      }
      else
        p += RISCV_STEP_2;
    }
  }
  } // for
}
614 | |
615 | |
/* RISC-V decoder: exact inverse of Z7_BRANCH_CONV_ENC(RISCV).
   JAL: converts the absolute target back to the scrambled relative form.
   AUIPC: recognizes the encoder's marker form (rd = x0/x2 path, via
   RISCV_CHECK_2) and restores the original AUIPC instruction pair. */
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
  RISCV_SCAN_LOOP
#ifdef RISCV_USE_16BIT_LOAD
  if ((a & 8) == 0)
  {
#else
  v = a;
  a += (UInt32)p[1] << 8;  // complete the 16-bit value from the second byte
  if ((v & 8) == 0)
  {
#endif
    // JAL
    a -= 0x100 - RISCV_DELTA_7F;  // biased rd check (mirrors the encoder's test)
    if (a & 0xd80)
    {
      p += RISCV_INSTR_SIZE;
      continue;
    }
    {
      // recover the original low halfword (opcode + rd) before conversion
      const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
#if 0 // unaligned
      a = GetUi32(p);
      v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
        | (UInt32)(a >>  7) & ((UInt32)0xff << 9)
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
      // read back the byte-swapped offset layout written by the encoder
      v = GetUi16a(p + 2);
      v = Z7_BSWAP32(v) >> 15
#else
      v = (UInt32)p[3] << 1
        | (UInt32)p[2] << 9
#endif
        | (UInt32)((a & 0xf000) << 5);
      BR_CONVERT_VAL_DEC(v)
      // re-scramble the offset into the J-type immediate layout
      a = a_old
        | (v << 11 & 1u << 31)
        | (v << 20 & 0x3ff << 21)
        | (v <<  9 & 1 << 20)
        | (v       & 0xff << 12);
      RISCV_SET_UI32(p, a)
    }
    p += 4;
    continue;
  } // JAL

  {
    // AUIPC
    v = a;
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
    a = GetUi32(p);
#else
    a |= (UInt32)GetUi16a(p + 2) << 16;  // complete the 32-bit instruction
#endif
    if ((v & 0xe80) == 0) // x0/x2
    {
      // marker form produced by the encoder: restore the original pair
      const UInt32 r = a >> 27;
      if (RISCV_CHECK_2(v, r))
      {
        UInt32 b;
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
        b = RISCV_GET_UI32(p + 4);
        b = Z7_BSWAP32(b);
#else
        b = GetBe32(p + 4);  // encoder stored the absolute address big-endian
#endif
        v = a >> 12;
        BR_CONVERT_VAL_DEC(b)
        a = (r << 7) + 0x17;             // rebuild the AUIPC instruction
        a += (b + 0x800) & 0xfffff000;   // upper 20 bits, rounded for the signed low part
        v |= b << 20;                    // low 12 bits into the second instruction
        RISCV_SET_UI32(p, a)
        RISCV_SET_UI32(p + 4, v)
        p += 8;
      }
      else
        p += RISCV_STEP_2;
    }
    else
    {
      // inverse of the encoder's reversible x0/x2-collision transform
      const UInt32 b = RISCV_GET_UI32(p + 4);
      if (!RISCV_CHECK_1(v, b))
        p += RISCV_STEP_1;
      else
      {
        v = (a & 0xfffff000) | (b >> 20);
        a = (b << 12) | (0x17 + RISCV_REG_VAL);
        RISCV_SET_UI32(p, a)
        RISCV_SET_UI32(p + 4, v)
        p += 8;
      }
    }
  }
  } // for
}
709 | } |