f535537f |
1 | /* Bra86.c -- Branch converter for X86 code (BCJ) |
2 | 2023-04-02 : Igor Pavlov : Public domain */ |
3 | |
4 | #include "Precomp.h" |
5 | |
6 | #include "Bra.h" |
7 | #include "CpuArch.h" |
8 | |
9 | |
/* BR_CONV_USE_OPT_PC_PTR is defined only when MY_CPU_SIZEOF_POINTER is known
   to be 4 or 8. It selects which pair of BR_PC_INIT / BR_PC_GET definitions
   below is used by the converter function. */
10 | #if defined(MY_CPU_SIZEOF_POINTER) \ |
11 | && ( MY_CPU_SIZEOF_POINTER == 4 \ |
12 | || MY_CPU_SIZEOF_POINTER == 8) |
13 | #define BR_CONV_USE_OPT_PC_PTR |
14 | #endif |
15 | |
/* BR_PC_INIT and BR_PC_GET are expanded inside the converter function and
   refer to its locals (pc, p, size, lim) by name, so they are only usable
   in a scope that declares those names.
   BR_PC_INIT is a complete statement (it carries its own semicolon) that is
   run once before scanning. BR_PC_GET is an expression evaluated at each
   conversion to obtain the 32-bit value that is added to or subtracted from
   the operand.

   Pointer variant (BR_CONV_USE_OPT_PC_PTR defined):
     BR_PC_INIT subtracts the address in p, truncated to UInt32, from pc.
     BR_PC_GET adds the truncated current address back, so the result is the
     pc value held before BR_PC_INIT plus the number of bytes p has advanced
     since then. The converter adds 4 to pc before BR_PC_INIT in this
     configuration.

   Fallback variant:
     BR_PC_INIT adds size to pc. BR_PC_GET subtracts the distance (lim - p).
     Because the converter sets lim to (start + size - 4), the result is the
     original pc plus the byte offset of p plus 4, without a separate
     adjustment. */
16 | #ifdef BR_CONV_USE_OPT_PC_PTR |
17 | #define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) |
18 | #define BR_PC_GET (pc + (UInt32)(SizeT)p) |
19 | #else |
20 | #define BR_PC_INIT pc += (UInt32)size; |
21 | #define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) |
// Alternative kept for reference: no init step, offset computed from a separate data pointer.
22 | // #define BR_PC_INIT |
23 | // #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) |
24 | #endif |
25 | |
/* BR_CONVERT_VAL(v, c): adds c to v when the name (encoding) in the expanding
   scope is nonzero, otherwise subtracts c from v.
   The expansion is an unbraced if/else statement that already ends with a
   semicolon, so it is written without a trailing semicolon at its use sites
   and must not be used as the body of another unbraced if. */
26 | #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; |
// Alternative kept for reference: negate c once when decoding, then always add.
27 | // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; |
28 | |
/* Z7_BRANCH_CONV_ST(name): builds the identifier z7_BranchConvSt_<name> by
   token pasting. It names the file-local converter for a given architecture. */
29 | #define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name |
30 | |
/* BR86_NEED_CONV_FOR_MS_BYTE(b): nonzero when the low 8 bits of b are 0x00 or
   0xFF. Adding 1 maps those two values to 0x01 and 0x00 (carry out of the low
   byte is ignored), and the 0xfe mask tests bits 1..7 only, so higher bits of
   b do not affect the result. */
31 | #define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) |
32 | |
/* Scan helpers used to test a group of 4 consecutive bytes for the marker
   values 0xE8 and 0xE9 (a byte x matches when (x & 0xfe) == 0xe8). These are
   the opcode bytes of the x86 CALL rel32 and JMP rel32 instructions.

   BR86_PREPARE_BCJ_SCAN is expanded before p is advanced by 4, and
   BR86_IS_BCJ_BYTE(n) is expanded after that advance, with n in 0..3 selecting
   the byte inside the group.

   MY_CPU_LE_UNALIGN variant:
     PREPARE declares a local (v) holding GetUi32(p) XOR 0xe8e8e8e8, so a
     matching byte becomes 0x00 or 0x01 in its lane. IS_BCJ_BYTE(n) checks that
     bits 1..7 of lane n are all zero. GetUi32 is defined elsewhere; the lane
     numbering assumes it returns the bytes in little-endian order
     (NOTE(review): confirm against CpuArch.h).

   Generic variant:
     PREPARE expands to nothing. IS_BCJ_BYTE(n) reads p[n - 4], which addresses
     byte n of the group only because p has already been advanced by 4.
     The argument n is not parenthesized in this variant; the uses in this
     file pass the literals 0..3 only. */
33 | #ifdef MY_CPU_LE_UNALIGN |
34 | #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; |
35 | #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) |
36 | #else |
37 | #define BR86_PREPARE_BCJ_SCAN |
38 | // bad for MSVC X86 (partial write to byte reg): |
39 | #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8) |
40 | // bad for old MSVC (partial write to byte reg): |
41 | // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) |
42 | #endif |
43 | |
/* z7_BranchConvSt_X86: core of the x86 BCJ branch converter. The exported
   decoder and encoder wrappers at the end of this file call it with a
   constant (encoding) argument.

   The function scans the buffer in place for marker bytes 0xE8 / 0xE9. For a
   candidate it reads the following 4 bytes with GetUi32, and if the most
   significant byte of that value is 0x00 or 0xFF it adds (encoding != 0) or
   subtracts (encoding == 0) the value of BR_PC_GET and stores the result
   back with SetUi32. The stored value always has 0x00 or 0xFF as its most
   significant byte again.

   Parameters:
     p        - start of the data; converted in place and used as the cursor.
     size     - number of bytes available at p.
     pc       - 32-bit position value for the start of the data; combined with
                the cursor position by BR_PC_INIT / BR_PC_GET.
     state    - in/out scan history. Read into (mask) on entry and written back
                on exit. It is not read or written when size < 5.
     encoding - nonzero to add the position value, zero to subtract it.

   Returns the position where processing stopped. When size < 5 this is p
   unchanged. Otherwise it lies in the range [p + size - 4, p + size - 1], so
   between 1 and 4 trailing bytes are left for the caller.

   About (mask): it is shifted right once per non-matching byte in the first
   scan group and gets bit 2 set (mask |= 4, the outer loop increment) every
   time a candidate is rejected through continue. NOTE(review): this presumably
   records which of the most recent bytes were unconverted marker bytes;
   confirm against the format description.

   Control flow note: every continue in this function belongs to the outer
   for loop, so it executes mask |= 4 and then proceeds at the start label. */
44 | static |
45 | Z7_FORCE_INLINE |
46 | Z7_ATTRIB_NO_VECTOR |
47 | Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) |
48 | { |
// Fewer than 5 bytes cannot hold a marker byte plus a 4-byte operand.
49 | if (size < 5) |
50 | return p; |
51 | { |
52 | // Byte *p = data; |
// lim is 4 bytes before the end of the data, so a 4-byte read at any p <= lim stays inside the buffer.
53 | const Byte *lim = p + size - 4; |
54 | unsigned mask = (unsigned)*state; // & 7; |
55 | #ifdef BR_CONV_USE_OPT_PC_PTR |
56 | /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), |
57 | because call/jump offset is relative to the next instruction. |
58 | if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), |
59 | because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. |
60 | */ |
61 | pc += 4; |
62 | #endif |
63 | BR_PC_INIT |
// Enter the loop body directly so that the loop increment (mask |= 4) is not applied on the first pass.
64 | goto start; |
65 | |
66 | for (;; mask |= 4) |
67 | { |
68 | // cont: mask |= 4; |
69 | start: |
70 | if (p >= lim) |
71 | goto fin; |
/* History-aware scan of one 4-byte group. p is advanced past the group
   first; mask is shifted right for each leading byte that is not a marker
   and is cleared once three bytes have been passed. */
72 | { |
73 | BR86_PREPARE_BCJ_SCAN |
74 | p += 4; |
75 | if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; |
76 | if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; |
77 | if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; |
78 | if (BR86_IS_BCJ_BYTE(3)) { goto a3; } |
79 | } |
// No marker in this group and mask is now 0: switch to the scan loop that does not track mask.
80 | goto main_loop; |
81 | |
// The labels fall through, so p ends up on the byte that follows the matched marker byte.
82 | m0: p--; |
83 | m1: p--; |
84 | m2: p--; |
// With an empty history the candidate is handled by the plain conversion path.
85 | if (mask == 0) |
86 | goto a3; |
// The 4-byte operand would run past the end of the data: stop at the marker byte.
87 | if (p > lim) |
88 | goto fin_p; |
89 | |
90 | // if (((0x17u >> mask) & 1) == 0) |
// Only history values 1, 2 and 4 reach the conversion below; any other nonzero value rejects the candidate.
91 | if (mask > 4 || mask == 3) |
92 | { |
93 | mask >>= 1; |
94 | continue; // goto cont; |
95 | } |
// mask becomes 0, 1 or 2 and is used as an index into the operand; the candidate is rejected if that byte is 0x00 or 0xFF.
96 | mask >>= 1; |
97 | if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) |
98 | continue; // goto cont; |
99 | // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; |
100 | { |
101 | UInt32 v = GetUi32(p); |
102 | UInt32 c; |
// Adding (1 << 24) maps a top byte of 0xFF to 0x00 and 0x00 to 0x01; any other top byte leaves bits 25..31 set and rejects the candidate.
103 | v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; |
104 | c = BR_PC_GET; |
105 | BR_CONVERT_VAL(v, c) |
/* mask is turned into a bit offset (0, 8 or 16). If the byte of the
   converted value at that offset is 0x00 or 0xFF, all bits from bit 0
   through the end of that byte are inverted and the conversion is applied
   a second time. The history is cleared afterwards. */
106 | { |
107 | mask <<= 3; |
108 | if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) |
109 | { |
110 | v ^= (((UInt32)0x100 << mask) - 1); |
111 | #ifdef MY_CPU_X86 |
112 | // for X86 : we can recalculate (c) to reduce register pressure |
113 | c = BR_PC_GET; |
114 | #endif |
115 | BR_CONVERT_VAL(v, c) |
116 | } |
117 | mask = 0; |
118 | } |
119 | // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); |
// Keep bits 0..24 and undo the earlier bias: the stored top byte is 0x00 when bit 24 is set and 0xFF when it is clear.
120 | v &= (1 << 25) - 1; v -= (1 << 24); |
121 | SetUi32(p, v) |
122 | p += 4; |
123 | goto main_loop; |
124 | } |
125 | |
// Scan loop used while mask is 0: look for the next marker byte, 4 bytes per iteration, without touching mask.
126 | main_loop: |
127 | if (p >= lim) |
128 | goto fin; |
129 | for (;;) |
130 | { |
131 | BR86_PREPARE_BCJ_SCAN |
132 | p += 4; |
133 | if (BR86_IS_BCJ_BYTE(0)) { goto a0; } |
134 | if (BR86_IS_BCJ_BYTE(1)) { goto a1; } |
135 | if (BR86_IS_BCJ_BYTE(2)) { goto a2; } |
136 | if (BR86_IS_BCJ_BYTE(3)) { goto a3; } |
137 | if (p >= lim) |
138 | goto fin; |
139 | } |
140 | |
// Fall-through labels again leave p on the byte that follows the matched marker byte.
141 | a0: p--; |
142 | a1: p--; |
143 | a2: p--; |
144 | a3: |
// The operand would run past the end of the data: stop at the marker byte.
145 | if (p > lim) |
146 | goto fin_p; |
147 | // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; |
// Plain conversion: same top-byte test, single add or subtract, same restore of the top byte as in the history path.
148 | { |
149 | UInt32 v = GetUi32(p); |
150 | UInt32 c; |
151 | v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; |
152 | c = BR_PC_GET; |
153 | BR_CONVERT_VAL(v, c) |
154 | // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); |
155 | v &= (1 << 25) - 1; v -= (1 << 24); |
156 | SetUi32(p, v) |
157 | p += 4; |
158 | goto main_loop; |
159 | } |
160 | } |
161 | |
// fin_p: step back onto the marker byte whose operand did not fit, so that it is the first unprocessed byte.
162 | fin_p: |
163 | p--; |
164 | fin: |
165 | // the following processing for tail is optional and can be commented |
166 | /* |
167 | lim += 4; |
168 | for (; p < lim; p++, mask >>= 1) |
169 | if ((*p & 0xfe) == 0xe8) |
170 | break; |
171 | */ |
// Hand the scan history back to the caller and report where processing stopped.
172 | *state = (UInt32)mask; |
173 | return p; |
174 | } |
175 | } |
176 | |
177 | |
/* Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding): defines a non-inlined
   function named m(name) with the signature
     Byte *f(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
   that forwards to the force-inlined converter z7_BranchConvSt_<name> with
   (encoding) passed as a constant argument. Each instantiation therefore gets
   its own copy of the converter body.
   The macro m that produces the exported function name is defined outside
   this file (NOTE(review): presumably in Bra.h; confirm). */
178 | #define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ |
179 | Z7_NO_INLINE \ |
180 | Z7_ATTRIB_NO_VECTOR \ |
181 | Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ |
182 | { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } |
183 | |
// Decoder entry point (encoding = 0): always built.
184 | Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) |
// Encoder entry point (encoding = 1): left out when Z7_EXTRACT_ONLY is defined.
185 | #ifndef Z7_EXTRACT_ONLY |
186 | Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) |
187 | #endif |