9e052883 |
1 | /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)\r |
2 | 2021-02-09 : Igor Pavlov : Public domain */\r |
3 | \r |
4 | #include "Precomp.h"\r |
5 | \r |
6 | /* #define SHOW_STAT */\r |
7 | \r |
8 | #ifdef SHOW_STAT\r |
9 | #include <stdio.h>\r |
10 | #define PRF(x) x\r |
11 | #else\r |
12 | #define PRF(x)\r |
13 | #endif\r |
14 | \r |
15 | #include <string.h>\r |
16 | \r |
17 | #include "Bcj2.h"\r |
18 | #include "CpuArch.h"\r |
19 | \r |
20 | #define CProb UInt16\r |
21 | \r |
22 | #define kTopValue ((UInt32)1 << 24)\r |
23 | #define kNumModelBits 11\r |
24 | #define kBitModelTotal (1 << kNumModelBits)\r |
25 | #define kNumMoveBits 5\r |
26 | \r |
27 | void Bcj2Enc_Init(CBcj2Enc *p)\r |
28 | {\r |
29 | unsigned i;\r |
30 | \r |
31 | p->state = BCJ2_ENC_STATE_OK;\r |
32 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;\r |
33 | \r |
34 | p->prevByte = 0;\r |
35 | \r |
36 | p->cache = 0;\r |
37 | p->range = 0xFFFFFFFF;\r |
38 | p->low = 0;\r |
39 | p->cacheSize = 1;\r |
40 | \r |
41 | p->ip = 0;\r |
42 | \r |
43 | p->fileIp = 0;\r |
44 | p->fileSize = 0;\r |
45 | p->relatLimit = BCJ2_RELAT_LIMIT;\r |
46 | \r |
47 | p->tempPos = 0;\r |
48 | \r |
49 | p->flushPos = 0;\r |
50 | \r |
51 | for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)\r |
52 | p->probs[i] = kBitModelTotal >> 1;\r |
53 | }\r |
54 | \r |
55 | static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)\r |
56 | {\r |
57 | if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)\r |
58 | {\r |
59 | Byte *buf = p->bufs[BCJ2_STREAM_RC];\r |
60 | do\r |
61 | {\r |
62 | if (buf == p->lims[BCJ2_STREAM_RC])\r |
63 | {\r |
64 | p->state = BCJ2_STREAM_RC;\r |
65 | p->bufs[BCJ2_STREAM_RC] = buf;\r |
66 | return True;\r |
67 | }\r |
68 | *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));\r |
69 | p->cache = 0xFF;\r |
70 | }\r |
71 | while (--p->cacheSize);\r |
72 | p->bufs[BCJ2_STREAM_RC] = buf;\r |
73 | p->cache = (Byte)((UInt32)p->low >> 24);\r |
74 | }\r |
75 | p->cacheSize++;\r |
76 | p->low = (UInt32)p->low << 8;\r |
77 | return False;\r |
78 | }\r |
79 | \r |
80 | static void Bcj2Enc_Encode_2(CBcj2Enc *p)\r |
81 | {\r |
82 | if (BCJ2_IS_32BIT_STREAM(p->state))\r |
83 | {\r |
84 | Byte *cur = p->bufs[p->state];\r |
85 | if (cur == p->lims[p->state])\r |
86 | return;\r |
87 | SetBe32(cur, p->tempTarget);\r |
88 | p->bufs[p->state] = cur + 4;\r |
89 | }\r |
90 | \r |
91 | p->state = BCJ2_ENC_STATE_ORIG;\r |
92 | \r |
93 | for (;;)\r |
94 | {\r |
95 | if (p->range < kTopValue)\r |
96 | {\r |
97 | if (RangeEnc_ShiftLow(p))\r |
98 | return;\r |
99 | p->range <<= 8;\r |
100 | }\r |
101 | \r |
102 | {\r |
103 | {\r |
104 | const Byte *src = p->src;\r |
105 | const Byte *srcLim;\r |
106 | Byte *dest;\r |
107 | SizeT num = (SizeT)(p->srcLim - src);\r |
108 | \r |
109 | if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)\r |
110 | {\r |
111 | if (num <= 4)\r |
112 | return;\r |
113 | num -= 4;\r |
114 | }\r |
115 | else if (num == 0)\r |
116 | break;\r |
117 | \r |
118 | dest = p->bufs[BCJ2_STREAM_MAIN];\r |
119 | if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))\r |
120 | {\r |
121 | num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);\r |
122 | if (num == 0)\r |
123 | {\r |
124 | p->state = BCJ2_STREAM_MAIN;\r |
125 | return;\r |
126 | }\r |
127 | }\r |
128 | \r |
129 | srcLim = src + num;\r |
130 | \r |
131 | if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)\r |
132 | *dest = src[0];\r |
133 | else for (;;)\r |
134 | {\r |
135 | Byte b = *src;\r |
136 | *dest = b;\r |
137 | if (b != 0x0F)\r |
138 | {\r |
139 | if ((b & 0xFE) == 0xE8)\r |
140 | break;\r |
141 | dest++;\r |
142 | if (++src != srcLim)\r |
143 | continue;\r |
144 | break;\r |
145 | }\r |
146 | dest++;\r |
147 | if (++src == srcLim)\r |
148 | break;\r |
149 | if ((*src & 0xF0) != 0x80)\r |
150 | continue;\r |
151 | *dest = *src;\r |
152 | break;\r |
153 | }\r |
154 | \r |
155 | num = (SizeT)(src - p->src);\r |
156 | \r |
157 | if (src == srcLim)\r |
158 | {\r |
159 | p->prevByte = src[-1];\r |
160 | p->bufs[BCJ2_STREAM_MAIN] = dest;\r |
161 | p->src = src;\r |
162 | p->ip += (UInt32)num;\r |
163 | continue;\r |
164 | }\r |
165 | \r |
166 | {\r |
167 | Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);\r |
168 | BoolInt needConvert;\r |
169 | \r |
170 | p->bufs[BCJ2_STREAM_MAIN] = dest + 1;\r |
171 | p->ip += (UInt32)num + 1;\r |
172 | src++;\r |
173 | \r |
174 | needConvert = False;\r |
175 | \r |
176 | if ((SizeT)(p->srcLim - src) >= 4)\r |
177 | {\r |
178 | UInt32 relatVal = GetUi32(src);\r |
179 | if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)\r |
180 | && ((relatVal + p->relatLimit) >> 1) < p->relatLimit)\r |
181 | needConvert = True;\r |
182 | }\r |
183 | \r |
184 | {\r |
185 | UInt32 bound;\r |
186 | unsigned ttt;\r |
187 | Byte b = src[-1];\r |
188 | CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));\r |
189 | \r |
190 | ttt = *prob;\r |
191 | bound = (p->range >> kNumModelBits) * ttt;\r |
192 | \r |
193 | if (!needConvert)\r |
194 | {\r |
195 | p->range = bound;\r |
196 | *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));\r |
197 | p->src = src;\r |
198 | p->prevByte = b;\r |
199 | continue;\r |
200 | }\r |
201 | \r |
202 | p->low += bound;\r |
203 | p->range -= bound;\r |
204 | *prob = (CProb)(ttt - (ttt >> kNumMoveBits));\r |
205 | \r |
206 | {\r |
207 | UInt32 relatVal = GetUi32(src);\r |
208 | UInt32 absVal;\r |
209 | p->ip += 4;\r |
210 | absVal = p->ip + relatVal;\r |
211 | p->prevByte = src[3];\r |
212 | src += 4;\r |
213 | p->src = src;\r |
214 | {\r |
215 | unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;\r |
216 | Byte *cur = p->bufs[cj];\r |
217 | if (cur == p->lims[cj])\r |
218 | {\r |
219 | p->state = cj;\r |
220 | p->tempTarget = absVal;\r |
221 | return;\r |
222 | }\r |
223 | SetBe32(cur, absVal);\r |
224 | p->bufs[cj] = cur + 4;\r |
225 | }\r |
226 | }\r |
227 | }\r |
228 | }\r |
229 | }\r |
230 | }\r |
231 | }\r |
232 | \r |
233 | if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)\r |
234 | return;\r |
235 | \r |
236 | for (; p->flushPos < 5; p->flushPos++)\r |
237 | if (RangeEnc_ShiftLow(p))\r |
238 | return;\r |
239 | p->state = BCJ2_ENC_STATE_OK;\r |
240 | }\r |
241 | \r |
242 | \r |
243 | void Bcj2Enc_Encode(CBcj2Enc *p)\r |
244 | {\r |
245 | PRF(printf("\n"));\r |
246 | PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r |
247 | \r |
248 | if (p->tempPos != 0)\r |
249 | {\r |
250 | unsigned extra = 0;\r |
251 | \r |
252 | for (;;)\r |
253 | {\r |
254 | const Byte *src = p->src;\r |
255 | const Byte *srcLim = p->srcLim;\r |
256 | EBcj2Enc_FinishMode finishMode = p->finishMode;\r |
257 | \r |
258 | p->src = p->temp;\r |
259 | p->srcLim = p->temp + p->tempPos;\r |
260 | if (src != srcLim)\r |
261 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;\r |
262 | \r |
263 | PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r |
264 | \r |
265 | Bcj2Enc_Encode_2(p);\r |
266 | \r |
267 | {\r |
268 | unsigned num = (unsigned)(p->src - p->temp);\r |
269 | unsigned tempPos = p->tempPos - num;\r |
270 | unsigned i;\r |
271 | p->tempPos = tempPos;\r |
272 | for (i = 0; i < tempPos; i++)\r |
273 | p->temp[i] = p->temp[(size_t)i + num];\r |
274 | \r |
275 | p->src = src;\r |
276 | p->srcLim = srcLim;\r |
277 | p->finishMode = finishMode;\r |
278 | \r |
279 | if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)\r |
280 | return;\r |
281 | \r |
282 | if (extra >= tempPos)\r |
283 | {\r |
284 | p->src = src - tempPos;\r |
285 | p->tempPos = 0;\r |
286 | break;\r |
287 | }\r |
288 | \r |
289 | p->temp[tempPos] = src[0];\r |
290 | p->tempPos = tempPos + 1;\r |
291 | p->src = src + 1;\r |
292 | extra++;\r |
293 | }\r |
294 | }\r |
295 | }\r |
296 | \r |
297 | PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r |
298 | \r |
299 | Bcj2Enc_Encode_2(p);\r |
300 | \r |
301 | if (p->state == BCJ2_ENC_STATE_ORIG)\r |
302 | {\r |
303 | const Byte *src = p->src;\r |
304 | unsigned rem = (unsigned)(p->srcLim - src);\r |
305 | unsigned i;\r |
306 | for (i = 0; i < rem; i++)\r |
307 | p->temp[i] = src[i];\r |
308 | p->tempPos = rem;\r |
309 | p->src = src + rem;\r |
310 | }\r |
311 | }\r |