ce188d4d |
1 | /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)\r |
2 | 2014-11-10 : Igor Pavlov : Public domain */\r |
3 | \r |
4 | #include "Precomp.h"\r |
5 | \r |
6 | /* #define SHOW_STAT */\r |
7 | \r |
8 | #ifdef SHOW_STAT\r |
9 | #include <stdio.h>\r |
10 | #define PRF(x) x\r |
11 | #else\r |
12 | #define PRF(x)\r |
13 | #endif\r |
14 | \r |
15 | #include <windows.h>\r |
16 | #include <string.h>\r |
17 | \r |
18 | #include "Bcj2.h"\r |
19 | #include "CpuArch.h"\r |
20 | \r |
21 | #define CProb UInt16\r |
22 | \r |
23 | #define kTopValue ((UInt32)1 << 24)\r |
24 | #define kNumModelBits 11\r |
25 | #define kBitModelTotal (1 << kNumModelBits)\r |
26 | #define kNumMoveBits 5\r |
27 | \r |
28 | void Bcj2Enc_Init(CBcj2Enc *p)\r |
29 | {\r |
30 | unsigned i;\r |
31 | \r |
32 | p->state = BCJ2_ENC_STATE_OK;\r |
33 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;\r |
34 | \r |
35 | p->prevByte = 0;\r |
36 | \r |
37 | p->cache = 0;\r |
38 | p->range = 0xFFFFFFFF;\r |
39 | p->low = 0;\r |
40 | p->cacheSize = 1;\r |
41 | \r |
42 | p->ip = 0;\r |
43 | \r |
44 | p->fileIp = 0;\r |
45 | p->fileSize = 0;\r |
46 | p->relatLimit = BCJ2_RELAT_LIMIT;\r |
47 | \r |
48 | p->tempPos = 0;\r |
49 | \r |
50 | p->flushPos = 0;\r |
51 | \r |
52 | for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)\r |
53 | p->probs[i] = kBitModelTotal >> 1;\r |
54 | }\r |
55 | \r |
56 | static Bool MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)\r |
57 | {\r |
58 | if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)\r |
59 | {\r |
60 | Byte *buf = p->bufs[BCJ2_STREAM_RC];\r |
61 | do\r |
62 | {\r |
63 | if (buf == p->lims[BCJ2_STREAM_RC])\r |
64 | {\r |
65 | p->state = BCJ2_STREAM_RC;\r |
66 | p->bufs[BCJ2_STREAM_RC] = buf;\r |
67 | return True;\r |
68 | }\r |
69 | *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));\r |
70 | p->cache = 0xFF;\r |
71 | }\r |
72 | while (--p->cacheSize);\r |
73 | p->bufs[BCJ2_STREAM_RC] = buf;\r |
74 | p->cache = (Byte)((UInt32)p->low >> 24);\r |
75 | }\r |
76 | p->cacheSize++;\r |
77 | p->low = (UInt32)p->low << 8;\r |
78 | return False;\r |
79 | }\r |
80 | \r |
81 | static void Bcj2Enc_Encode_2(CBcj2Enc *p)\r |
82 | {\r |
83 | if (BCJ2_IS_32BIT_STREAM(p->state))\r |
84 | {\r |
85 | Byte *cur = p->bufs[p->state];\r |
86 | if (cur == p->lims[p->state])\r |
87 | return;\r |
88 | SetBe32(cur, p->tempTarget);\r |
89 | p->bufs[p->state] = cur + 4;\r |
90 | }\r |
91 | \r |
92 | p->state = BCJ2_ENC_STATE_ORIG;\r |
93 | \r |
94 | for (;;)\r |
95 | {\r |
96 | if (p->range < kTopValue)\r |
97 | {\r |
98 | if (RangeEnc_ShiftLow(p))\r |
99 | return;\r |
100 | p->range <<= 8;\r |
101 | }\r |
102 | \r |
103 | {\r |
104 | {\r |
105 | const Byte *src = p->src;\r |
106 | const Byte *srcLim;\r |
107 | Byte *dest;\r |
108 | SizeT num = p->srcLim - src;\r |
109 | \r |
110 | if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)\r |
111 | {\r |
112 | if (num <= 4)\r |
113 | return;\r |
114 | num -= 4;\r |
115 | }\r |
116 | else if (num == 0)\r |
117 | break;\r |
118 | \r |
119 | dest = p->bufs[BCJ2_STREAM_MAIN];\r |
120 | if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))\r |
121 | {\r |
122 | num = p->lims[BCJ2_STREAM_MAIN] - dest;\r |
123 | if (num == 0)\r |
124 | {\r |
125 | p->state = BCJ2_STREAM_MAIN;\r |
126 | return;\r |
127 | }\r |
128 | }\r |
129 | \r |
130 | srcLim = src + num;\r |
131 | \r |
132 | if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)\r |
133 | *dest = src[0];\r |
134 | else for (;;)\r |
135 | {\r |
136 | Byte b = *src;\r |
137 | *dest = b;\r |
138 | if (b != 0x0F)\r |
139 | {\r |
140 | if ((b & 0xFE) == 0xE8)\r |
141 | break;\r |
142 | dest++;\r |
143 | if (++src != srcLim)\r |
144 | continue;\r |
145 | break;\r |
146 | }\r |
147 | dest++;\r |
148 | if (++src == srcLim)\r |
149 | break;\r |
150 | if ((*src & 0xF0) != 0x80)\r |
151 | continue;\r |
152 | *dest = *src;\r |
153 | break;\r |
154 | }\r |
155 | \r |
156 | num = src - p->src;\r |
157 | \r |
158 | if (src == srcLim)\r |
159 | {\r |
160 | p->prevByte = src[-1];\r |
161 | p->bufs[BCJ2_STREAM_MAIN] = dest;\r |
162 | p->src = src;\r |
163 | p->ip += (UInt32)num;\r |
164 | continue;\r |
165 | }\r |
166 | \r |
167 | {\r |
168 | Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);\r |
169 | Bool needConvert;\r |
170 | \r |
171 | p->bufs[BCJ2_STREAM_MAIN] = dest + 1;\r |
172 | p->ip += (UInt32)num + 1;\r |
173 | src++;\r |
174 | \r |
175 | needConvert = False;\r |
176 | \r |
177 | if ((SizeT)(p->srcLim - src) >= 4)\r |
178 | {\r |
179 | UInt32 relatVal = GetUi32(src);\r |
180 | if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)\r |
181 | && ((relatVal + p->relatLimit) >> 1) < p->relatLimit)\r |
182 | needConvert = True;\r |
183 | }\r |
184 | \r |
185 | {\r |
186 | UInt32 bound;\r |
187 | unsigned ttt;\r |
188 | Byte b = src[-1];\r |
189 | CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));\r |
190 | \r |
191 | ttt = *prob;\r |
192 | bound = (p->range >> kNumModelBits) * ttt;\r |
193 | \r |
194 | if (!needConvert)\r |
195 | {\r |
196 | p->range = bound;\r |
197 | *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));\r |
198 | p->src = src;\r |
199 | p->prevByte = b;\r |
200 | continue;\r |
201 | }\r |
202 | \r |
203 | p->low += bound;\r |
204 | p->range -= bound;\r |
205 | *prob = (CProb)(ttt - (ttt >> kNumMoveBits));\r |
206 | \r |
207 | {\r |
208 | UInt32 relatVal = GetUi32(src);\r |
209 | UInt32 absVal;\r |
210 | p->ip += 4;\r |
211 | absVal = p->ip + relatVal;\r |
212 | p->prevByte = src[3];\r |
213 | src += 4;\r |
214 | p->src = src;\r |
215 | {\r |
216 | unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;\r |
217 | Byte *cur = p->bufs[cj];\r |
218 | if (cur == p->lims[cj])\r |
219 | {\r |
220 | p->state = cj;\r |
221 | p->tempTarget = absVal;\r |
222 | return;\r |
223 | }\r |
224 | SetBe32(cur, absVal);\r |
225 | p->bufs[cj] = cur + 4;\r |
226 | }\r |
227 | }\r |
228 | }\r |
229 | }\r |
230 | }\r |
231 | }\r |
232 | }\r |
233 | \r |
234 | if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)\r |
235 | return;\r |
236 | \r |
237 | for (; p->flushPos < 5; p->flushPos++)\r |
238 | if (RangeEnc_ShiftLow(p))\r |
239 | return;\r |
240 | p->state = BCJ2_ENC_STATE_OK;\r |
241 | }\r |
242 | \r |
243 | \r |
244 | void Bcj2Enc_Encode(CBcj2Enc *p)\r |
245 | {\r |
246 | PRF(printf("\n"));\r |
247 | PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r |
248 | \r |
249 | if (p->tempPos != 0)\r |
250 | {\r |
251 | unsigned extra = 0;\r |
252 | \r |
253 | for (;;)\r |
254 | {\r |
255 | const Byte *src = p->src;\r |
256 | const Byte *srcLim = p->srcLim;\r |
257 | unsigned finishMode = p->finishMode;\r |
258 | \r |
259 | p->src = p->temp;\r |
260 | p->srcLim = p->temp + p->tempPos;\r |
261 | if (src != srcLim)\r |
262 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;\r |
263 | \r |
264 | PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r |
265 | \r |
266 | Bcj2Enc_Encode_2(p);\r |
267 | \r |
268 | {\r |
269 | unsigned num = (unsigned)(p->src - p->temp);\r |
270 | unsigned tempPos = p->tempPos - num;\r |
271 | unsigned i;\r |
272 | p->tempPos = tempPos;\r |
273 | for (i = 0; i < tempPos; i++)\r |
274 | p->temp[i] = p->temp[i + num];\r |
275 | \r |
276 | p->src = src;\r |
277 | p->srcLim = srcLim;\r |
278 | p->finishMode = finishMode;\r |
279 | \r |
280 | if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)\r |
281 | return;\r |
282 | \r |
283 | if (extra >= tempPos)\r |
284 | {\r |
285 | p->src = src - tempPos;\r |
286 | p->tempPos = 0;\r |
287 | break;\r |
288 | }\r |
289 | \r |
290 | p->temp[tempPos] = src[0];\r |
291 | p->tempPos = tempPos + 1;\r |
292 | p->src = src + 1;\r |
293 | extra++;\r |
294 | }\r |
295 | }\r |
296 | }\r |
297 | \r |
298 | PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r |
299 | \r |
300 | Bcj2Enc_Encode_2(p);\r |
301 | \r |
302 | if (p->state == BCJ2_ENC_STATE_ORIG)\r |
303 | {\r |
304 | const Byte *src = p->src;\r |
305 | unsigned rem = (unsigned)(p->srcLim - src);\r |
306 | unsigned i;\r |
307 | for (i = 0; i < rem; i++)\r |
308 | p->temp[i] = src[i];\r |
309 | p->tempPos = rem;\r |
310 | p->src = src + rem;\r |
311 | }\r |
312 | }\r |