9e052883 |
1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function\r |
2 | ; 2021-02-23: Igor Pavlov : Public domain\r |
3 | ;\r |
4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()\r |
5 | ; function for check at link time.\r |
6 | ; That code is tightly coupled with LzmaDec_TryDummy()\r |
7 | ; and with another functions in LzmaDec.c file.\r |
8 | ; CLzmaDec structure, (probs) array layout, input and output of\r |
9 | ; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).\r |
10 | \r |
; Build-mode guard: the x64 switch normally comes from the build scripts /
; 7zAsm.asm; both enforcement lines are currently disabled (commented out),
; so assembly proceeds even when x64 is not defined.
ifndef x64
; x64=1
; .err <x64_IS_REQUIRED>
endif

include 7zAsm.asm

MY_ASM_START

; Dedicated 64-byte-aligned code section for the decoder hot path.
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
21 | \r |
; Pad the current location counter to a multiple of the requested
; byte count (thin wrapper over the assembler's align directive).
MY_ALIGN macro alignBytes:req
        align   alignBytes
endm
25 | \r |
; Fixed-size alignment shorthands, used in front of hot loop entry points
; and branch targets below.
MY_ALIGN_16 macro
        MY_ALIGN 16
endm

MY_ALIGN_32 macro
        MY_ALIGN 32
endm

MY_ALIGN_64 macro
        MY_ALIGN 64
endm
37 | \r |
38 | \r |
; _LZMA_SIZE_OPT equ 1          ; optional: smaller code (looped bit decoders)

; _LZMA_PROB32 equ 1            ; optional: 32-bit probability counters

; PLOAD / PSTORE abstract one probability-counter access so the rest of the
; file is independent of the counter width.
; PSHIFT = log2(sizeof(prob entry)): 2 in 32-bit mode, 1 in 16-bit mode.
ifdef _LZMA_PROB32
PSHIFT equ 2
PLOAD macro dest, mem
        mov     dest, dword ptr [mem]
endm
PSTORE macro src, mem
        mov     dword ptr [mem], src
endm
else
PSHIFT equ 1
PLOAD macro dest, mem
        ; zero-extend the 16-bit counter into a full 32-bit register
        movzx   dest, word ptr [mem]
endm
PSTORE macro src, mem
        ; @CatStr(src, _W) selects the 16-bit view (xN_W) of the register alias
        mov     word ptr [mem], @CatStr(src, _W)
endm
endif

PMULT equ (1 SHL PSHIFT)                ; sizeof(prob entry)
PMULT_HALF equ (1 SHL (PSHIFT - 1))     ; sizeof / 2 (used with pre-doubled indexes)
PMULT_2 equ (1 SHL (PSHIFT + 1))        ; sizeof * 2

; flag merged into remainLen to signal a data error (must match LzmaDec.c)
kMatchSpecLen_Error_Data equ (1 SHL 9)
66 | \r |
; ---------- Register allocation map ----------
; Several aliases below share one physical register; the decoder phases
; that use them never overlap (e.g. offs/dic/len_temp all live in r12).

; x0 range
; x1 pbPos / (prob) TREE
; x2 probBranch / prm (MATCHED) / pbPos / cnt
; x3 sym
;====== r4 === RSP
; x5 cod
; x6 t1 NORM_CALC / probs_state / dist
; x7 t0 NORM_CALC / prob2 IF_BIT_1
; x8 state
; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg
; x10 kBitModelTotal_reg
; r11 probs
; x12 offs (MATCHED) / dic / len_temp
; x13 processedPos
; x14 bit (MATCHED) / dicPos
; r15 buf


cod equ x5
cod_L equ x5_L                  ; low byte of cod (byte refill in NORM_2)
range equ x0
state equ x8
state_R equ r8
buf equ r15                     ; input stream pointer
processedPos equ x13
kBitModelTotal_reg equ x10      ; kBitModelTotal kept resident in a register

probBranch equ x2
probBranch_R equ r2
probBranch_W equ x2_W

pbPos equ x1
pbPos_R equ r1

cnt equ x2
cnt_R equ r2

lpMask_reg equ x9
dicPos equ r14

sym equ x3
sym_R equ r3
sym_L equ x3_L

probs equ r11
dic equ r12

; t0 / t1: scratch for NORM_CALC and the branchless updates
t0 equ x7
t0_W equ x7_W
t0_R equ r7

prob2 equ t0
prob2_W equ t0_W

t1 equ x6
t1_R equ r6

probs_state equ t1
probs_state_R equ t1_R

; matched-literal phase aliases
prm equ r2
match equ x9
match_R equ r9
offs equ x12
offs_R equ r12
bit equ x14
bit_R equ r14

; reverse-tree phase aliases
sym2 equ x9
sym2_R equ r9

len_temp equ x12

dist equ sym
dist2 equ x9
142 | \r |
143 | \r |
144 | \r |
; Core LZMA range-coder constants (must match LzmaDec.c)
kNumBitModelTotalBits equ 11
kBitModelTotal equ (1 SHL kNumBitModelTotalBits)
kNumMoveBits equ 5
kBitModelOffset equ ((1 SHL kNumMoveBits) - 1)  ; 31: used by the PUP update trick
kTopValue equ (1 SHL 24)        ; normalization threshold for range
150 | \r |
; Unconditional range-coder normalization step: shift the next input byte
; from [buf] into the low byte of cod and scale range by 256.
NORM_2 macro
        ; movzx t0, BYTE PTR [buf]
        shl     cod, 8
        mov     cod_L, BYTE PTR [buf]   ; low byte of cod = next input byte
        shl     range, 8
        ; or cod, t0
        inc     buf
endm
159 | \r |
160 | \r |
; Conditional normalization: refill only when range has dropped below
; kTopValue (the standard LZMA range-coder invariant).
NORM macro
        cmp     range, kTopValue
        jae     SHORT @F
        NORM_2
@@:
endm
167 | \r |
168 | \r |
169 | ; ---------- Branch MACROS ----------\r |
170 | \r |
; Bit-0 probability update after CMP_COD:
;   prob += (kBitModelTotal - prob) >> kNumMoveBits
; stored back to probsArray[probOffset + probDisp]. Assumes probBranch
; holds the counter loaded by CMP_COD. Clobbers prob2.
UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
        mov     prob2, kBitModelTotal_reg
        sub     prob2, probBranch
        shr     prob2, kNumMoveBits
        add     probBranch, prob2
        PSTORE  probBranch, probOffset * 1 + probsArray + probDisp * PMULT
endm
178 | \r |
179 | \r |
; Bit-1 path taken after CMP_COD. On entry prob2 still holds the pre-split
; range and range holds the bound, so this computes:
;   range = oldRange - bound;  cod -= bound;
;   prob -= prob >> kNumMoveBits   (counter decay), stored back.
; NOTE: relies on the exact register residue left by CMP_COD.
UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
        sub     prob2, range
        sub     cod, range
        mov     range, prob2
        mov     prob2, probBranch
        shr     probBranch, kNumMoveBits
        sub     prob2, probBranch
        PSTORE  prob2, probOffset * 1 + probsArray + probDisp * PMULT
endm
189 | \r |
190 | \r |
; Load a branch probability, normalize, and compare cod against
; bound = (range >> kNumBitModelTotalBits) * prob. Leaves:
;   probBranch = prob, prob2 = old range, range = bound,
;   flags = (cod - bound): CF set => bit 0, CF clear => bit 1.
; Callers (IF_BIT_*, UPDATE_*) consume both the flags and these registers.
CMP_COD macro probsArray:req, probOffset:req, probDisp:req
        PLOAD   probBranch, probOffset * 1 + probsArray + probDisp * PMULT
        NORM
        mov     prob2, range
        shr     range, kNumBitModelTotalBits
        imul    range, probBranch
        cmp     cod, range
endm
199 | \r |
200 | \r |
; Jump to toLabel when the decoded bit is 1 (cod >= bound).
; No probability update on either path; the CMP_COD residue
; (probBranch/prob2/range) is left for the target to consume.
IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jae     toLabel
endm
205 | \r |
206 | \r |
; Jump to toLabel on bit 1; on fallthrough (bit 0) apply the bit-0 update.
; The bit-1 target is expected to run UPDATE_1 itself using the
; CMP_COD residue.
IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
        UPDATE_0 probsArray, probOffset, probDisp
endm
211 | \r |
212 | \r |
; Jump to toLabel when the decoded bit is 0 (cod < bound); no prob update.
IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jb      toLabel
endm
217 | \r |
218 | \r |
219 | ; ---------- CMOV MACROS ----------\r |
220 | \r |
; CMOV-style range split for the branchless tree decoders:
;   t0 = range - bound (the bit-1 sub-range), range = bound (bit-0),
;   t1 = saved cod, cod -= bound.
; The flags of the final sub drive the cmovae/cmovb pairs that follow
; in every caller: CF set => bit 0 (restore cod), CF clear => bit 1.
NORM_CALC macro prob:req
        NORM
        mov     t0, range
        shr     range, kNumBitModelTotalBits
        imul    range, prob
        sub     t0, range
        mov     t1, cod
        sub     cod, range
endm
230 | \r |
231 | \r |
; Branchless probability update. t0 was preset (via cmovb in the caller) to
; kBitModelTotal for bit 0 or kBitModelOffset for bit 1, so the single
; formula prob += (t0 - prob) >> kNumMoveBits produces the correct
; increase (bit 0) or decay (bit 1).
PUP macro prob:req, probPtr:req
        sub     t0, prob
        ; only sar works for both 16/32 bit prob modes
        sar     t0, kNumMoveBits
        add     t0, prob
        PSTORE  t0, probPtr
endm
239 | \r |
240 | \r |
; Shift the decoded bit into sym, then update the probability counter.
; With symSub = base - 1 the sbb turns the carry from NORM_CALC
; (CF = 1 => bit 0) into: sym = sym - symSub - CF.
PUP_SUB macro prob:req, probPtr:req, symSub:req
        sbb     sym, symSub
        PUP     prob, probPtr
endm
245 | \r |
246 | \r |
; Common tail of the cmov bit decoders: select the PUP update constant
; (kBitModelOffset for bit 1, kBitModelTotal for bit 0), restore cod on
; bit 0, and stash sym in t1 — the caller's probPtr expression indexes
; through t1_R to address the counter that was just used.
PUP_COD macro prob:req, probPtr:req, symSub:req
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        mov     t1, sym
        cmovb   t0, kBitModelTotal_reg
        PUP_SUB prob, probPtr, symSub
endm
254 | \r |
255 | \r |
; Decode the first bit of a bit tree using probs[1]. Both possible
; second-level counters (probs[2] / probs[3]) are prefetched and
; cmov-selected into probNext, hiding the load latency of the next step.
; sym ends as 2 or 3 (tree index 1 with the new bit shifted in).
BIT_0 macro prob:req, probNext:req
        PLOAD   prob, probs + 1 * PMULT
        PLOAD   probNext, probs + 1 * PMULT_2

        NORM_CALC prob

        cmovae  range, t0                       ; bit 1: keep upper sub-range
        PLOAD   t0, probs + 1 * PMULT_2 + PMULT
        cmovae  probNext, t0                    ; bit 1: next counter is probs[3]
        mov     t0, kBitModelOffset
        cmovb   cod, t1                         ; bit 0: restore cod
        cmovb   t0, kBitModelTotal_reg
        mov     sym, 2
        PUP_SUB prob, probs + 1 * PMULT, 0 - 1  ; sym += 1 - CF => 2 or 3
endm
271 | \r |
272 | \r |
; Decode a middle bit of a bit tree. prob was preloaded by the previous
; BIT_* step; both candidate next counters are prefetched and cmov-selected.
; PUP_COD addresses the consumed counter via t1_R * PMULT_HALF, which works
; because sym was already doubled before t1 = sym was captured.
BIT_1 macro prob:req, probNext:req
        PLOAD   probNext, probs + sym_R * PMULT_2
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + sym_R * PMULT + PMULT
        cmovae  probNext, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
endm
284 | \r |
285 | \r |
; Decode the final bit of a bit tree and rebase sym by symSub
; (removes the implicit leading 1 of the tree index via PUP_SUB's sbb).
BIT_2 macro prob:req, symSub:req
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
endm
294 | \r |
295 | \r |
296 | ; ---------- MATCHED LITERAL ----------\r |
297 | \r |
; First step of matched-literal decoding. The top bit of the match byte
; selects one of the two 0x100-entry probability sub-tables:
;   bit  = match's current top bit, scaled to a table offset;
;   offs = offset of the *other* table, so later LITM steps can
;          cmov-switch tables as soon as a decoded bit differs from
;          the match byte's bit.
; sym ends as 2 or 3 (tree index with the first decoded bit).
LITM_0 macro
        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)     ; align match's top bit with offs
        mov     bit, offs
        and     bit, match
        PLOAD   x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
        lea     prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
        ; lea prm, [probs + 256 * PMULT + 1 * PMULT]
        ; add prm, bit_R
        xor     offs, bit               ; offs -> the opposite table
        add     match, match            ; expose the next match bit

        NORM_CALC x1

        cmovae  offs, bit               ; bit 1: swap which table is "other"
        mov     bit, match
        cmovae  range, t0
        mov     t0, kBitModelOffset
        cmovb   cod, t1                 ; bit 0: restore cod
        cmovb   t0, kBitModelTotal_reg
        mov     sym, 0
        PUP_SUB x1, prm, -2-1           ; sym = 3 - CF => 2 or 3
endm
321 | \r |
322 | \r |
; Middle step of matched-literal decoding: load the counter from the
; currently selected sub-table (offs/bit track whether the decoded bits
; still agree with the match byte), shift the new bit into sym, and
; advance the match bit.
LITM macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        xor     offs, bit
        add     sym, sym
        add     match, match

        NORM_CALC x1

        cmovae  offs, bit
        mov     bit, match
        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
endm
339 | \r |
340 | \r |
; Final step of matched-literal decoding: decode the eighth bit and rebase
; sym into the 0..255 byte range (symSub = 256 - 1 strips the tree's
; leading 1). No table/match bookkeeping is needed after the last bit.
LITM_2 macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        add     sym, sym

        NORM_CALC x1

        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
endm
353 | \r |
354 | \r |
355 | ; ---------- REVERSE BITS ----------\r |
356 | \r |
; First step of the fixed reverse bit tree (align distance bits).
; The caller preloaded prob = probs[1] and sym2_R = &probs[2]; sym2_R
; walks the tree as a *pointer* rather than an index. On bit 1 it
; advances to &probs[3].
REV_0 macro prob:req, probNext:req
        ; PLOAD prob, probs + 1 * PMULT
        ; lea sym2_R, [probs + 2 * PMULT]
        ; PLOAD probNext, probs + 2 * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + 3 * PMULT
        cmovae  probNext, t0            ; bit 1: next counter is probs[3]
        cmovb   cod, t1                 ; bit 0: restore cod
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [probs + 3 * PMULT]
        cmovae  sym2_R, t1_R            ; bit 1: advance node pointer
        PUP     prob, probs + 1 * PMULT
endm
375 | \r |
376 | \r |
; Middle step of the reverse bit tree: advance the node pointer by `step`
; entries, cmov-select the next node on bit 1, and update the counter of
; the node just consumed (t1_R - step * PMULT_2 recovers its address).
REV_1 macro prob:req, probNext:req, step:req
        add     sym2_R, step * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, sym2_R + step * PMULT
        cmovae  probNext, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [sym2_R + step * PMULT]
        cmovae  sym2_R, t1_R
        PUP     prob, t1_R - step * PMULT_2
endm
393 | \r |
394 | \r |
; Final step of the reverse bit tree: convert the node pointer back to a
; bit pattern (sym2 = (sym2_R - probs) >> PSHIFT), merge it into sym,
; decode the last bit, and clear the tentatively-set `step` bit on bit 0.
; NOTE(review): `lea t0, [sym - step]` uses the 32-bit alias (sym = x3)
; where similar code uses the _R (64-bit) names; the low 32-bit result is
; the same — confirm the assembler encodes this form as intended.
REV_2 macro prob:req, step:req
        sub     sym2_R, probs
        shr     sym2, PSHIFT
        or      sym, sym2

        NORM_CALC prob

        cmovae  range, t0
        lea     t0, [sym - step]
        cmovb   sym, t0                 ; bit 0: remove the step bit
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        PUP     prob, probs + sym2_R * PMULT
endm
410 | \r |
411 | \r |
; One step of the variable-length reverse tree used for SpecPos distances.
; sym_R is the current node *pointer*, sym2 the current step (doubled each
; round). The pointer advances by step on bit 0 and by 2*step on bit 1.
; NOTE: probs is clobbered here (it temporarily holds the node address for
; the PUP below); the caller reloads probs from LOC afterwards.
REV_1_VAR macro prob:req
        PLOAD   prob, sym_R
        mov     probs, sym_R            ; remember node address for PUP
        add     sym_R, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        lea     t0_R, [sym_R + 1 * sym2_R]
        cmovae  sym_R, t0_R             ; bit 1: advance by one more step
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        ; mov t1, kBitModelTotal
        ; cmovb t0, t1
        cmovb   t0, kBitModelTotal_reg
        add     sym2, sym2              ; step *= 2
        PUP     prob, probs
endm
430 | \r |
431 | \r |
432 | \r |
433 | \r |
; Compute the literal probability sub-table pointer:
;   probs += Literal*PMULT + 3 * ((((processedPos << 8) + prevByte) & lpMask) << lc)
; On entry sym holds the previous dictionary byte; LOC lc2 = lc + PSHIFT,
; folding the prob-entry scaling into the shift. Interleaved with the
; pointer math: finish the IsMatch bit-0 update (the caller already took
; the literal branch) and count this literal into processedPos.
LIT_PROBS macro lpMaskParam:req
        ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
        mov     t0, processedPos
        shl     t0, 8
        add     sym, t0
        and     sym, lpMaskParam
        add     probs_state_R, pbPos_R
        mov     x1, LOC lc2
        lea     sym, dword ptr[sym_R + 2 * sym_R]       ; sym *= 3
        add     probs, Literal * PMULT
        shl     sym, x1_L
        add     probs, sym_R
        UPDATE_0 probs_state_R, 0, IsMatch
        inc     processedPos
endm
449 | \r |
450 | \r |
451 | \r |
; ---- LZMA model dimensions (must match LzmaDec.c exactly) ----
kNumPosBitsMax equ 4
kNumPosStatesMax equ (1 SHL kNumPosBitsMax)

kLenNumLowBits equ 3
kLenNumLowSymbols equ (1 SHL kLenNumLowBits)
kLenNumHighBits equ 8
kLenNumHighSymbols equ (1 SHL kLenNumHighBits)
kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)

LenLow equ 0
LenChoice equ LenLow
LenChoice2 equ (LenLow + kLenNumLowSymbols)
LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)

kNumStates equ 12
kNumStates2 equ 16
kNumLitStates equ 7

kStartPosModelIndex equ 4
kEndPosModelIndex equ 14
kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1))

kNumPosSlotBits equ 6
kNumLenToPosStates equ 4

kNumAlignBits equ 4
kAlignTableSize equ (1 SHL kNumAlignBits)

kMatchMinLen equ 2
kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)

; Table offsets relative to probs_1664, which points kStartOffset entries
; into the real probability array so the hottest tables get short
; (often negative) displacements.
kStartOffset equ 1664
SpecPos equ (-kStartOffset)
IsRep0Long equ (SpecPos + kNumFullDistances)
RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
LenCoder equ (RepLenCoder + kNumLenProbs)
IsMatch equ (LenCoder + kNumLenProbs)
kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
IsRep equ (kAlign + kAlignTableSize)
IsRepG0 equ (IsRep + kNumStates)
IsRepG1 equ (IsRepG0 + kNumStates)
IsRepG2 equ (IsRepG1 + kNumStates)
PosSlot equ (IsRepG2 + kNumStates)
Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
NUM_BASE_PROBS equ (Literal + kStartOffset)

; compile-time layout checks against the C side
if kAlign ne 0
.err <Stop_Compiling_Bad_LZMA_kAlign>
endif

if NUM_BASE_PROBS ne 1984
.err <Stop_Compiling_Bad_LZMA_PROBS>
endif
505 | \r |
506 | \r |
PTR_FIELD equ dq ?

; ASM view of the C CLzmaDec structure prefix (props + pointers + coder
; state). Field order and sizes must match LzmaDec.h byte-for-byte;
; accessed through the GLOB / GLOB_2 addressing equates.
CLzmaDec_Asm struct
        lc db ?                 ; literal context bits
        lp db ?                 ; literal position bits
        pb db ?                 ; position bits
        _pad_ db ?
        dicSize dd ?

        probs_Spec PTR_FIELD
        probs_1664 PTR_FIELD    ; probs_Spec + kStartOffset entries
        dic_Spec PTR_FIELD
        dicBufSize PTR_FIELD
        dicPos_Spec PTR_FIELD
        buf_Spec PTR_FIELD

        range_Spec dd ?
        code_Spec dd ?
        processedPos_Spec dd ?
        checkDicSize dd ?
        rep0 dd ?
        rep1 dd ?
        rep2 dd ?
        rep3 dd ?
        state_Spec dd ?
        remainLen dd ?
CLzmaDec_Asm ends
534 | \r |
535 | \r |
; Stack-frame copy of the decoder variables that do not fit in registers,
; plus the saved caller RSP. Built once per call in the procedure prologue
; and addressed through LOC / LOC_0.
CLzmaDec_Asm_Loc struct
        OLD_RSP PTR_FIELD       ; caller RSP, restored in the epilogue
        lzmaPtr PTR_FIELD       ; the CLzmaDec * argument
        _pad0_ PTR_FIELD
        _pad1_ PTR_FIELD
        _pad2_ PTR_FIELD
        dicBufSize PTR_FIELD
        probs_Spec PTR_FIELD
        dic_Spec PTR_FIELD

        limit PTR_FIELD         ; dic + limit argument (absolute end pointer)
        bufLimit PTR_FIELD
        lc2 dd ?                ; lc + PSHIFT (precomputed for LIT_PROBS)
        lpMask dd ?
        pbMask dd ?
        checkDicSize dd ?

        _pad_ dd ?
        remainLen dd ?
        dicPos_Spec PTR_FIELD
        rep0 dd ?
        rep1 dd ?
        rep2 dd ?
        rep3 dd ?
CLzmaDec_Asm_Loc ends
561 | \r |
562 | \r |
; Struct-addressing shorthands. GLOB_2 reaches the CLzmaDec struct through
; sym_R (used during setup, where sym_R holds the lzma pointer); GLOB
; through r1. LOC_0 reaches the local frame through r0 (before RSP is
; switched to the aligned frame in the prologue); LOC through RSP after.
GLOB_2 equ [sym_R].CLzmaDec_Asm.
GLOB equ [r1].CLzmaDec_Asm.
LOC_0 equ [r0].CLzmaDec_Asm_Loc.
LOC equ [RSP].CLzmaDec_Asm_Loc.
567 | \r |
568 | \r |
; Copy one field from the CLzmaDec structure (GLOB_2, addressed via sym_R)
; into the local stack-frame copy (LOC_0, addressed via r0).
; Clobbers t0.
COPY_VAR macro fieldName
        mov     t0, GLOB_2 fieldName
        mov     LOC_0 fieldName, t0
endm
573 | \r |
574 | \r |
; Write one field from the local stack-frame copy (LOC, addressed via RSP)
; back into the CLzmaDec structure (GLOB, addressed via r1).
; Clobbers t0.
RESTORE_VAR macro fieldName
        mov     t0, LOC fieldName
        mov     GLOB fieldName, t0
endm
579 | \r |
580 | \r |
581 | \r |
; Prepare IsMatch indexing:
;   pbPos = (processedPos & pbMask) << (kLenNumLowBits + 1 + PSHIFT)
; The extra shift pre-scales posState so the same value can index the
; IsMatch/IsRep0Long rows and the LenLow tables without rescaling.
;   probs_state_R = probs + state   (state is kept pre-multiplied by PMULT)
; The `reg` parameter is unused (kept for call-site compatibility).
IsMatchBranch_Pre macro reg
        ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
        mov     pbPos, LOC pbMask
        and     pbPos, processedPos
        shl     pbPos, (kLenNumLowBits + 1 + PSHIFT)
        lea     probs_state_R, [probs + 1 * state_R]
endm
589 | \r |
590 | \r |
; Decode the IsMatch bit: fall through on a literal (bit 0, counter
; updated), jump to IsMatch_label on a match (bit 1). The `reg` parameter
; is unused (kept for call-site compatibility).
IsMatchBranch macro reg
        IsMatchBranch_Pre
        IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
endm
595 | \r |
596 | \r |
; Leave the decode loop (jump to fin_OK) when either the input pointer or
; the dictionary write position has reached its limit. The `reg` parameter
; is unused (kept for call-site compatibility).
CheckLimits macro reg
        cmp     buf, LOC bufLimit
        jae     fin_OK
        cmp     dicPos, LOC limit
        jae     fin_OK
endm
603 | \r |
604 | \r |
605 | \r |
; RSP is (16x + 8) bytes aligned in WIN64-x64
; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)

; ABI-independent names for the three arguments of LzmaDec_DecodeReal_3
; (REG_ABI_PARAM_* are defined in 7zAsm.asm per calling convention).
PARAM_lzma equ REG_ABI_PARAM_0
PARAM_limit equ REG_ABI_PARAM_1
PARAM_bufLimit equ REG_ABI_PARAM_2
612 | \r |
613 | ; MY_ALIGN_64\r |
614 | MY_PROC LzmaDec_DecodeReal_3, 3\r |
615 | MY_PUSH_PRESERVED_ABI_REGS\r |
616 | \r |
617 | lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]\r |
618 | and r0, -128\r |
619 | mov r5, RSP\r |
620 | mov RSP, r0\r |
621 | mov LOC_0 Old_RSP, r5\r |
622 | mov LOC_0 lzmaPtr, PARAM_lzma\r |
623 | \r |
624 | mov LOC_0 remainLen, 0 ; remainLen must be ZERO\r |
625 | \r |
626 | mov LOC_0 bufLimit, PARAM_bufLimit\r |
627 | mov sym_R, PARAM_lzma ; CLzmaDec_Asm_Loc pointer for GLOB_2\r |
628 | mov dic, GLOB_2 dic_Spec\r |
629 | add PARAM_limit, dic\r |
630 | mov LOC_0 limit, PARAM_limit\r |
631 | \r |
632 | COPY_VAR(rep0)\r |
633 | COPY_VAR(rep1)\r |
634 | COPY_VAR(rep2)\r |
635 | COPY_VAR(rep3)\r |
636 | \r |
637 | mov dicPos, GLOB_2 dicPos_Spec\r |
638 | add dicPos, dic\r |
639 | mov LOC_0 dicPos_Spec, dicPos\r |
640 | mov LOC_0 dic_Spec, dic\r |
641 | \r |
642 | mov x1_L, GLOB_2 pb\r |
643 | mov t0, 1\r |
644 | shl t0, x1_L\r |
645 | dec t0\r |
646 | mov LOC_0 pbMask, t0\r |
647 | \r |
648 | ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;\r |
649 | ; unsigned lc = p->prop.lc;\r |
650 | ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);\r |
651 | \r |
652 | mov x1_L, GLOB_2 lc\r |
653 | mov x2, 100h\r |
654 | mov t0, x2\r |
655 | shr x2, x1_L\r |
656 | ; inc x1\r |
657 | add x1_L, PSHIFT\r |
658 | mov LOC_0 lc2, x1\r |
659 | mov x1_L, GLOB_2 lp\r |
660 | shl t0, x1_L\r |
661 | sub t0, x2\r |
662 | mov LOC_0 lpMask, t0\r |
663 | mov lpMask_reg, t0\r |
664 | \r |
665 | ; mov probs, GLOB_2 probs_Spec\r |
666 | ; add probs, kStartOffset SHL PSHIFT\r |
667 | mov probs, GLOB_2 probs_1664\r |
668 | mov LOC_0 probs_Spec, probs\r |
669 | \r |
670 | mov t0_R, GLOB_2 dicBufSize\r |
671 | mov LOC_0 dicBufSize, t0_R\r |
672 | \r |
673 | mov x1, GLOB_2 checkDicSize\r |
674 | mov LOC_0 checkDicSize, x1\r |
675 | \r |
676 | mov processedPos, GLOB_2 processedPos_Spec\r |
677 | \r |
678 | mov state, GLOB_2 state_Spec\r |
679 | shl state, PSHIFT\r |
680 | \r |
681 | mov buf, GLOB_2 buf_Spec\r |
682 | mov range, GLOB_2 range_Spec\r |
683 | mov cod, GLOB_2 code_Spec\r |
684 | mov kBitModelTotal_reg, kBitModelTotal\r |
685 | xor sym, sym\r |
686 | \r |
687 | ; if (processedPos != 0 || checkDicSize != 0)\r |
688 | or x1, processedPos\r |
689 | jz @f\r |
690 | \r |
691 | add t0_R, dic\r |
692 | cmp dicPos, dic\r |
693 | cmovnz t0_R, dicPos\r |
694 | movzx sym, byte ptr[t0_R - 1]\r |
695 | \r |
696 | @@:\r |
697 | IsMatchBranch_Pre\r |
698 | cmp state, 4 * PMULT\r |
699 | jb lit_end\r |
700 | cmp state, kNumLitStates * PMULT\r |
701 | jb lit_matched_end\r |
702 | jmp lz_end\r |
703 | \r |
704 | \r |
705 | \r |
706 | \r |
707 | ; ---------- LITERAL ----------\r |
708 | MY_ALIGN_64\r |
709 | lit_start:\r |
710 | xor state, state\r |
711 | lit_start_2:\r |
712 | LIT_PROBS lpMask_reg\r |
713 | \r |
714 | ifdef _LZMA_SIZE_OPT\r |
715 | \r |
716 | PLOAD x1, probs + 1 * PMULT\r |
717 | mov sym, 1\r |
718 | MY_ALIGN_16\r |
719 | lit_loop:\r |
720 | BIT_1 x1, x2\r |
721 | mov x1, x2\r |
722 | cmp sym, 127\r |
723 | jbe lit_loop\r |
724 | \r |
725 | else\r |
726 | \r |
727 | BIT_0 x1, x2\r |
728 | BIT_1 x2, x1\r |
729 | BIT_1 x1, x2\r |
730 | BIT_1 x2, x1\r |
731 | BIT_1 x1, x2\r |
732 | BIT_1 x2, x1\r |
733 | BIT_1 x1, x2\r |
734 | \r |
735 | endif\r |
736 | \r |
737 | BIT_2 x2, 256 - 1\r |
738 | \r |
739 | ; mov dic, LOC dic_Spec\r |
740 | mov probs, LOC probs_Spec\r |
741 | IsMatchBranch_Pre\r |
742 | mov byte ptr[dicPos], sym_L\r |
743 | inc dicPos\r |
744 | \r |
745 | CheckLimits\r |
746 | lit_end:\r |
747 | IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start\r |
748 | \r |
749 | ; jmp IsMatch_label\r |
750 | \r |
751 | ; ---------- MATCHES ----------\r |
752 | ; MY_ALIGN_32\r |
753 | IsMatch_label:\r |
754 | UPDATE_1 probs_state_R, pbPos_R, IsMatch\r |
755 | IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label\r |
756 | \r |
757 | add probs, LenCoder * PMULT\r |
758 | add state, kNumStates * PMULT\r |
759 | \r |
760 | ; ---------- LEN DECODE ----------\r |
761 | len_decode:\r |
762 | mov len_temp, 8 - 1 - kMatchMinLen\r |
763 | IF_BIT_0_NOUP probs, 0, 0, len_mid_0\r |
764 | UPDATE_1 probs, 0, 0\r |
765 | add probs, (1 SHL (kLenNumLowBits + PSHIFT))\r |
766 | mov len_temp, -1 - kMatchMinLen\r |
767 | IF_BIT_0_NOUP probs, 0, 0, len_mid_0\r |
768 | UPDATE_1 probs, 0, 0\r |
769 | add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))\r |
770 | mov sym, 1\r |
771 | PLOAD x1, probs + 1 * PMULT\r |
772 | \r |
773 | MY_ALIGN_32\r |
774 | len8_loop:\r |
775 | BIT_1 x1, x2\r |
776 | mov x1, x2\r |
777 | cmp sym, 64\r |
778 | jb len8_loop\r |
779 | \r |
780 | mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen\r |
781 | jmp short len_mid_2 ; we use short here for MASM that doesn't optimize that code as another assembler programs\r |
782 | \r |
783 | MY_ALIGN_32\r |
784 | len_mid_0:\r |
785 | UPDATE_0 probs, 0, 0\r |
786 | add probs, pbPos_R\r |
787 | BIT_0 x2, x1\r |
788 | len_mid_2:\r |
789 | BIT_1 x1, x2\r |
790 | BIT_2 x2, len_temp\r |
791 | mov probs, LOC probs_Spec\r |
792 | cmp state, kNumStates * PMULT\r |
793 | jb copy_match\r |
794 | \r |
795 | \r |
796 | ; ---------- DECODE DISTANCE ----------\r |
797 | ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);\r |
798 | \r |
799 | mov t0, 3 + kMatchMinLen\r |
800 | cmp sym, 3 + kMatchMinLen\r |
801 | cmovb t0, sym\r |
802 | add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))\r |
803 | shl t0, (kNumPosSlotBits + PSHIFT)\r |
804 | add probs, t0_R\r |
805 | \r |
806 | ; sym = Len\r |
807 | ; mov LOC remainLen, sym\r |
808 | mov len_temp, sym\r |
809 | \r |
810 | ifdef _LZMA_SIZE_OPT\r |
811 | \r |
812 | PLOAD x1, probs + 1 * PMULT\r |
813 | mov sym, 1\r |
814 | MY_ALIGN_16\r |
815 | slot_loop:\r |
816 | BIT_1 x1, x2\r |
817 | mov x1, x2\r |
818 | cmp sym, 32\r |
819 | jb slot_loop\r |
820 | \r |
821 | else\r |
822 | \r |
823 | BIT_0 x1, x2\r |
824 | BIT_1 x2, x1\r |
825 | BIT_1 x1, x2\r |
826 | BIT_1 x2, x1\r |
827 | BIT_1 x1, x2\r |
828 | \r |
829 | endif\r |
830 | \r |
831 | mov x1, sym\r |
832 | BIT_2 x2, 64-1\r |
833 | \r |
834 | and sym, 3\r |
835 | mov probs, LOC probs_Spec\r |
836 | cmp x1, 32 + kEndPosModelIndex / 2\r |
837 | jb short_dist\r |
838 | \r |
839 | ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));\r |
840 | sub x1, (32 + 1 + kNumAlignBits)\r |
841 | ; distance = (2 | (distance & 1));\r |
842 | or sym, 2\r |
843 | PLOAD x2, probs + 1 * PMULT\r |
844 | shl sym, kNumAlignBits + 1\r |
845 | lea sym2_R, [probs + 2 * PMULT]\r |
846 | \r |
847 | jmp direct_norm\r |
848 | ; lea t1, [sym_R + (1 SHL kNumAlignBits)]\r |
849 | ; cmp range, kTopValue\r |
850 | ; jb direct_norm\r |
851 | \r |
852 | ; ---------- DIRECT DISTANCE ----------\r |
853 | MY_ALIGN_32\r |
854 | direct_loop:\r |
855 | shr range, 1\r |
856 | mov t0, cod\r |
857 | sub cod, range\r |
858 | cmovs cod, t0\r |
859 | cmovns sym, t1\r |
860 | \r |
861 | comment ~\r |
862 | sub cod, range\r |
863 | mov x2, cod\r |
864 | sar x2, 31\r |
865 | lea sym, dword ptr [r2 + sym_R * 2 + 1]\r |
866 | and x2, range\r |
867 | add cod, x2\r |
868 | ~\r |
869 | dec x1\r |
870 | je direct_end\r |
871 | \r |
872 | add sym, sym\r |
873 | direct_norm:\r |
874 | lea t1, [sym_R + (1 SHL kNumAlignBits)]\r |
875 | cmp range, kTopValue\r |
876 | jae near ptr direct_loop\r |
877 | ; we align for 32 here with "near ptr" command above\r |
878 | NORM_2\r |
879 | jmp direct_loop\r |
880 | \r |
881 | MY_ALIGN_32\r |
882 | direct_end:\r |
883 | ; prob = + kAlign;\r |
884 | ; distance <<= kNumAlignBits;\r |
885 | REV_0 x2, x1\r |
886 | REV_1 x1, x2, 2\r |
887 | REV_1 x2, x1, 4\r |
888 | REV_2 x1, 8\r |
889 | \r |
890 | decode_dist_end:\r |
891 | \r |
892 | ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))\r |
893 | \r |
894 | mov t1, LOC rep0\r |
895 | mov x1, LOC rep1\r |
896 | mov x2, LOC rep2\r |
897 | \r |
898 | mov t0, LOC checkDicSize\r |
899 | test t0, t0\r |
900 | cmove t0, processedPos\r |
901 | cmp sym, t0\r |
902 | jae end_of_payload\r |
903 | ; jmp end_of_payload ; for debug\r |
904 | \r |
905 | ; rep3 = rep2;\r |
906 | ; rep2 = rep1;\r |
907 | ; rep1 = rep0;\r |
908 | ; rep0 = distance + 1;\r |
909 | \r |
910 | inc sym\r |
911 | mov LOC rep0, sym\r |
912 | ; mov sym, LOC remainLen\r |
913 | mov sym, len_temp\r |
914 | mov LOC rep1, t1\r |
915 | mov LOC rep2, x1\r |
916 | mov LOC rep3, x2\r |
917 | \r |
918 | ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;\r |
919 | cmp state, (kNumStates + kNumLitStates) * PMULT\r |
920 | mov state, kNumLitStates * PMULT\r |
921 | mov t0, (kNumLitStates + 3) * PMULT\r |
922 | cmovae state, t0\r |
923 | \r |
924 | \r |
925 | ; ---------- COPY MATCH ----------\r |
926 | copy_match:\r |
927 | \r |
928 | ; len += kMatchMinLen;\r |
929 | ; add sym, kMatchMinLen\r |
930 | \r |
931 | ; if ((rem = limit - dicPos) == 0)\r |
932 | ; {\r |
933 | ; p->dicPos = dicPos;\r |
934 | ; return SZ_ERROR_DATA;\r |
935 | ; }\r |
936 | mov cnt_R, LOC limit\r |
937 | sub cnt_R, dicPos\r |
938 | jz fin_dicPos_LIMIT\r |
939 | \r |
940 | ; curLen = ((rem < len) ? (unsigned)rem : len);\r |
941 | cmp cnt_R, sym_R\r |
942 | ; cmovae cnt_R, sym_R ; 64-bit\r |
943 | cmovae cnt, sym ; 32-bit\r |
944 | \r |
945 | mov dic, LOC dic_Spec\r |
946 | mov x1, LOC rep0\r |
947 | \r |
948 | mov t0_R, dicPos\r |
949 | add dicPos, cnt_R\r |
950 | ; processedPos += curLen;\r |
951 | add processedPos, cnt\r |
952 | ; len -= curLen;\r |
953 | sub sym, cnt\r |
954 | mov LOC remainLen, sym\r |
955 | \r |
956 | sub t0_R, dic\r |
957 | \r |
958 | ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);\r |
959 | sub t0_R, r1\r |
960 | jae @f\r |
961 | \r |
962 | mov r1, LOC dicBufSize\r |
963 | add t0_R, r1\r |
964 | sub r1, t0_R\r |
965 | cmp cnt_R, r1\r |
966 | ja copy_match_cross\r |
967 | @@:\r |
968 | ; if (curLen <= dicBufSize - pos)\r |
969 | \r |
970 | ; ---------- COPY MATCH FAST ----------\r |
971 | ; Byte *dest = dic + dicPos;\r |
972 | ; mov r1, dic\r |
973 | ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;\r |
974 | ; sub t0_R, dicPos\r |
975 | ; dicPos += curLen;\r |
976 | \r |
977 | ; const Byte *lim = dest + curLen;\r |
978 | add t0_R, dic\r |
979 | movzx sym, byte ptr[t0_R]\r |
980 | add t0_R, cnt_R\r |
981 | neg cnt_R\r |
982 | ; lea r1, [dicPos - 1]\r |
983 | copy_common:\r |
984 | dec dicPos\r |
985 | ; cmp LOC rep0, 1\r |
986 | ; je rep0Label\r |
987 | \r |
988 | ; t0_R - src_lim\r |
989 | ; r1 - dest_lim - 1\r |
990 | ; cnt_R - (-cnt)\r |
991 | \r |
992 | IsMatchBranch_Pre\r |
993 | inc cnt_R\r |
994 | jz copy_end\r |
995 | MY_ALIGN_16\r |
996 | @@:\r |
997 | mov byte ptr[cnt_R * 1 + dicPos], sym_L\r |
998 | movzx sym, byte ptr[cnt_R * 1 + t0_R]\r |
999 | inc cnt_R\r |
1000 | jnz @b\r |
1001 | \r |
1002 | copy_end:\r |
1003 | lz_end_match:\r |
1004 | mov byte ptr[dicPos], sym_L\r |
1005 | inc dicPos\r |
1006 | \r |
1007 | ; IsMatchBranch_Pre\r |
1008 | CheckLimits\r |
1009 | lz_end:\r |
1010 | IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label\r |
1011 | \r |
1012 | \r |
1013 | \r |
1014 | ; ---------- LITERAL MATCHED ----------\r |
1015 | \r |
1016 | LIT_PROBS LOC lpMask\r |
1017 | \r |
1018 | ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r |
1019 | mov x1, LOC rep0\r |
1020 | ; mov dic, LOC dic_Spec\r |
1021 | mov LOC dicPos_Spec, dicPos\r |
1022 | \r |
1023 | ; state -= (state < 10) ? 3 : 6;\r |
1024 | lea t0, [state_R - 6 * PMULT]\r |
1025 | sub state, 3 * PMULT\r |
1026 | cmp state, 7 * PMULT\r |
1027 | cmovae state, t0\r |
1028 | \r |
1029 | sub dicPos, dic\r |
1030 | sub dicPos, r1\r |
1031 | jae @f\r |
1032 | add dicPos, LOC dicBufSize\r |
1033 | @@:\r |
1034 | comment ~\r |
1035 | xor t0, t0\r |
1036 | sub dicPos, r1\r |
1037 | cmovb t0_R, LOC dicBufSize\r |
1038 | ~\r |
1039 | \r |
1040 | movzx match, byte ptr[dic + dicPos * 1]\r |
1041 | \r |
1042 | ifdef _LZMA_SIZE_OPT\r |
1043 | \r |
1044 | mov offs, 256 * PMULT\r |
1045 | shl match, (PSHIFT + 1)\r |
1046 | mov bit, match\r |
1047 | mov sym, 1\r |
1048 | MY_ALIGN_16\r |
1049 | litm_loop:\r |
1050 | LITM\r |
1051 | cmp sym, 256\r |
1052 | jb litm_loop\r |
1053 | sub sym, 256\r |
1054 | \r |
1055 | else\r |
1056 | \r |
1057 | LITM_0\r |
1058 | LITM\r |
1059 | LITM\r |
1060 | LITM\r |
1061 | LITM\r |
1062 | LITM\r |
1063 | LITM\r |
1064 | LITM_2\r |
1065 | \r |
1066 | endif\r |
1067 | \r |
1068 | mov probs, LOC probs_Spec\r |
1069 | IsMatchBranch_Pre\r |
1070 | ; mov dic, LOC dic_Spec\r |
1071 | mov dicPos, LOC dicPos_Spec\r |
1072 | mov byte ptr[dicPos], sym_L\r |
1073 | inc dicPos\r |
1074 | \r |
1075 | CheckLimits\r |
1076 | lit_matched_end:\r |
1077 | IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label\r |
1078 | ; IsMatchBranch\r |
1079 | mov lpMask_reg, LOC lpMask\r |
1080 | sub state, 3 * PMULT\r |
1081 | jmp lit_start_2\r |
1082 | \r |
1083 | \r |
1084 | \r |
1085 | ; ---------- REP 0 LITERAL ----------\r |
1086 | MY_ALIGN_32\r |
1087 | IsRep0Short_label:\r |
1088 | UPDATE_0 probs_state_R, pbPos_R, IsRep0Long\r |
1089 | \r |
1090 | ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r |
1091 | mov dic, LOC dic_Spec\r |
1092 | mov t0_R, dicPos\r |
1093 | mov probBranch, LOC rep0\r |
1094 | sub t0_R, dic\r |
1095 | \r |
1096 | sub probs, RepLenCoder * PMULT\r |
1097 | \r |
1098 | ; state = state < kNumLitStates ? 9 : 11;\r |
1099 | or state, 1 * PMULT\r |
1100 | \r |
1101 | ; the caller doesn't allow (dicPos >= limit) case for REP_SHORT\r |
1102 | ; so we don't need the following (dicPos == limit) check here:\r |
1103 | ; cmp dicPos, LOC limit\r |
1104 | ; jae fin_dicPos_LIMIT_REP_SHORT\r |
1105 | \r |
1106 | inc processedPos\r |
1107 | \r |
1108 | IsMatchBranch_Pre\r |
1109 | \r |
1110 | ; xor sym, sym\r |
1111 | ; sub t0_R, probBranch_R\r |
1112 | ; cmovb sym_R, LOC dicBufSize\r |
1113 | ; add t0_R, sym_R\r |
1114 | sub t0_R, probBranch_R\r |
1115 | jae @f\r |
1116 | add t0_R, LOC dicBufSize\r |
1117 | @@:\r |
1118 | movzx sym, byte ptr[dic + t0_R * 1]\r |
1119 | jmp lz_end_match\r |
1120 | \r |
1121 | \r |
1122 | MY_ALIGN_32\r |
1123 | IsRep_label:\r |
1124 | UPDATE_1 probs_state_R, 0, IsRep\r |
1125 | \r |
1126 | ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.\r |
1127 | ; So we don't check it here.\r |
1128 | \r |
1129 | ; mov t0, processedPos\r |
1130 | ; or t0, LOC checkDicSize\r |
1131 | ; jz fin_ERROR_2\r |
1132 | \r |
1133 | ; state = state < kNumLitStates ? 8 : 11;\r |
1134 | cmp state, kNumLitStates * PMULT\r |
1135 | mov state, 8 * PMULT\r |
1136 | mov probBranch, 11 * PMULT\r |
1137 | cmovae state, probBranch\r |
1138 | \r |
1139 | ; prob = probs + RepLenCoder;\r |
1140 | add probs, RepLenCoder * PMULT\r |
1141 | \r |
1142 | IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label\r |
1143 | IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label\r |
1144 | UPDATE_1 probs_state_R, pbPos_R, IsRep0Long\r |
1145 | jmp len_decode\r |
1146 | \r |
MY_ALIGN_32
; IsRepG0 == 1: distance is rep1, rep2 or rep3; start rotating the history.
IsRepG0_label:
UPDATE_1 probs_state_R, 0, IsRepG0
mov dist2, LOC rep0
mov dist, LOC rep1
mov LOC rep1, dist2 ; rep1 = old rep0

IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
; IsRepG1 == 0: rep0 = old rep1
mov LOC rep0, dist
jmp len_decode
1157 | \r |
; MY_ALIGN_32
; IsRepG1 == 1: distance is rep2 or rep3.
; On entry (from IsRepG0_label): dist = old rep1, rep1 already = old rep0.
IsRepG1_label:
UPDATE_1 probs_state_R, 0, IsRepG1
mov dist2, LOC rep2
mov LOC rep2, dist ; rep2 = old rep1

IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
; IsRepG2 == 0: rep0 = old rep2
mov LOC rep0, dist2
jmp len_decode
1167 | \r |
; MY_ALIGN_32
; IsRepG2 == 1: distance = rep3 (deepest history slot).
; On entry: dist2 = old rep2, rep2 already = old rep1.
IsRepG2_label:
UPDATE_1 probs_state_R, 0, IsRepG2
mov dist, LOC rep3
mov LOC rep3, dist2 ; rep3 = old rep2
mov LOC rep0, dist ; rep0 = old rep3
jmp len_decode
1175 | \r |
1176 | \r |
1177 | \r |
; ---------- SPEC SHORT DISTANCE ----------
; Reverse bit-tree decode of a small match distance using the SpecPos
; probability array (the posSlot < kEndPosModelIndex case of LzmaDec.c).

MY_ALIGN_32
short_dist:
; NOTE(review): x1 appears to hold (numDirectBits + 32) on entry and sym the
; low posSlot bit — set up in the distance-decode path outside this chunk; confirm there.
sub x1, 32 + 1
jbe decode_dist_end ; no extra tree bits to decode
or sym, 2 ; base = (2 | (posSlot & 1))
shl sym, x1_L ; base <<= numDirectBits
; prm = probs + SpecPos + base; reverse-tree index starts at 1
lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
mov sym2, PMULT ; step
MY_ALIGN_32
spec_loop:
REV_1_VAR x2 ; decode one reverse-bit-tree bit
dec x1
jnz spec_loop

; Recover the decoded distance from the final prob pointer:
; dist = (prm - probs - SpecPos - step) >> PSHIFT
mov probs, LOC probs_Spec
sub sym, sym2
sub sym, SpecPos * PMULT
sub sym_R, probs
shr sym, PSHIFT

jmp decode_dist_end
1201 | \r |
1202 | \r |
; ---------- COPY MATCH CROSS ----------
; Copy a match whose source range crosses the end of the circular dictionary:
; copy byte-by-byte until srcPos hits dicBufSize, then hand the remaining
; tail (restarting at dic[0]) to copy_common.
copy_match_cross:
; t0_R - src pos
; r1 - len to dicBufSize
; cnt_R - total copy len

mov t1_R, t0_R ; srcPos
mov t0_R, dic
mov r1, LOC dicBufSize ;
neg cnt_R ; cnt counts up through negative values toward 0
@@:
movzx sym, byte ptr[t1_R * 1 + t0_R] ; sym = dic[srcPos]
inc t1_R
; NOTE(review): dicPos looks pre-advanced by the total copy length here,
; so [dicPos + cnt] (cnt < 0) is the destination — confirm in the caller.
mov byte ptr[cnt_R * 1 + dicPos], sym_L
inc cnt_R
cmp t1_R, r1 ; reached the end of the dictionary buffer?
jne @b

; Source wrapped: prefetch dic[0] and bias src so copy_common finishes the tail.
movzx sym, byte ptr[t0_R]
sub t0_R, cnt_R ; src = dic - cnt (cnt is the negative remaining count)
jmp copy_common
1224 | \r |
1225 | \r |
1226 | \r |
1227 | \r |
; fin_dicPos_LIMIT_REP_SHORT:
; mov sym, 1

; dicPos reached the output limit mid-match: stash the pending length in
; remainLen and return success; the C code (LzmaDec.c) resumes the copy.
fin_dicPos_LIMIT:
mov LOC remainLen, sym ; sym = remaining match length here
jmp fin_OK
; For more strict mode we can stop decoding with error
; mov sym, 1
; jmp fin
1237 | \r |
1238 | \r |
; Match distance out of range: record the error for the C caller by tagging
; remainLen with kMatchSpecLen_Error_Data, then return error code 1.
fin_ERROR_MATCH_DIST:

; rep3 = rep2;
; rep2 = rep1;
; rep1 = rep0;
; rep0 = distance + 1;

add len_temp, kMatchSpecLen_Error_Data ; tag remainLen as "error with data"
mov LOC remainLen, len_temp

; registers already hold the rotated rep values at this point (see pseudo-C above)
mov LOC rep0, sym
mov LOC rep1, t1
mov LOC rep2, x1
mov LOC rep3, x2

; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
cmp state, (kNumStates + kNumLitStates) * PMULT
mov state, kNumLitStates * PMULT
mov t0, (kNumLitStates + 3) * PMULT
cmovae state, t0 ; branchless state clamp

; jmp fin_OK
mov sym, 1 ; return code 1 (error) via fin -> x0
jmp fin
1263 | \r |
; Overflow distance decoded: 0xFFFFFFFF is the end-of-payload marker;
; any other overflow value is a real distance error.
end_of_payload:
inc sym ; marker (0xFFFFFFFF) wraps to 0
jnz fin_ERROR_MATCH_DIST

mov LOC remainLen, kMatchSpecLenStart ; signal "stream finished" to LzmaDec.c
sub state, kNumStates * PMULT

fin_OK:
xor sym, sym ; return code 0 (success)
1273 | \r |
; Common exit: flush the register-cached decoder state back into the
; CLzmaDec structure, restore callee state, and return sym in x0.
fin:
NORM ; final range-coder normalization

mov r1, LOC lzmaPtr ; r1 = CLzmaDec* (base for GLOB stores)

sub dicPos, LOC dic_Spec ; store dicPos as an offset again
mov GLOB dicPos_Spec, dicPos
mov GLOB buf_Spec, buf
mov GLOB range_Spec, range
mov GLOB code_Spec, cod
shr state, PSHIFT ; un-scale state (kept pre-multiplied by PMULT internally)
mov GLOB state_Spec, state
mov GLOB processedPos_Spec, processedPos

RESTORE_VAR(remainLen)
RESTORE_VAR(rep0)
RESTORE_VAR(rep1)
RESTORE_VAR(rep2)
RESTORE_VAR(rep3)

mov x0, sym ; function result: 0 = OK, 1 = error

mov RSP, LOC Old_RSP ; drop the local frame set up in the prologue

MY_POP_PRESERVED_ABI_REGS
MY_ENDP
1300 | \r |
1301 | _TEXT$LZMADECOPT ENDS\r |
1302 | \r |
1303 | end\r |