attempt to fix build
[pcsx_rearmed.git] / deps / libchdr / deps / lzma-22.01 / src / Asm / x86 / LzmaDecOpt.asm
CommitLineData
9e052883 1; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function\r
2; 2021-02-23: Igor Pavlov : Public domain\r
3;\r
4; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()\r
5; function for check at link time.\r
6; This code is tightly coupled with LzmaDec_TryDummy()\r
7; and with other functions in the LzmaDec.c file.\r
8; CLzmaDec structure, (probs) array layout, input and output of\r
9; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).\r
10\r
; Build setup. The x64 requirement check is currently inert: both statements inside the\r
; ifndef are commented out, so nothing happens when x64 is undefined.\r
11ifndef x64\r
12; x64=1\r
13; .err <x64_IS_REQUIRED>\r
14endif\r
15\r
; The x*/r* register aliases and the MY_* / REG_ABI_* helpers used below are not defined\r
; in this file; presumably they come from this include - TODO confirm.\r
16include 7zAsm.asm\r
17\r
18MY_ASM_START\r
19\r
; Dedicated code segment for the decoder, aligned to 64 bytes.\r
20_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'\r
21\r
; MY_ALIGN alignTo: pad the current location up to an <alignTo>-byte boundary.\r
22MY_ALIGN macro alignTo:req\r
23 align alignTo\r
24endm\r
25\r
; MY_ALIGN_16: pad the current location up to a 16-byte boundary.\r
26MY_ALIGN_16 macro\r
27 align 16\r
28endm\r
29\r
; MY_ALIGN_32: pad the current location up to a 32-byte boundary.\r
30MY_ALIGN_32 macro\r
31 align 32\r
32endm\r
33\r
; MY_ALIGN_64: pad the current location up to a 64-byte boundary.\r
34MY_ALIGN_64 macro\r
35 align 64\r
36endm\r
37\r
38\r
; Build-time options, both disabled here (commented out):\r
;   _LZMA_SIZE_OPT - use the looped bit-tree decoders instead of the unrolled ones\r
;                    (see the "ifdef _LZMA_SIZE_OPT" sections in the decoder body).\r
;   _LZMA_PROB32   - use dword probability slots instead of word slots.\r
39; _LZMA_SIZE_OPT equ 1\r
40\r
41; _LZMA_PROB32 equ 1\r
42\r
; PSHIFT is log2 of the probability slot size in bytes.\r
;   PLOAD  dest, mem : dest = probability stored at [mem]\r
;   PSTORE src,  mem : probability stored at [mem] = src\r
43ifdef _LZMA_PROB32\r
44 PSHIFT equ 2\r
45 PLOAD macro dest, mem\r
46 mov dest, dword ptr [mem]\r
47 endm\r
48 PSTORE macro src, mem\r
49 mov dword ptr [mem], src\r
50 endm\r
51else\r
; 16-bit slots: the load zero-extends; the store writes the register named <src>_W,\r
; built with @CatStr (the *_W aliases are presumably the 16-bit views from 7zAsm.asm).\r
52 PSHIFT equ 1\r
53 PLOAD macro dest, mem\r
54 movzx dest, word ptr [mem]\r
55 endm\r
56 PSTORE macro src, mem\r
57 mov word ptr [mem], @CatStr(src, _W)\r
58 endm\r
59endif\r
60\r
; Byte scale factors derived from PSHIFT:\r
;   PMULT      = bytes per probability slot\r
;   PMULT_HALF = PMULT / 2 (applied to an index that has already been doubled)\r
;   PMULT_2    = PMULT * 2\r
61PMULT equ (1 SHL PSHIFT)\r
62PMULT_HALF equ (1 SHL (PSHIFT - 1))\r
63PMULT_2 equ (1 SHL (PSHIFT + 1))\r
64\r
; Added to the match length stored in remainLen on the bad-distance error path\r
; (see fin_ERROR_MATCH_DIST).\r
65kMatchSpecLen_Error_Data equ (1 SHL 9)\r
66\r
67; x0 range\r
68; x1 pbPos / (prob) TREE\r
69; x2 probBranch / prm (MATCHED) / pbPos / cnt\r
70; x3 sym\r
71;====== r4 === RSP\r
72; x5 cod\r
73; x6 t1 NORM_CALC / probs_state / dist\r
74; x7 t0 NORM_CALC / prob2 IF_BIT_1\r
75; x8 state\r
76; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg\r
77; x10 kBitModelTotal_reg\r
78; r11 probs\r
79; x12 offs (MATCHED) / dic / len_temp\r
80; x13 processedPos\r
81; x14 bit (MATCHED) / dicPos\r
82; r15 buf\r
83\r
84\r
; Symbolic names for the register allocation listed in the table above.\r
; Naming convention used here: bare name -> x<N>, _R -> r<N>, _L -> x<N>_L, _W -> x<N>_W\r
; (presumably the 32-, 64-, 8- and 16-bit views of one register - defined outside this file).\r
; Several names share a register, so they are never live at the same time.\r
\r
; Range coder and position state.\r
85cod equ x5\r
86cod_L equ x5_L\r
87range equ x0\r
88state equ x8\r
89state_R equ r8\r
90buf equ r15\r
91processedPos equ x13\r
92kBitModelTotal_reg equ x10\r
93\r
; Probability of the bit being decoded by the branch macros.\r
94probBranch equ x2\r
95probBranch_R equ r2\r
96probBranch_W equ x2_W\r
97\r
98pbPos equ x1\r
99pbPos_R equ r1\r
100\r
; Copy counter; shares x2/r2 with probBranch.\r
101cnt equ x2\r
102cnt_R equ r2\r
103\r
104lpMask_reg equ x9\r
105dicPos equ r14\r
106\r
107sym equ x3\r
108sym_R equ r3\r
109sym_L equ x3_L\r
110\r
111probs equ r11\r
112dic equ r12\r
113\r
; Scratch registers.\r
114t0 equ x7\r
115t0_W equ x7_W\r
116t0_R equ r7\r
117\r
118prob2 equ t0\r
119prob2_W equ t0_W\r
120\r
121t1 equ x6\r
122t1_R equ r6\r
123\r
124probs_state equ t1\r
125probs_state_R equ t1_R\r
126\r
; Matched-literal decoding. These alias r2, x9 (lpMask_reg), x12 (dic) and x14 (dicPos),\r
; so those values must be saved or reloaded around that code.\r
127prm equ r2\r
128match equ x9\r
129match_R equ r9\r
130offs equ x12\r
131offs_R equ r12\r
132bit equ x14\r
133bit_R equ r14\r
134\r
135sym2 equ x9\r
136sym2_R equ r9\r
137\r
138len_temp equ x12\r
139\r
140dist equ sym\r
141dist2 equ x9\r
142\r
143\r
144\r
; Range-coder model constants.\r
;   kBitModelTotal  = 1 << 11 : scale of a probability value\r
;   kNumMoveBits    = 5       : adaptation shift used by the probability updates\r
;   kBitModelOffset = 31      : update target used for a decoded 1 bit (see PUP_COD)\r
;   kTopValue       = 1 << 24 : range is renormalized when it drops below this\r
145kNumBitModelTotalBits equ 11\r
146kBitModelTotal equ (1 SHL kNumBitModelTotalBits)\r
147kNumMoveBits equ 5\r
148kBitModelOffset equ ((1 SHL kNumMoveBits) - 1)\r
149kTopValue equ (1 SHL 24)\r
150\r
; NORM_2: unconditional renormalization step.\r
; Shifts cod and range left by 8, puts the next input byte into the low byte of cod,\r
; and advances buf by one.\r
151NORM_2 macro\r
152 ; movzx t0, BYTE PTR [buf]\r
153 shl cod, 8\r
154 mov cod_L, BYTE PTR [buf]\r
155 shl range, 8\r
156 ; or cod, t0\r
157 inc buf\r
158endm\r
159\r
160\r
; NORM: renormalize only when range < kTopValue; otherwise skip NORM_2.\r
; Modifies flags.\r
161NORM macro\r
162 cmp range, kTopValue\r
163 jae SHORT @F\r
164 NORM_2\r
165@@:\r
166endm\r
167\r
168\r
169; ---------- Branch MACROS ----------\r
170\r
; UPDATE_0 probsArray, probOffset, probDisp\r
; Probability update for a decoded 0 bit:\r
;   probBranch += (kBitModelTotal_reg - probBranch) >> kNumMoveBits\r
; and the result is stored at [probOffset + probsArray + probDisp * PMULT].\r
; Expects probBranch to hold the current probability (as left by CMP_COD).\r
; range and cod are not touched; prob2 is clobbered.\r
171UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req\r
172 mov prob2, kBitModelTotal_reg\r
173 sub prob2, probBranch\r
174 shr prob2, kNumMoveBits\r
175 add probBranch, prob2\r
176 PSTORE probBranch, probOffset * 1 + probsArray + probDisp * PMULT\r
177endm\r
178\r
179\r
; UPDATE_1 probsArray, probOffset, probDisp\r
; Range/code/probability update for a decoded 1 bit. Expects the state left by CMP_COD:\r
; prob2 = range before the split, range = bound, probBranch = current probability.\r
;   range = prob2 - bound;  cod -= bound\r
;   stored probability = probBranch - (probBranch >> kNumMoveBits)\r
; probBranch and prob2 are clobbered.\r
180UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req\r
181 sub prob2, range\r
182 sub cod, range\r
183 mov range, prob2\r
184 mov prob2, probBranch\r
185 shr probBranch, kNumMoveBits\r
186 sub prob2, probBranch\r
187 PSTORE prob2, probOffset * 1 + probsArray + probDisp * PMULT\r
188endm\r
189\r
190\r
; CMP_COD probsArray, probOffset, probDisp\r
; Loads the probability at [probOffset + probsArray + probDisp * PMULT] into probBranch,\r
; renormalizes, then computes bound = (range >> kNumBitModelTotalBits) * probBranch.\r
; On exit: prob2 = range before the split, range = bound, flags = (cod cmp bound):\r
; below means the bit is 0, above-or-equal means the bit is 1.\r
191CMP_COD macro probsArray:req, probOffset:req, probDisp:req\r
192 PLOAD probBranch, probOffset * 1 + probsArray + probDisp * PMULT\r
193 NORM\r
194 mov prob2, range\r
195 shr range, kNumBitModelTotalBits\r
196 imul range, probBranch\r
197 cmp cod, range\r
198endm\r
199\r
200\r
; IF_BIT_1_NOUP arrayBase, byteOffs, slot, target\r
; Runs CMP_COD on the selected probability and jumps to <target> when the bit is 1\r
; (cod >= bound). No probability update is done on either path; the code that follows\r
; is expected to apply UPDATE_0 / UPDATE_1.\r
201IF_BIT_1_NOUP macro arrayBase:req, byteOffs:req, slot:req, target:req\r
202 CMP_COD arrayBase, byteOffs, slot\r
203 jae target\r
204endm\r
205\r
206\r
; IF_BIT_1 arrayBase, byteOffs, slot, target\r
; Jumps to <target> when the decoded bit is 1 (no update is done on that path).\r
; Falling through means the bit is 0, and the bit-0 probability update is applied here.\r
207IF_BIT_1 macro arrayBase:req, byteOffs:req, slot:req, target:req\r
208 IF_BIT_1_NOUP arrayBase, byteOffs, slot, target\r
209 UPDATE_0 arrayBase, byteOffs, slot\r
210endm\r
211\r
212\r
; IF_BIT_0_NOUP arrayBase, byteOffs, slot, target\r
; Runs CMP_COD on the selected probability and jumps to <target> when the bit is 0\r
; (cod < bound). No probability update is done on either path.\r
213IF_BIT_0_NOUP macro arrayBase:req, byteOffs:req, slot:req, target:req\r
214 CMP_COD arrayBase, byteOffs, slot\r
215 jb target\r
216endm\r
217\r
218\r
219; ---------- CMOV MACROS ----------\r
220\r
; NORM_CALC prob\r
; Branch-free bit decode, first half. After renormalization:\r
;   range = bound = (range >> kNumBitModelTotalBits) * prob\r
;   t0    = old range - bound   (the new range if the bit is 1)\r
;   t1    = old cod             (the value to restore if the bit is 0)\r
;   cod   = old cod - bound\r
; CF from the final sub is set when the bit is 0. Callers consume it with cmovb / cmovae / sbb,\r
; so only flag-preserving instructions may be placed before the last consumer.\r
221NORM_CALC macro prob:req\r
222 NORM\r
223 mov t0, range\r
224 shr range, kNumBitModelTotalBits\r
225 imul range, prob\r
226 sub t0, range\r
227 mov t1, cod\r
228 sub cod, range\r
229endm\r
230\r
231\r
; PUP prob, probPtr\r
; Stores prob + ((t0 - prob) >> kNumMoveBits) (arithmetic shift) at [probPtr].\r
; The callers in this file load t0 beforehand with kBitModelTotal_reg for a 0 bit\r
; or kBitModelOffset for a 1 bit. Clobbers t0 and flags.\r
232PUP macro prob:req, probPtr:req\r
233 sub t0, prob\r
234 ; only sar works for both 16/32 bit prob modes\r
235 sar t0, kNumMoveBits\r
236 add t0, prob\r
237 PSTORE t0, probPtr\r
238endm\r
239\r
240\r
; PUP_SUB probReg, slotAddr, subtrahend\r
; sym = sym - subtrahend - CF, then the probability update from PUP.\r
; CF must still be the borrow produced by NORM_CALC (set when the decoded bit is 0).\r
241PUP_SUB macro probReg:req, slotAddr:req, subtrahend:req\r
242 sbb sym, subtrahend\r
243 PUP probReg, slotAddr\r
244endm\r
245\r
246\r
; PUP_COD prob, probPtr, symSub\r
; Second half of a branch-free bit decode (after NORM_CALC and the caller's cmovae range, t0):\r
;   - restores cod from t1 when the bit is 0,\r
;   - selects the update target in t0 (kBitModelTotal_reg for 0, kBitModelOffset for 1),\r
;   - copies sym into t1 before it is adjusted, so probPtr may be expressed through t1_R,\r
;   - finishes with PUP_SUB (sym adjustment + probability store).\r
; Only mov / cmov are used before the sbb, so CF from NORM_CALC stays intact.\r
247PUP_COD macro prob:req, probPtr:req, symSub:req\r
248 mov t0, kBitModelOffset\r
249 cmovb cod, t1\r
250 mov t1, sym\r
251 cmovb t0, kBitModelTotal_reg\r
252 PUP_SUB prob, probPtr, symSub\r
253endm\r
254\r
255\r
; BIT_0 prob, probNext\r
; First bit of a bit-tree decode rooted at probs.\r
; Loads the root probability (slot 1) into prob and decodes one bit with it.\r
; probNext is preloaded from slot 2 and replaced with slot 3 when the bit is 1,\r
; so it holds the probability for the next tree level.\r
; On exit sym = 2 + bit (sym = 2 - (-1) - CF) and slot 1 has been updated.\r
256BIT_0 macro prob:req, probNext:req\r
257 PLOAD prob, probs + 1 * PMULT\r
258 PLOAD probNext, probs + 1 * PMULT_2\r
259\r
260 NORM_CALC prob\r
261 \r
262 cmovae range, t0\r
263 PLOAD t0, probs + 1 * PMULT_2 + PMULT\r
264 cmovae probNext, t0\r
265 mov t0, kBitModelOffset\r
266 cmovb cod, t1\r
267 cmovb t0, kBitModelTotal_reg\r
268 mov sym, 2\r
269 PUP_SUB prob, probs + 1 * PMULT, 0 - 1\r
270endm\r
271\r
272\r
; BIT_1 prob, probNext\r
; Middle bit of a bit-tree decode. On entry sym is the current tree node and prob holds\r
; that node's probability.\r
; probNext is preloaded from slot 2*sym and replaced with slot 2*sym + 1 when the bit is 1.\r
; On exit sym = 2*sym + bit. The updated probability is written back through t1_R, which\r
; PUP_COD sets to the doubled node index (hence the PMULT_HALF scale).\r
273BIT_1 macro prob:req, probNext:req\r
274 PLOAD probNext, probs + sym_R * PMULT_2\r
275 add sym, sym\r
276 \r
277 NORM_CALC prob\r
278 \r
279 cmovae range, t0\r
280 PLOAD t0, probs + sym_R * PMULT + PMULT\r
281 cmovae probNext, t0\r
282 PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1\r
283endm\r
284\r
285\r
; BIT_2 prob, symSub\r
; Last bit of a bit-tree decode: no next probability is loaded.\r
; On exit sym = 2*sym - symSub - CF, where CF = 1 for a 0 bit. Callers choose symSub so\r
; that the leading tree marker bit is removed and/or a base value is added.\r
286BIT_2 macro prob:req, symSub:req\r
287 add sym, sym\r
288\r
289 NORM_CALC prob\r
290 \r
291 cmovae range, t0\r
292 PUP_COD prob, probs + t1_R * PMULT_HALF, symSub\r
293endm\r
294\r
295\r
296; ---------- MATCHED LITERAL ----------\r
297\r
; LITM_0: first bit of a matched-literal decode.\r
; On entry match holds the match byte and probs points at the literal probabilities.\r
; offs starts as 256 * PMULT; bit = offs AND (match << (PSHIFT + 1)) selects which half of the\r
; table is used, following the top bit of the match byte.\r
; After the decode, offs keeps the value it had on entry only if the decoded bit equals that\r
; match bit; otherwise it becomes 0.\r
; On exit sym = 2 + decoded bit and bit holds the shifted match value for the next step.\r
298LITM_0 macro\r
299 mov offs, 256 * PMULT\r
300 shl match, (PSHIFT + 1)\r
301 mov bit, offs\r
302 and bit, match\r
303 PLOAD x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT\r
304 lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]\r
305 ; lea prm, [probs + 256 * PMULT + 1 * PMULT]\r
306 ; add prm, bit_R\r
307 xor offs, bit\r
308 add match, match\r
309\r
310 NORM_CALC x1\r
311\r
312 cmovae offs, bit\r
313 mov bit, match\r
314 cmovae range, t0\r
315 mov t0, kBitModelOffset\r
316 cmovb cod, t1\r
317 cmovb t0, kBitModelTotal_reg\r
; mov (not xor) is required here: CF from NORM_CALC must survive until the sbb in PUP_SUB.\r
318 mov sym, 0\r
319 PUP_SUB x1, prm, -2-1\r
320endm\r
321\r
322\r
; LITM: middle bit of a matched-literal decode.\r
; bit AND offs picks the current match bit (0 once offs has been cleared);\r
; prm = probs + offs + bit is the base of the table used for this bit, indexed by sym.\r
; After the decode, offs keeps its value only while the decoded bits keep following the\r
; match byte. On exit sym = 2*sym + decoded bit; match and bit advance by one bit.\r
323LITM macro\r
324 and bit, offs\r
325 lea prm, [probs + offs_R * 1]\r
326 add prm, bit_R\r
327 PLOAD x1, prm + sym_R * PMULT\r
328 xor offs, bit\r
329 add sym, sym\r
330 add match, match\r
331\r
332 NORM_CALC x1\r
333\r
334 cmovae offs, bit\r
335 mov bit, match\r
336 cmovae range, t0\r
337 PUP_COD x1, prm + t1_R * PMULT_HALF, - 1\r
338endm\r
339\r
340\r
; LITM_2: last bit of a matched-literal decode.\r
; Same table selection as LITM, but offs / match / bit are not advanced.\r
; symSub = 256 - 1 gives sym = 2*sym + bit - 256 on exit.\r
341LITM_2 macro\r
342 and bit, offs\r
343 lea prm, [probs + offs_R * 1]\r
344 add prm, bit_R\r
345 PLOAD x1, prm + sym_R * PMULT\r
346 add sym, sym\r
347\r
348 NORM_CALC x1\r
349\r
350 cmovae range, t0\r
351 PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1\r
352endm\r
353\r
354\r
355; ---------- REVERSE BITS ----------\r
356\r
; REV_0 prob, probNext\r
; First bit of a reverse bit-tree decode rooted at probs.\r
; Expects the caller to have loaded prob from slot 1 and set sym2_R = probs + 2 * PMULT\r
; (the loads that used to do this are commented out below).\r
; On exit sym2_R points at slot 2 (bit 0) or slot 3 (bit 1), probNext holds that slot's\r
; probability, and slot 1 has been updated.\r
357REV_0 macro prob:req, probNext:req\r
358 ; PLOAD prob, probs + 1 * PMULT\r
359 ; lea sym2_R, [probs + 2 * PMULT]\r
360 ; PLOAD probNext, probs + 2 * PMULT\r
361 PLOAD probNext, sym2_R\r
362\r
363 NORM_CALC prob\r
364\r
365 cmovae range, t0\r
366 PLOAD t0, probs + 3 * PMULT\r
367 cmovae probNext, t0\r
368 cmovb cod, t1\r
369 mov t0, kBitModelOffset\r
370 cmovb t0, kBitModelTotal_reg\r
371 lea t1_R, [probs + 3 * PMULT]\r
372 cmovae sym2_R, t1_R\r
373 PUP prob, probs + 1 * PMULT\r
374endm\r
375\r
376\r
; REV_1 prob, probNext, step\r
; Middle bit of a reverse bit-tree decode; step is the weight of the bit being decoded.\r
; On entry sym2_R points at the slot whose probability is in prob.\r
; sym2_R advances by step slots for a 0 bit and by 2*step slots for a 1 bit;\r
; probNext receives the probability at the new sym2_R.\r
; The store address t1_R - step * PMULT_2 equals sym2_R on entry, i.e. the slot of prob.\r
377REV_1 macro prob:req, probNext:req, step:req\r
378 add sym2_R, step * PMULT\r
379 PLOAD probNext, sym2_R\r
380\r
381 NORM_CALC prob\r
382\r
383 cmovae range, t0\r
384 PLOAD t0, sym2_R + step * PMULT\r
385 cmovae probNext, t0\r
386 cmovb cod, t1\r
387 mov t0, kBitModelOffset\r
388 cmovb t0, kBitModelTotal_reg\r
389 lea t1_R, [sym2_R + step * PMULT]\r
390 cmovae sym2_R, t1_R\r
391 PUP prob, t1_R - step * PMULT_2\r
392endm\r
393\r
394\r
; REV_2 prob, step\r
; Last bit of a reverse bit-tree decode.\r
; Converts sym2_R from a pointer into a slot index (relative to probs) and ORs it into sym,\r
; then decodes the final bit: for a 0 bit, step is subtracted from sym again.\r
; The updated probability is stored at slot sym2.\r
395REV_2 macro prob:req, step:req\r
396 sub sym2_R, probs\r
397 shr sym2, PSHIFT\r
398 or sym, sym2\r
399\r
400 NORM_CALC prob\r
401\r
402 cmovae range, t0\r
403 lea t0, [sym - step]\r
404 cmovb sym, t0\r
405 cmovb cod, t1\r
406 mov t0, kBitModelOffset\r
407 cmovb t0, kBitModelTotal_reg\r
408 PUP prob, probs + sym2_R * PMULT\r
409endm\r
410\r
411\r
; REV_1_VAR prob\r
; One step of a reverse bit-tree decode with a run-time step (used in a loop).\r
; sym_R is a pointer to the current slot and sym2 is the current step in bytes.\r
; sym_R advances by sym2 for a 0 bit and by 2*sym2 for a 1 bit; sym2 is then doubled.\r
; NOTE: probs is overwritten with the current slot address (it is the store target of PUP),\r
; so the caller must reload probs after the loop.\r
412REV_1_VAR macro prob:req\r
413 PLOAD prob, sym_R\r
414 mov probs, sym_R\r
415 add sym_R, sym2_R\r
416\r
417 NORM_CALC prob\r
418\r
419 cmovae range, t0\r
420 lea t0_R, [sym_R + 1 * sym2_R]\r
421 cmovae sym_R, t0_R\r
422 mov t0, kBitModelOffset\r
423 cmovb cod, t1\r
424 ; mov t1, kBitModelTotal\r
425 ; cmovb t0, t1\r
426 cmovb t0, kBitModelTotal_reg\r
427 add sym2, sym2\r
428 PUP prob, probs\r
429endm\r
430\r
431\r
432\r
433\r
; LIT_PROBS lpMaskParam\r
; Selects the literal probability table for the next literal and finishes the pending\r
; IsMatch decision. On entry sym holds the previous byte.\r
;   sym   = 3 * (((processedPos << 8) + sym) AND lpMaskParam) << lc2\r
;   probs = probs + Literal * PMULT + sym\r
; lc2 is lc + PSHIFT (stored by the function prologue), so the shift also applies the slot size.\r
; The interleaved UPDATE_0 applies the bit-0 update to the IsMatch probability at\r
; probs_state_R + pbPos_R (the caller branched here with a *_NOUP macro).\r
; processedPos is incremented. Clobbers t0 and x1.\r
434LIT_PROBS macro lpMaskParam:req\r
435 ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);\r
436 mov t0, processedPos\r
437 shl t0, 8\r
438 add sym, t0\r
439 and sym, lpMaskParam\r
440 add probs_state_R, pbPos_R\r
441 mov x1, LOC lc2\r
442 lea sym, dword ptr[sym_R + 2 * sym_R]\r
443 add probs, Literal * PMULT\r
444 shl sym, x1_L\r
445 add probs, sym_R\r
446 UPDATE_0 probs_state_R, 0, IsMatch\r
447 inc processedPos\r
448endm\r
449\r
450\r
451\r
; LZMA model sizes. Per the file header, the probs array layout must match LzmaDec.c.\r
452kNumPosBitsMax equ 4\r
453kNumPosStatesMax equ (1 SHL kNumPosBitsMax)\r
454\r
455kLenNumLowBits equ 3\r
456kLenNumLowSymbols equ (1 SHL kLenNumLowBits)\r
457kLenNumHighBits equ 8\r
458kLenNumHighSymbols equ (1 SHL kLenNumHighBits)\r
459kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)\r
460\r
; Offsets (in probability slots) inside one length coder.\r
461LenLow equ 0\r
462LenChoice equ LenLow\r
463LenChoice2 equ (LenLow + kLenNumLowSymbols)\r
464LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)\r
465\r
466kNumStates equ 12\r
467kNumStates2 equ 16\r
468kNumLitStates equ 7\r
469\r
470kStartPosModelIndex equ 4\r
471kEndPosModelIndex equ 14\r
472kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1))\r
473\r
474kNumPosSlotBits equ 6\r
475kNumLenToPosStates equ 4\r
476\r
477kNumAlignBits equ 4\r
478kAlignTableSize equ (1 SHL kNumAlignBits)\r
479\r
480kMatchMinLen equ 2\r
481kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)\r
482\r
; Table offsets in probability slots, relative to a base pointer that is biased by\r
; kStartOffset slots. With these sizes kAlign lands exactly at offset 0, so the tables\r
; before it have negative offsets.\r
483kStartOffset equ 1664\r
484SpecPos equ (-kStartOffset)\r
485IsRep0Long equ (SpecPos + kNumFullDistances)\r
486RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))\r
487LenCoder equ (RepLenCoder + kNumLenProbs)\r
488IsMatch equ (LenCoder + kNumLenProbs)\r
489kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))\r
490IsRep equ (kAlign + kAlignTableSize)\r
491IsRepG0 equ (IsRep + kNumStates)\r
492IsRepG1 equ (IsRepG0 + kNumStates)\r
493IsRepG2 equ (IsRepG1 + kNumStates)\r
494PosSlot equ (IsRepG2 + kNumStates)\r
495Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))\r
496NUM_BASE_PROBS equ (Literal + kStartOffset)\r
497\r
; Assembly-time layout checks: the code addresses the align table at offset 0, and the\r
; total number of base probabilities must be 1984.\r
498if kAlign ne 0\r
499 .err <Stop_Compiling_Bad_LZMA_kAlign>\r
500endif\r
501\r
502if NUM_BASE_PROBS ne 1984\r
503 .err <Stop_Compiling_Bad_LZMA_PROBS>\r
504endif\r
505\r
506\r
; Pointer-sized struct field (8 bytes).\r
507PTR_FIELD equ dq ?\r
508\r
; Decoder state as seen by the caller. Per the file header this must match the CLzmaDec\r
; structure used by the C code. The function reads it through GLOB_2 on entry and writes\r
; the updated fields back through GLOB on exit.\r
509CLzmaDec_Asm struct\r
510 lc db ?\r
511 lp db ?\r
512 pb db ?\r
513 _pad_ db ?\r
514 dicSize dd ?\r
515\r
; probs_1664 is the pointer the decoder loads into probs; the name suggests it is\r
; probs_Spec advanced by kStartOffset (1664) slots - set up by the C side, TODO confirm.\r
516 probs_Spec PTR_FIELD\r
517 probs_1664 PTR_FIELD\r
518 dic_Spec PTR_FIELD\r
519 dicBufSize PTR_FIELD\r
520 dicPos_Spec PTR_FIELD\r
521 buf_Spec PTR_FIELD\r
522\r
523 range_Spec dd ?\r
524 code_Spec dd ?\r
525 processedPos_Spec dd ?\r
526 checkDicSize dd ?\r
527 rep0 dd ?\r
528 rep1 dd ?\r
529 rep2 dd ?\r
530 rep3 dd ?\r
531 state_Spec dd ?\r
532 remainLen dd ?\r
533CLzmaDec_Asm ends\r
534\r
535\r
; Stack frame of LzmaDec_DecodeReal_3, addressed through LOC_0 ([r0]) while it is being\r
; set up and through LOC ([RSP]) afterwards.\r
536CLzmaDec_Asm_Loc struct\r
; Caller's RSP (restored on exit) and the CLzmaDec_Asm pointer passed in.\r
537 OLD_RSP PTR_FIELD\r
538 lzmaPtr PTR_FIELD\r
539 _pad0_ PTR_FIELD\r
540 _pad1_ PTR_FIELD\r
541 _pad2_ PTR_FIELD\r
542 dicBufSize PTR_FIELD\r
; Copy of probs_1664 (the biased probs pointer); used to reload the probs register.\r
543 probs_Spec PTR_FIELD\r
544 dic_Spec PTR_FIELD\r
545 \r
; limit and dicPos_Spec hold absolute pointers here (dic base already added).\r
546 limit PTR_FIELD\r
547 bufLimit PTR_FIELD\r
; lc2 = lc + PSHIFT; lpMask / pbMask are derived from lp, lc and pb in the prologue.\r
548 lc2 dd ?\r
549 lpMask dd ?\r
550 pbMask dd ?\r
551 checkDicSize dd ?\r
552\r
553 _pad_ dd ?\r
554 remainLen dd ?\r
555 dicPos_Spec PTR_FIELD\r
556 rep0 dd ?\r
557 rep1 dd ?\r
558 rep2 dd ?\r
559 rep3 dd ?\r
560CLzmaDec_Asm_Loc ends\r
561\r
562\r
; Field-access prefixes; the field name is written directly after them (e.g. "LOC rep0").\r
;   GLOB_2 : CLzmaDec_Asm via sym_R     (used while loading state on entry)\r
;   GLOB   : CLzmaDec_Asm via r1        (used while storing state on exit)\r
;   LOC_0  : CLzmaDec_Asm_Loc via r0    (used before RSP-relative access is set up)\r
;   LOC    : CLzmaDec_Asm_Loc via RSP\r
563GLOB_2 equ [sym_R].CLzmaDec_Asm.\r
564GLOB equ [r1].CLzmaDec_Asm.\r
565LOC_0 equ [r0].CLzmaDec_Asm_Loc.\r
566LOC equ [RSP].CLzmaDec_Asm_Loc.\r
567\r
568\r
; COPY_VAR varName\r
; Copies one field from the caller's state (GLOB_2, via sym_R) into the same-named field\r
; of the local frame (LOC_0, via r0). Goes through t0, which is clobbered.\r
569COPY_VAR macro varName\r
570 mov t0, GLOB_2 varName\r
571 mov LOC_0 varName, t0\r
572endm\r
573\r
574\r
; RESTORE_VAR varName\r
; Copies one field from the local frame (LOC, via RSP) back into the same-named field of\r
; the caller's state (GLOB, via r1). Goes through t0, which is clobbered.\r
575RESTORE_VAR macro varName\r
576 mov t0, LOC varName\r
577 mov GLOB varName, t0\r
578endm\r
579\r
580\r
581\r
; IsMatchBranch_Pre\r
; Prepares the operands for the next IsMatch decision:\r
;   pbPos         = (processedPos AND pbMask) << (kLenNumLowBits + 1 + PSHIFT)\r
;   probs_state_R = probs + state_R      (state is kept pre-multiplied by PMULT)\r
; The IsMatch table offset itself is added by the branch macro that follows.\r
; The "reg" parameter is not used by the body, and every call in this file passes nothing.\r
582IsMatchBranch_Pre macro reg\r
583 ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;\r
584 mov pbPos, LOC pbMask\r
585 and pbPos, processedPos\r
586 shl pbPos, (kLenNumLowBits + 1 + PSHIFT)\r
587 lea probs_state_R, [probs + 1 * state_R]\r
588endm\r
589\r
590\r
; IsMatchBranch\r
; IsMatchBranch_Pre followed by the IsMatch decision: jumps to IsMatch_label for a 1 bit,\r
; applies the bit-0 update and falls through otherwise.\r
; The "reg" parameter is unused. This macro is not invoked anywhere in the code shown in\r
; this file (only a commented-out reference remains).\r
591IsMatchBranch macro reg\r
592 IsMatchBranch_Pre\r
593 IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label\r
594endm\r
595 \r
596\r
; CheckLimits\r
; Leaves the decode loop through fin_OK when the input pointer has reached bufLimit or the\r
; output pointer has reached limit (both unsigned compares). The "reg" parameter is unused.\r
597CheckLimits macro reg\r
598 cmp buf, LOC bufLimit\r
599 jae fin_OK\r
600 cmp dicPos, LOC limit\r
601 jae fin_OK\r
602endm\r
603\r
604\r
605\r
606; RSP is (16x + 8) bytes aligned in WIN64-x64\r
607; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)\r
608\r
; The three function arguments, in order: decoder state pointer, output limit, input limit.\r
; REG_ABI_PARAM_* are defined outside this file (presumably the ABI argument registers).\r
609PARAM_lzma equ REG_ABI_PARAM_0\r
610PARAM_limit equ REG_ABI_PARAM_1\r
611PARAM_bufLimit equ REG_ABI_PARAM_2\r
612\r
613; MY_ALIGN_64\r
; LzmaDec_DecodeReal_3(lzma, limit, bufLimit)\r
;   PARAM_lzma     - pointer to the decoder state (CLzmaDec_Asm layout)\r
;   PARAM_limit    - output limit, as an offset into the dictionary (dic is added below)\r
;   PARAM_bufLimit - input limit pointer\r
; Returns (in x0) 0 on normal exit or 1 on a bad match distance.\r
614MY_PROC LzmaDec_DecodeReal_3, 3\r
615MY_PUSH_PRESERVED_ABI_REGS\r
616\r
; Carve a 128-byte-aligned CLzmaDec_Asm_Loc frame below the current stack pointer and\r
; switch RSP to it; the previous RSP is kept in the frame and restored on exit.\r
617 lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]\r
618 and r0, -128\r
619 mov r5, RSP\r
620 mov RSP, r0\r
; Field name spelled exactly as declared in CLzmaDec_Asm_Loc (OLD_RSP), so it also resolves\r
; when the assembler treats symbols as case-sensitive.\r
621 mov LOC_0 OLD_RSP, r5\r
622 mov LOC_0 lzmaPtr, PARAM_lzma\r
623 \r
; ---- Load decoder state into registers and the local frame ----\r
624 mov LOC_0 remainLen, 0 ; remainLen must be ZERO\r
625\r
626 mov LOC_0 bufLimit, PARAM_bufLimit\r
627 mov sym_R, PARAM_lzma ; CLzmaDec_Asm pointer for GLOB_2\r
628 mov dic, GLOB_2 dic_Spec\r
; limit and dicPos are converted from dictionary offsets to absolute pointers.\r
629 add PARAM_limit, dic\r
630 mov LOC_0 limit, PARAM_limit\r
631\r
632 COPY_VAR(rep0)\r
633 COPY_VAR(rep1)\r
634 COPY_VAR(rep2)\r
635 COPY_VAR(rep3)\r
636 \r
637 mov dicPos, GLOB_2 dicPos_Spec\r
638 add dicPos, dic\r
639 mov LOC_0 dicPos_Spec, dicPos\r
640 mov LOC_0 dic_Spec, dic\r
641 \r
; pbMask = (1 << pb) - 1\r
642 mov x1_L, GLOB_2 pb\r
643 mov t0, 1\r
644 shl t0, x1_L\r
645 dec t0\r
646 mov LOC_0 pbMask, t0\r
647\r
648 ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;\r
649 ; unsigned lc = p->prop.lc;\r
650 ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);\r
651\r
; lc2 = lc + PSHIFT; lpMask = (0x100 << lp) - (0x100 >> lc), kept in the frame and in lpMask_reg.\r
; NOTE(review): only x1_L is written before x1 is stored to lc2, so the upper bits of x1\r
; come from earlier code; LIT_PROBS only reads x1_L after loading lc2.\r
652 mov x1_L, GLOB_2 lc\r
653 mov x2, 100h\r
654 mov t0, x2\r
655 shr x2, x1_L\r
656 ; inc x1\r
657 add x1_L, PSHIFT\r
658 mov LOC_0 lc2, x1\r
659 mov x1_L, GLOB_2 lp\r
660 shl t0, x1_L\r
661 sub t0, x2\r
662 mov LOC_0 lpMask, t0\r
663 mov lpMask_reg, t0\r
664 \r
665 ; mov probs, GLOB_2 probs_Spec\r
666 ; add probs, kStartOffset SHL PSHIFT\r
; probs is taken from probs_1664 and also cached in the frame for later reloads.\r
667 mov probs, GLOB_2 probs_1664\r
668 mov LOC_0 probs_Spec, probs\r
669\r
670 mov t0_R, GLOB_2 dicBufSize\r
671 mov LOC_0 dicBufSize, t0_R\r
672 \r
673 mov x1, GLOB_2 checkDicSize\r
674 mov LOC_0 checkDicSize, x1\r
675\r
676 mov processedPos, GLOB_2 processedPos_Spec\r
677\r
; state is kept pre-multiplied by PMULT for the whole function (undone at fin).\r
678 mov state, GLOB_2 state_Spec\r
679 shl state, PSHIFT\r
680\r
681 mov buf, GLOB_2 buf_Spec\r
682 mov range, GLOB_2 range_Spec\r
683 mov cod, GLOB_2 code_Spec\r
684 mov kBitModelTotal_reg, kBitModelTotal\r
685 xor sym, sym\r
686\r
; sym = previous output byte, or 0 when nothing has been processed yet.\r
; The byte is read from the end of the dictionary buffer when dicPos is at its start.\r
687 ; if (processedPos != 0 || checkDicSize != 0)\r
688 or x1, processedPos\r
689 jz @f\r
690 \r
691 add t0_R, dic\r
692 cmp dicPos, dic\r
693 cmovnz t0_R, dicPos\r
694 movzx sym, byte ptr[t0_R - 1]\r
695\r
696@@:\r
; Enter the main loop at the IsMatch decision that matches the current state:\r
;   state < 4              -> lit_end\r
;   state < kNumLitStates  -> lit_matched_end\r
;   otherwise              -> lz_end\r
697 IsMatchBranch_Pre\r
698 cmp state, 4 * PMULT\r
699 jb lit_end\r
700 cmp state, kNumLitStates * PMULT\r
701 jb lit_matched_end\r
702 jmp lz_end\r
703 \r
704\r
705 \r
706\r
707; ---------- LITERAL ----------\r
; Decodes a plain literal. lit_start resets state to 0; lit_start_2 is entered from the\r
; matched-literal path, which has already adjusted state.\r
708MY_ALIGN_64\r
709lit_start:\r
710 xor state, state\r
711lit_start_2:\r
712 LIT_PROBS lpMask_reg\r
713\r
; 8-bit tree decode: looped variant under _LZMA_SIZE_OPT, unrolled otherwise.\r
; x1 / x2 alternate as "current" and "next" probability registers.\r
714 ifdef _LZMA_SIZE_OPT\r
715\r
716 PLOAD x1, probs + 1 * PMULT\r
717 mov sym, 1\r
718MY_ALIGN_16\r
719lit_loop:\r
720 BIT_1 x1, x2\r
721 mov x1, x2\r
722 cmp sym, 127\r
723 jbe lit_loop\r
724 \r
725 else\r
726 \r
727 BIT_0 x1, x2\r
728 BIT_1 x2, x1\r
729 BIT_1 x1, x2\r
730 BIT_1 x2, x1\r
731 BIT_1 x1, x2\r
732 BIT_1 x2, x1\r
733 BIT_1 x1, x2\r
734 \r
735 endif\r
736\r
; Last bit; symSub = 256 - 1 removes the tree marker so sym is the byte value.\r
737 BIT_2 x2, 256 - 1\r
738 \r
739 ; mov dic, LOC dic_Spec\r
; probs was moved to the literal table by LIT_PROBS; restore the base pointer.\r
740 mov probs, LOC probs_Spec\r
741 IsMatchBranch_Pre\r
742 mov byte ptr[dicPos], sym_L\r
743 inc dicPos\r
744 \r
745 CheckLimits\r
; Next IsMatch decision: a 0 bit loops back for another literal (the probability update is\r
; done inside LIT_PROBS); a 1 bit falls through to IsMatch_label.\r
746lit_end:\r
747 IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start\r
748\r
749 ; jmp IsMatch_label\r
750 \r
751; ---------- MATCHES ----------\r
752; MY_ALIGN_32\r
; Reached when the IsMatch bit is 1. Applies the pending bit-1 update, then tests IsRep:\r
; a 1 bit goes to IsRep_label, a 0 bit is a new match that falls through to len_decode\r
; with probs pointing at LenCoder and state raised by kNumStates (a marker that is tested\r
; after the length decode to tell a new match from a rep match).\r
753IsMatch_label:\r
754 UPDATE_1 probs_state_R, pbPos_R, IsMatch\r
755 IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label\r
756\r
757 add probs, LenCoder * PMULT\r
758 add state, kNumStates * PMULT\r
759\r
760; ---------- LEN DECODE ----------\r
; On entry probs points at a length coder (LenCoder or RepLenCoder).\r
; len_temp carries the constant that the final BIT_2 subtracts, chosen per range\r
; (low / mid / high) so that sym ends up holding the decoded length.\r
761len_decode:\r
762 mov len_temp, 8 - 1 - kMatchMinLen\r
; First choice bit: 0 selects the low range.\r
763 IF_BIT_0_NOUP probs, 0, 0, len_mid_0\r
764 UPDATE_1 probs, 0, 0\r
; Second choice bit, located kLenNumLowSymbols slots further: 0 selects the mid range.\r
765 add probs, (1 SHL (kLenNumLowBits + PSHIFT))\r
766 mov len_temp, -1 - kMatchMinLen\r
767 IF_BIT_0_NOUP probs, 0, 0, len_mid_0\r
768 UPDATE_1 probs, 0, 0\r
; High range: 8-bit tree at LenHigh. The loop decodes bits until sym >= 64;\r
; the remaining two bits are decoded at len_mid_2.\r
769 add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))\r
770 mov sym, 1\r
771 PLOAD x1, probs + 1 * PMULT\r
772\r
773MY_ALIGN_32\r
774len8_loop:\r
775 BIT_1 x1, x2\r
776 mov x1, x2\r
777 cmp sym, 64\r
778 jb len8_loop\r
779 \r
780 mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen\r
781 jmp short len_mid_2 ; "short" is given explicitly because MASM, unlike some other assemblers, does not shorten this jump on its own\r
782 \r
; Low / mid range: 3-bit tree selected by the position state (pbPos is a byte offset).\r
783MY_ALIGN_32\r
784len_mid_0:\r
785 UPDATE_0 probs, 0, 0\r
786 add probs, pbPos_R\r
787 BIT_0 x2, x1\r
788len_mid_2:\r
789 BIT_1 x1, x2\r
790 BIT_2 x2, len_temp\r
791 mov probs, LOC probs_Spec\r
; state below kNumStates means a rep match: the distance is already in rep0.\r
792 cmp state, kNumStates * PMULT\r
793 jb copy_match\r
794 \r
795\r
796; ---------- DECODE DISTANCE ----------\r
797 ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);\r
798\r
; sym holds the length including kMatchMinLen, so the clamp is done against\r
; 3 + kMatchMinLen and the bias is removed from the table base instead.\r
799 mov t0, 3 + kMatchMinLen\r
800 cmp sym, 3 + kMatchMinLen\r
801 cmovb t0, sym\r
802 add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))\r
803 shl t0, (kNumPosSlotBits + PSHIFT)\r
804 add probs, t0_R\r
805 \r
806 ; sym = Len\r
807 ; mov LOC remainLen, sym\r
; Keep the length in len_temp while sym is reused for the distance.\r
808 mov len_temp, sym\r
809\r
; 6-bit position-slot tree: looped variant under _LZMA_SIZE_OPT, unrolled otherwise.\r
810 ifdef _LZMA_SIZE_OPT\r
811\r
812 PLOAD x1, probs + 1 * PMULT\r
813 mov sym, 1\r
814MY_ALIGN_16\r
815slot_loop:\r
816 BIT_1 x1, x2\r
817 mov x1, x2\r
818 cmp sym, 32\r
819 jb slot_loop\r
820 \r
821 else\r
822 \r
823 BIT_0 x1, x2\r
824 BIT_1 x2, x1\r
825 BIT_1 x1, x2\r
826 BIT_1 x2, x1\r
827 BIT_1 x1, x2\r
828 \r
829 endif\r
830 \r
; x1 = tree node before the last bit, i.e. 32 + (posSlot >> 1); sym becomes posSlot.\r
831 mov x1, sym\r
832 BIT_2 x2, 64-1\r
833\r
834 and sym, 3\r
835 mov probs, LOC probs_Spec\r
; Slots below kEndPosModelIndex are handled at short_dist.\r
836 cmp x1, 32 + kEndPosModelIndex / 2\r
837 jb short_dist\r
838\r
839 ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));\r
; x1 = number of direct bits, excluding the final kNumAlignBits decoded with the align tree.\r
840 sub x1, (32 + 1 + kNumAlignBits)\r
841 ; distance = (2 | (distance & 1));\r
842 or sym, 2\r
; Preload for REV_0 at direct_end: x2 = probability at slot 1 of the align table\r
; (kAlign is offset 0) and sym2_R = address of slot 2.\r
843 PLOAD x2, probs + 1 * PMULT\r
844 shl sym, kNumAlignBits + 1\r
845 lea sym2_R, [probs + 2 * PMULT]\r
846 \r
847 jmp direct_norm\r
848 ; lea t1, [sym_R + (1 SHL kNumAlignBits)]\r
849 ; cmp range, kTopValue\r
850 ; jb direct_norm\r
851 \r
852; ---------- DIRECT DISTANCE ----------\r
; Decodes x1 equiprobable bits. Each iteration halves range; when cod >= range the bit is 1:\r
; cod keeps the subtracted value and sym takes t1 (= sym + (1 << kNumAlignBits), prepared at\r
; direct_norm). Otherwise cod is restored from t0 and sym is unchanged.\r
853MY_ALIGN_32\r
854direct_loop:\r
855 shr range, 1\r
856 mov t0, cod\r
857 sub cod, range\r
858 cmovs cod, t0\r
859 cmovns sym, t1\r
860 \r
861 comment ~\r
862 sub cod, range\r
863 mov x2, cod\r
864 sar x2, 31\r
865 lea sym, dword ptr [r2 + sym_R * 2 + 1]\r
866 and x2, range\r
867 add cod, x2\r
868 ~\r
869 dec x1\r
870 je direct_end\r
871\r
872 add sym, sym\r
; Loop entry point: prepare the "bit = 1" candidate and renormalize if needed.\r
873direct_norm:\r
874 lea t1, [sym_R + (1 SHL kNumAlignBits)]\r
875 cmp range, kTopValue\r
876 jae near ptr direct_loop\r
877 ; we align for 32 here with "near ptr" command above\r
878 NORM_2\r
879 jmp direct_loop\r
880\r
; Decodes the low kNumAlignBits distance bits with the reverse bit tree at offset kAlign (0).\r
; x2 and sym2_R were preloaded before the direct-bit loop; the result is merged into sym.\r
881MY_ALIGN_32\r
882direct_end:\r
883 ; prob = + kAlign;\r
884 ; distance <<= kNumAlignBits;\r
885 REV_0 x2, x1\r
886 REV_1 x1, x2, 2\r
887 REV_1 x2, x1, 4\r
888 REV_2 x1, 8\r
889\r
; sym = decoded distance. Validate it and rotate the rep history.\r
890decode_dist_end:\r
891\r
892 ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))\r
893\r
; The old rep0..rep2 are loaded before the check because the error path\r
; (fin_ERROR_MATCH_DIST) stores them as well.\r
894 mov t1, LOC rep0\r
895 mov x1, LOC rep1\r
896 mov x2, LOC rep2\r
897 \r
898 mov t0, LOC checkDicSize\r
899 test t0, t0\r
900 cmove t0, processedPos\r
901 cmp sym, t0\r
902 jae end_of_payload\r
903 ; jmp end_of_payload ; for debug\r
904 \r
905 ; rep3 = rep2;\r
906 ; rep2 = rep1;\r
907 ; rep1 = rep0;\r
908 ; rep0 = distance + 1;\r
909\r
910 inc sym\r
911 mov LOC rep0, sym\r
912 ; mov sym, LOC remainLen\r
; sym goes back to holding the match length saved in len_temp.\r
913 mov sym, len_temp\r
914 mov LOC rep1, t1\r
915 mov LOC rep2, x1\r
916 mov LOC rep3, x2\r
917 \r
918 ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;\r
; The cmp result survives the two mov instructions (mov does not change flags).\r
919 cmp state, (kNumStates + kNumLitStates) * PMULT\r
920 mov state, kNumLitStates * PMULT\r
921 mov t0, (kNumLitStates + 3) * PMULT\r
922 cmovae state, t0\r
923\r
924 \r
925; ---------- COPY MATCH ----------\r
; Copies the match (length in sym, distance in rep0) into the dictionary, clipped to the\r
; output limit; whatever could not be copied is left in remainLen.\r
926copy_match:\r
927\r
928 ; len += kMatchMinLen;\r
929 ; add sym, kMatchMinLen\r
930\r
931 ; if ((rem = limit - dicPos) == 0)\r
932 ; {\r
933 ; p->dicPos = dicPos;\r
934 ; return SZ_ERROR_DATA;\r
935 ; }\r
; NOTE: unlike the C-style pseudo-code above, fin_dicPos_LIMIT stores sym in remainLen\r
; and leaves through fin_OK.\r
936 mov cnt_R, LOC limit\r
937 sub cnt_R, dicPos\r
938 jz fin_dicPos_LIMIT\r
939\r
940 ; curLen = ((rem < len) ? (unsigned)rem : len);\r
941 cmp cnt_R, sym_R\r
942 ; cmovae cnt_R, sym_R ; 64-bit\r
943 cmovae cnt, sym ; 32-bit\r
944\r
945 mov dic, LOC dic_Spec\r
946 mov x1, LOC rep0\r
947\r
; t0_R keeps the destination start; dicPos moves to the destination end.\r
948 mov t0_R, dicPos\r
949 add dicPos, cnt_R\r
950 ; processedPos += curLen;\r
951 add processedPos, cnt\r
952 ; len -= curLen;\r
953 sub sym, cnt\r
954 mov LOC remainLen, sym\r
955\r
956 sub t0_R, dic\r
957 \r
958 ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);\r
959 sub t0_R, r1\r
960 jae @f\r
961\r
; Source wrapped around the buffer start: r1 becomes the number of bytes from the source\r
; position to the end of the buffer; a longer copy must use the crossing path.\r
962 mov r1, LOC dicBufSize\r
963 add t0_R, r1\r
964 sub r1, t0_R\r
965 cmp cnt_R, r1\r
966 ja copy_match_cross\r
967@@:\r
968 ; if (curLen <= dicBufSize - pos)\r
969\r
970; ---------- COPY MATCH FAST ----------\r
971 ; Byte *dest = dic + dicPos;\r
972 ; mov r1, dic\r
973 ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;\r
974 ; sub t0_R, dicPos\r
975 ; dicPos += curLen;\r
976\r
977 ; const Byte *lim = dest + curLen;\r
; sym = first source byte; t0_R = source end; cnt_R = -(copy length).\r
978 add t0_R, dic\r
979 movzx sym, byte ptr[t0_R]\r
980 add t0_R, cnt_R\r
981 neg cnt_R\r
982 ; lea r1, [dicPos - 1]\r
; Shared tail: the byte in sym is stored one iteration after it was loaded, so dicPos is\r
; backed up by one and the last byte is written at copy_end.\r
983copy_common:\r
984 dec dicPos\r
985 ; cmp LOC rep0, 1\r
986 ; je rep0Label\r
987\r
988 ; t0_R - src_lim\r
989 ; r1 - dest_lim - 1\r
990 ; cnt_R - (-cnt)\r
991\r
; IsMatchBranch_Pre overwrites x1 (pbPos); rep0 is no longer needed at this point.\r
992 IsMatchBranch_Pre\r
993 inc cnt_R\r
994 jz copy_end\r
995MY_ALIGN_16\r
996@@:\r
997 mov byte ptr[cnt_R * 1 + dicPos], sym_L\r
998 movzx sym, byte ptr[cnt_R * 1 + t0_R]\r
999 inc cnt_R\r
1000 jnz @b\r
1001\r
; lz_end_match is also the entry for the single-byte rep0 copy (IsRep0Short_label).\r
1002copy_end:\r
1003lz_end_match:\r
1004 mov byte ptr[dicPos], sym_L\r
1005 inc dicPos\r
1006 \r
1007 ; IsMatchBranch_Pre\r
1008 CheckLimits\r
; IsMatch decision after a match: a 1 bit starts another match, a 0 bit falls through to\r
; the matched-literal decoder below.\r
1009lz_end:\r
1010 IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label\r
1011\r
1012\r
1013\r
1014; ---------- LITERAL MATCHED ----------\r
; Literal that follows a match: decoded with the byte at distance rep0 as context.\r
; lpMask is taken from the frame because lpMask_reg (x9) may have been reused.\r
1015 \r
1016 LIT_PROBS LOC lpMask\r
1017 \r
1018 ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r
1019 mov x1, LOC rep0\r
1020 ; mov dic, LOC dic_Spec\r
; dicPos is saved because its register (r14) is reused as "bit" by the LITM macros.\r
1021 mov LOC dicPos_Spec, dicPos\r
1022 \r
1023 ; state -= (state < 10) ? 3 : 6;\r
1024 lea t0, [state_R - 6 * PMULT]\r
1025 sub state, 3 * PMULT\r
1026 cmp state, 7 * PMULT\r
1027 cmovae state, t0\r
1028 \r
; dicPos becomes the offset of the match byte inside the dictionary (with wrap-around).\r
1029 sub dicPos, dic\r
1030 sub dicPos, r1\r
1031 jae @f\r
1032 add dicPos, LOC dicBufSize\r
1033@@:\r
1034 comment ~\r
1035 xor t0, t0\r
1036 sub dicPos, r1\r
1037 cmovb t0_R, LOC dicBufSize\r
1038 ~\r
1039 \r
1040 movzx match, byte ptr[dic + dicPos * 1]\r
1041\r
; 8-bit matched decode: looped variant under _LZMA_SIZE_OPT, unrolled otherwise.\r
; offs shares its register with dic, so dic is not valid after this point.\r
1042 ifdef _LZMA_SIZE_OPT\r
1043\r
1044 mov offs, 256 * PMULT\r
1045 shl match, (PSHIFT + 1)\r
1046 mov bit, match\r
1047 mov sym, 1\r
1048MY_ALIGN_16\r
1049litm_loop:\r
1050 LITM\r
1051 cmp sym, 256\r
1052 jb litm_loop\r
1053 sub sym, 256\r
1054 \r
1055 else\r
1056 \r
1057 LITM_0\r
1058 LITM\r
1059 LITM\r
1060 LITM\r
1061 LITM\r
1062 LITM\r
1063 LITM\r
1064 LITM_2\r
1065 \r
1066 endif\r
1067 \r
1068 mov probs, LOC probs_Spec\r
1069 IsMatchBranch_Pre\r
1070 ; mov dic, LOC dic_Spec\r
1071 mov dicPos, LOC dicPos_Spec\r
1072 mov byte ptr[dicPos], sym_L\r
1073 inc dicPos\r
1074 \r
1075 CheckLimits\r
; IsMatch decision after a matched literal. For a 0 bit the next literal is a plain one:\r
; restore lpMask_reg (x9 was used as "match"), lower state by 3 and join the literal path.\r
1076lit_matched_end:\r
1077 IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label\r
1078 ; IsMatchBranch\r
1079 mov lpMask_reg, LOC lpMask\r
1080 sub state, 3 * PMULT\r
1081 jmp lit_start_2\r
1082 \r
1083\r
1084\r
1085; ---------- REP 0 LITERAL ----------\r
; "Short rep": copies exactly one byte from distance rep0.\r
; Reached from IsRep_label when the IsRep0Long bit is 0; the pending bit-0 update is applied here.\r
1086MY_ALIGN_32\r
1087IsRep0Short_label:\r
1088 UPDATE_0 probs_state_R, pbPos_R, IsRep0Long\r
1089\r
1090 ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r
1091 mov dic, LOC dic_Spec\r
1092 mov t0_R, dicPos\r
; probBranch is free after UPDATE_0 and is reused to hold rep0.\r
1093 mov probBranch, LOC rep0\r
1094 sub t0_R, dic\r
1095 \r
; Undo the RepLenCoder offset that IsRep_label added to probs.\r
1096 sub probs, RepLenCoder * PMULT\r
1097 \r
1098 ; state = state < kNumLitStates ? 9 : 11;\r
; IsRep_label has already set state to 8 or 11; OR-ing 1 turns 8 into 9 and keeps 11.\r
1099 or state, 1 * PMULT\r
1100 \r
1101 ; the caller doesn't allow (dicPos >= limit) case for REP_SHORT\r
1102 ; so we don't need the following (dicPos == limit) check here:\r
1103 ; cmp dicPos, LOC limit\r
1104 ; jae fin_dicPos_LIMIT_REP_SHORT\r
1105\r
1106 inc processedPos\r
1107\r
1108 IsMatchBranch_Pre\r
1109 \r
1110; xor sym, sym\r
1111; sub t0_R, probBranch_R\r
1112; cmovb sym_R, LOC dicBufSize\r
1113; add t0_R, sym_R\r
; t0_R = source offset inside the dictionary, wrapped by dicBufSize on borrow.\r
1114 sub t0_R, probBranch_R\r
1115 jae @f\r
1116 add t0_R, LOC dicBufSize\r
1117@@:\r
1118 movzx sym, byte ptr[dic + t0_R * 1]\r
1119 jmp lz_end_match\r
1120 \r
1121 \r
; Rep match (IsRep bit = 1). Sets the new state, points probs at the rep length coder and\r
; decodes which rep distance is used.\r
1122MY_ALIGN_32\r
1123IsRep_label:\r
1124 UPDATE_1 probs_state_R, 0, IsRep\r
1125\r
1126 ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.\r
1127 ; So we don't check it here.\r
1128 \r
1129 ; mov t0, processedPos\r
1130 ; or t0, LOC checkDicSize\r
1131 ; jz fin_ERROR_2\r
1132\r
1133 ; state = state < kNumLitStates ? 8 : 11;\r
; probs_state_R was computed from the old state and is still used by the tests below.\r
1134 cmp state, kNumLitStates * PMULT\r
1135 mov state, 8 * PMULT\r
1136 mov probBranch, 11 * PMULT\r
1137 cmovae state, probBranch\r
1138\r
1139 ; prob = probs + RepLenCoder;\r
1140 add probs, RepLenCoder * PMULT\r
1141 \r
; IsRepG0 = 1 -> one of rep1..rep3 is used (IsRepG0_label).\r
; IsRepG0 = 0 -> rep0; IsRep0Long then selects a one-byte copy (0) or a full length decode (1).\r
1142 IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label\r
1143 IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label\r
1144 UPDATE_1 probs_state_R, pbPos_R, IsRep0Long\r
1145 jmp len_decode\r
1146\r
; Selects rep1, rep2 or rep3 as the new rep0 and shifts the older distances down.\r
; The rotation is done incrementally: each level stores what is already known and the\r
; final level writes rep0. dist (= sym) and dist2 (x9) carry the values between levels.\r
1147MY_ALIGN_32\r
1148IsRepG0_label:\r
1149 UPDATE_1 probs_state_R, 0, IsRepG0\r
1150 mov dist2, LOC rep0\r
1151 mov dist, LOC rep1\r
1152 mov LOC rep1, dist2\r
1153 \r
; IsRepG1 = 0: use the old rep1.\r
1154 IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label\r
1155 mov LOC rep0, dist\r
1156 jmp len_decode\r
1157 \r
1158; MY_ALIGN_32\r
1159IsRepG1_label:\r
1160 UPDATE_1 probs_state_R, 0, IsRepG1\r
1161 mov dist2, LOC rep2\r
1162 mov LOC rep2, dist\r
1163 \r
; IsRepG2 = 0: use the old rep2.\r
1164 IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label\r
1165 mov LOC rep0, dist2\r
1166 jmp len_decode\r
1167\r
1168; MY_ALIGN_32\r
; IsRepG2 = 1: use the old rep3.\r
1169IsRepG2_label:\r
1170 UPDATE_1 probs_state_R, 0, IsRepG2\r
1171 mov dist, LOC rep3\r
1172 mov LOC rep3, dist2\r
1173 mov LOC rep0, dist\r
1174 jmp len_decode\r
1175\r
1176 \r
1177\r
1178; ---------- SPEC SHORT DISTANCE ----------\r
; Position slots below kEndPosModelIndex. On entry x1 = 32 + (posSlot >> 1) and\r
; sym = posSlot AND 3.\r
1179\r
1180MY_ALIGN_32\r
1181short_dist:\r
; x1 = number of extra bits; zero or below means the distance is the slot value itself.\r
1182 sub x1, 32 + 1\r
1183 jbe decode_dist_end\r
1184 or sym, 2\r
1185 shl sym, x1_L\r
; sym_R becomes a pointer into the SpecPos table (slot index sym + 1); sym2 is the\r
; per-step increment in bytes and is doubled by each REV_1_VAR.\r
1186 lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]\r
1187 mov sym2, PMULT ; step\r
1188MY_ALIGN_32\r
1189spec_loop:\r
1190 REV_1_VAR x2\r
1191 dec x1\r
1192 jnz spec_loop\r
1193\r
; REV_1_VAR clobbers probs, so reload it; then convert the pointer in sym_R back into a\r
; distance value (remove the final step, the SpecPos bias and the slot scaling).\r
1194 mov probs, LOC probs_Spec\r
1195 sub sym, sym2\r
1196 sub sym, SpecPos * PMULT\r
1197 sub sym_R, probs\r
1198 shr sym, PSHIFT\r
1199 \r
1200 jmp decode_dist_end\r
1201\r
1202\r
1203; ---------- COPY MATCH CROSS ----------\r
; The source range runs past the end of the dictionary buffer. Copy byte by byte up to the\r
; end of the buffer, then continue from the buffer start through the common copy loop.\r
1204copy_match_cross:\r
1205 ; t0_R - src pos\r
1206 ; r1 - len to dicBufSize\r
1207 ; cnt_R - total copy len\r
1208\r
1209 mov t1_R, t0_R ; srcPos\r
1210 mov t0_R, dic\r
1211 mov r1, LOC dicBufSize ;\r
1212 neg cnt_R\r
; Loop until the source offset (t1_R) reaches dicBufSize.\r
1213@@:\r
1214 movzx sym, byte ptr[t1_R * 1 + t0_R]\r
1215 inc t1_R\r
1216 mov byte ptr[cnt_R * 1 + dicPos], sym_L\r
1217 inc cnt_R\r
1218 cmp t1_R, r1\r
1219 jne @b\r
1220 \r
; Set up the copy_common contract: sym = first byte at the buffer start, and t0_R adjusted\r
; so that [cnt_R + t0_R] continues from the byte after it.\r
1221 movzx sym, byte ptr[t0_R]\r
1222 sub t0_R, cnt_R\r
1223 jmp copy_common\r
1224\r
1225\r
1226\r
1227\r
1228; fin_dicPos_LIMIT_REP_SHORT:\r
1229 ; mov sym, 1\r
1230\r
; The output limit was already reached when a match was about to be copied:\r
; keep the whole pending length in remainLen and leave through the normal exit.\r
1231fin_dicPos_LIMIT:\r
1232 mov LOC remainLen, sym\r
1233 jmp fin_OK\r
1234 ; For more strict mode we can stop decoding with error\r
1235 ; mov sym, 1\r
1236 ; jmp fin\r
1237\r
1238\r
; Bad match distance (beyond the data available so far, and not the end marker).\r
; The decoder state is still updated as for a normal match, remainLen gets the length with\r
; kMatchSpecLen_Error_Data added, and the function returns 1.\r
; On entry sym already holds distance + 1 (incremented at end_of_payload).\r
1239fin_ERROR_MATCH_DIST:\r
1240\r
1241 ; rep3 = rep2;\r
1242 ; rep2 = rep1;\r
1243 ; rep1 = rep0;\r
1244 ; rep0 = distance + 1;\r
1245 \r
1246 add len_temp, kMatchSpecLen_Error_Data\r
1247 mov LOC remainLen, len_temp\r
1248\r
1249 mov LOC rep0, sym\r
1250 mov LOC rep1, t1\r
1251 mov LOC rep2, x1\r
1252 mov LOC rep3, x2\r
1253 \r
1254 ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;\r
1255 cmp state, (kNumStates + kNumLitStates) * PMULT\r
1256 mov state, kNumLitStates * PMULT\r
1257 mov t0, (kNumLitStates + 3) * PMULT\r
1258 cmovae state, t0\r
1259\r
1260 ; jmp fin_OK\r
; Return value 1 = error.\r
1261 mov sym, 1\r
1262 jmp fin\r
1263\r
; Reached when the decoded distance is not smaller than the amount of data available.\r
; A distance of 0FFFFFFFFh (which wraps to 0 on inc) is the end marker; anything else is an error.\r
1264end_of_payload:\r
1265 inc sym\r
1266 jnz fin_ERROR_MATCH_DIST\r
1267\r
; End marker: report it through remainLen and remove the kNumStates offset that\r
; IsMatch_label added to state.\r
1268 mov LOC remainLen, kMatchSpecLenStart\r
1269 sub state, kNumStates * PMULT\r
1270\r
; Normal exit: return value 0.\r
1271fin_OK:\r
1272 xor sym, sym\r
1273\r
; Common exit (sym = return value). Renormalize once more, then write the register state\r
; back into the caller's CLzmaDec_Asm structure.\r
1274fin:\r
1275 NORM\r
1276\r
; r1 is the base register used by the GLOB accessor.\r
1277 mov r1, LOC lzmaPtr\r
1278\r
; dicPos is converted back from an absolute pointer to a dictionary offset.\r
1279 sub dicPos, LOC dic_Spec\r
1280 mov GLOB dicPos_Spec, dicPos\r
1281 mov GLOB buf_Spec, buf\r
1282 mov GLOB range_Spec, range\r
1283 mov GLOB code_Spec, cod\r
; Undo the PMULT pre-scaling applied to state on entry.\r
1284 shr state, PSHIFT\r
1285 mov GLOB state_Spec, state\r
1286 mov GLOB processedPos_Spec, processedPos\r
1287\r
1288 RESTORE_VAR(remainLen)\r
1289 RESTORE_VAR(rep0)\r
1290 RESTORE_VAR(rep1)\r
1291 RESTORE_VAR(rep2)\r
1292 RESTORE_VAR(rep3)\r
1293\r
1294 mov x0, sym\r
1295 \r
; Field name spelled exactly as declared in CLzmaDec_Asm_Loc (OLD_RSP), so it also resolves\r
; when the assembler treats symbols as case-sensitive.\r
1296 mov RSP, LOC OLD_RSP\r
1297\r
1298MY_POP_PRESERVED_ABI_REGS\r
1299MY_ENDP\r
1300\r
1301_TEXT$LZMADECOPT ENDS\r
1302\r
1303end\r