git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / lzma-24.05 / Asm / x86 / LzmaDecOpt.asm
CommitLineData
f535537f 1; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
2; 2021-02-23: Igor Pavlov : Public domain
3;
4; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
5; function for check at link time.
6; That code is tightly coupled with LzmaDec_TryDummy()
7; and with other functions in the LzmaDec.c file.
8; CLzmaDec structure, (probs) array layout, input and output of
9; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
10
; Guard for builds where x64 is not defined. Both statements inside are
; commented out, so at present this block assembles to nothing.
11ifndef x64
12; x64=1
13; .err <x64_IS_REQUIRED>
14endif
15
16include 7zAsm.asm
17
18MY_ASM_START
19
20_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
21
; Alignment helpers.
; MY_ALIGN pads the current location to a caller-chosen boundary; the
; fixed-size variants emit the corresponding align directive directly.
MY_ALIGN macro num:req
        align   num
endm

MY_ALIGN_16 macro
        align   16
endm

MY_ALIGN_32 macro
        align   32
endm

MY_ALIGN_64 macro
        align   64
endm
37
38
39; _LZMA_SIZE_OPT equ 1
40
41; _LZMA_PROB32 equ 1
42
; Probability element access.
;   default build : 16-bit elements, PSHIFT = 1
;                   PLOAD zero-extends a word, PSTORE writes the 16-bit
;                   alias of the source register (name formed as src&_W)
;   _LZMA_PROB32  : 32-bit elements, PSHIFT = 2
ifndef _LZMA_PROB32
  PSHIFT equ 1
  PLOAD macro dest, mem
        movzx   dest, word ptr [mem]
  endm
  PSTORE macro src, mem
        mov     word ptr [mem], @CatStr(src, _W)
  endm
else
  PSHIFT equ 2
  PLOAD macro dest, mem
        mov     dest, dword ptr [mem]
  endm
  PSTORE macro src, mem
        mov     dword ptr [mem], src
  endm
endif
60
; Byte strides derived from PSHIFT: PMULT is the size in bytes of one
; probability element, PMULT_HALF is half of it and PMULT_2 twice it.
61PMULT equ (1 SHL PSHIFT)
62PMULT_HALF equ (1 SHL (PSHIFT - 1))
63PMULT_2 equ (1 SHL (PSHIFT + 1))
64
; Added to the length stored in remainLen on the fin_ERROR_MATCH_DIST path.
65kMatchSpecLen_Error_Data equ (1 SHL 9)
66
67; x0 range
68; x1 pbPos / (prob) TREE
69; x2 probBranch / prm (MATCHED) / pbPos / cnt
70; x3 sym
71;====== r4 === RSP
72; x5 cod
73; x6 t1 NORM_CALC / probs_state / dist
74; x7 t0 NORM_CALC / prob2 IF_BIT_1
75; x8 state
76; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg
77; x10 kBitModelTotal_reg
78; r11 probs
79; x12 offs (MATCHED) / dic / len_temp
80; x13 processedPos
81; x14 bit (MATCHED) / dicPos
82; r15 buf
83
84
; Symbolic names for the registers listed in the table above.
; The xN / rN / xN_W / xN_L names are not defined in this file (presumably
; the 32/64/16/8-bit views of one register, from 7zAsm.asm - TODO confirm).
; Several names deliberately share a register, so they cannot hold
; independent values at the same time:
;   x2  : probBranch, cnt, prm (as r2)
;   x9  : lpMask_reg, match, sym2, dist2
;   x12 : offs, len_temp, dic (as r12)
;   x14 : bit, dicPos (as r14)
;   x7  : t0, prob2        x6 : t1, probs_state        x3 : sym, dist
85cod equ x5
86cod_L equ x5_L
87range equ x0
88state equ x8
89state_R equ r8
90buf equ r15
91processedPos equ x13
92kBitModelTotal_reg equ x10
93
94probBranch equ x2
95probBranch_R equ r2
96probBranch_W equ x2_W
97
98pbPos equ x1
99pbPos_R equ r1
100
101cnt equ x2
102cnt_R equ r2
103
104lpMask_reg equ x9
105dicPos equ r14
106
107sym equ x3
108sym_R equ r3
109sym_L equ x3_L
110
111probs equ r11
112dic equ r12
113
114t0 equ x7
115t0_W equ x7_W
116t0_R equ r7
117
118prob2 equ t0
119prob2_W equ t0_W
120
121t1 equ x6
122t1_R equ r6
123
124probs_state equ t1
125probs_state_R equ t1_R
126
127prm equ r2
128match equ x9
129match_R equ r9
130offs equ x12
131offs_R equ r12
132bit equ x14
133bit_R equ r14
134
135sym2 equ x9
136sym2_R equ r9
137
138len_temp equ x12
139
140dist equ sym
141dist2 equ x9
142
143
144
; Range-coder constants used by the macros below:
;   kNumBitModelTotalBits - shift applied to range before multiplying by a probability
;   kNumMoveBits          - shift used when adapting a probability
;   kBitModelOffset       - adaptation target used together with kBitModelTotal in the CMOV macros
;   kTopValue             - range is renormalized when it is below this value (see NORM)
145kNumBitModelTotalBits equ 11
146kBitModelTotal equ (1 SHL kNumBitModelTotalBits)
147kNumMoveBits equ 5
148kBitModelOffset equ ((1 SHL kNumMoveBits) - 1)
149kTopValue equ (1 SHL 24)
150
; NORM_2: unconditional renormalization step.
; Shifts cod and range left by 8 bits, loads the next input byte from
; [buf] into the low byte of cod, and advances buf by one byte.
151NORM_2 macro
152 ; movzx t0, BYTE PTR [buf]
153 shl cod, 8
154 mov cod_L, BYTE PTR [buf]
155 shl range, 8
156 ; or cod, t0
157 inc buf
158endm
159
160
; NORM: conditional renormalization.
; Runs NORM_2 only when range < kTopValue. Flags are modified either way.
161NORM macro
162 cmp range, kTopValue
163 jae SHORT @F ; range is large enough: skip the byte fetch
164 NORM_2
165@@:
166endm
167
168
169; ---------- Branch MACROS ----------
170
; UPDATE_0: probability update used on the fall-through path of IF_BIT_1.
;   probBranch += (kBitModelTotal_reg - probBranch) >> kNumMoveBits
; The result is stored at [probOffset + probsArray + probDisp * PMULT].
; Expects probBranch to hold the current probability; clobbers prob2.
171UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
172 mov prob2, kBitModelTotal_reg
173 sub prob2, probBranch
174 shr prob2, kNumMoveBits
175 add probBranch, prob2
176 PSTORE probBranch, probOffset * 1 + probsArray + probDisp * PMULT
177endm
178
179
; UPDATE_1: companion of CMP_COD for the path taken when cod >= bound.
; On entry (as left by CMP_COD): prob2 = range before the split,
; range = bound, probBranch = current probability.
;   cod   -= bound
;   range  = old_range - bound
;   prob  -= prob >> kNumMoveBits, stored at
;            [probOffset + probsArray + probDisp * PMULT]
; Clobbers prob2 and probBranch.
180UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
181 sub prob2, range
182 sub cod, range
183 mov range, prob2
184 mov prob2, probBranch
185 shr probBranch, kNumMoveBits
186 sub prob2, probBranch
187 PSTORE prob2, probOffset * 1 + probsArray + probDisp * PMULT
188endm
189
190
; CMP_COD: load a probability and compare the code value against its bound.
;   probBranch = element at [probOffset + probsArray + probDisp * PMULT]
;   NORM
;   prob2 = range                      (copy kept for UPDATE_1)
;   range = (range >> kNumBitModelTotalBits) * probBranch
; Ends with "cmp cod, range"; the invoking macro branches on those flags.
191CMP_COD macro probsArray:req, probOffset:req, probDisp:req
192 PLOAD probBranch, probOffset * 1 + probsArray + probDisp * PMULT
193 NORM
194 mov prob2, range
195 shr range, kNumBitModelTotalBits
196 imul range, probBranch
197 cmp cod, range
198endm
199
200
; IF_BIT_1_NOUP: run CMP_COD and jump to toLabel when cod >= bound.
; No probability update is done on either path.
201IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
202 CMP_COD probsArray, probOffset, probDisp
203 jae toLabel
204endm
205
206
; IF_BIT_1: decode one bit with CMP_COD.
; Jumps to toLabel when cod >= bound (no probability update is done on
; that path); otherwise applies UPDATE_0 and falls through.
IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD  probsArray, probOffset, probDisp
        jae      toLabel
        UPDATE_0 probsArray, probOffset, probDisp
endm
211
212
; IF_BIT_0_NOUP: run CMP_COD and jump to toLabel when cod < bound.
; No probability update is done on either path.
213IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
214 CMP_COD probsArray, probOffset, probDisp
215 jb toLabel
216endm
217
218
219; ---------- CMOV MACROS ----------
220
; NORM_CALC: branch-free split used by the CMOV-based macros.
; After NORM, with bound = (range >> kNumBitModelTotalBits) * prob:
;   t0    = old_range - bound
;   range = bound
;   t1    = cod                (copy taken before the subtraction)
;   cod  -= bound              (last instruction; CF is set when cod < bound)
; The invoking macro selects the outcome with cmovb / cmovae / sbb.
221NORM_CALC macro prob:req
222 NORM
223 mov t0, range
224 shr range, kNumBitModelTotalBits
225 imul range, prob
226 sub t0, range
227 mov t1, cod
228 sub cod, range
229endm
230
231
; PUP: probability update for the CMOV path.
;   t0 = prob + ((t0 - prob) >> kNumMoveBits)    (arithmetic shift)
; and t0 is stored at probPtr. On entry t0 holds the adaptation target
; chosen by the invoking macro. Modifies flags.
232PUP macro prob:req, probPtr:req
233 sub t0, prob
234 ; only sar works for both 16/32 bit prob modes
235 sar t0, kNumMoveBits
236 add t0, prob
237 PSTORE t0, probPtr
238endm
239
240
; PUP_SUB: sym = sym - symSub - CF, followed by PUP.
; CF is consumed by sbb, so it must still be the borrow produced by
; "sub cod, range" in NORM_CALC when this macro is reached.
241PUP_SUB macro prob:req, probPtr:req, symSub:req
242 sbb sym, symSub
243 PUP prob, probPtr
244endm
245
246
; PUP_COD: finish a CMOV-decoded bit (flags from NORM_CALC still live).
;   t0  = CF ? kBitModelTotal_reg : kBitModelOffset   (target for PUP)
;   cod = CF ? t1 : cod                               (t1 = cod before the subtraction)
;   t1  = sym                                         (copy taken before sbb changes sym)
; then PUP_SUB adjusts sym and updates the probability.
247PUP_COD macro prob:req, probPtr:req, symSub:req
248 mov t0, kBitModelOffset
249 cmovb cod, t1
250 mov t1, sym
251 cmovb t0, kBitModelTotal_reg
252 PUP_SUB prob, probPtr, symSub
253endm
254
255
; BIT_0: first bit of a bit-tree decode (tree index fixed at 1).
;   prob     <- element 1 of probs
;   probNext <- element 2 when cod < bound, element 3 otherwise
;   sym      <- 2 when cod < bound, 3 otherwise (2 - (-1) - CF)
; Element 1 is updated through PUP_SUB.
256BIT_0 macro prob:req, probNext:req
257 PLOAD prob, probs + 1 * PMULT
258 PLOAD probNext, probs + 1 * PMULT_2
259
260 NORM_CALC prob
261
262 cmovae range, t0
263 PLOAD t0, probs + 1 * PMULT_2 + PMULT
264 cmovae probNext, t0
265 mov t0, kBitModelOffset
266 cmovb cod, t1
267 cmovb t0, kBitModelTotal_reg
268 mov sym, 2 ; mov leaves CF intact for the sbb in PUP_SUB
269 PUP_SUB prob, probs + 1 * PMULT, 0 - 1
270endm
271
272
; BIT_1: middle bit of a bit-tree decode.
; In:  prob = already-loaded probability, sym = current tree index.
; Out: sym = 2 * sym + 1 - CF; probNext = element at the new index
;      (element 2*old when cod < bound, element 2*old + 1 otherwise).
; The probability is stored at probs + t1_R * PMULT_HALF, where t1 holds
; the doubled index copied by PUP_COD, i.e. at element "old sym".
273BIT_1 macro prob:req, probNext:req
274 PLOAD probNext, probs + sym_R * PMULT_2
275 add sym, sym
276
277 NORM_CALC prob
278
279 cmovae range, t0
280 PLOAD t0, probs + sym_R * PMULT + PMULT
281 cmovae probNext, t0
282 PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
283endm
284
285
; BIT_2: last bit of a bit-tree decode; no next probability is loaded.
; Out: sym = 2 * sym - symSub - CF. The probability is stored at
; probs + t1_R * PMULT_HALF (t1 = doubled index copied by PUP_COD).
286BIT_2 macro prob:req, symSub:req
287 add sym, sym
288
289 NORM_CALC prob
290
291 cmovae range, t0
292 PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
293endm
294
295
296; ---------- MATCHED LITERAL ----------
297
; LITM_0: first bit of a matched literal.
; Sets offs = 256 * PMULT and shifts match left by (PSHIFT + 1); bit is
; then (offs AND match). The probability is read from
; probs + 256 * PMULT + bit + 1 * PMULT, and prm keeps that address for
; the store. After NORM_CALC: offs becomes bit when cod >= bound,
; otherwise (offs XOR bit); bit is reloaded from the doubled match.
; Out: sym = 3 - CF (0 - (-3) - CF).
298LITM_0 macro
299 mov offs, 256 * PMULT
300 shl match, (PSHIFT + 1)
301 mov bit, offs
302 and bit, match
303 PLOAD x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
304 lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
305 ; lea prm, [probs + 256 * PMULT + 1 * PMULT]
306 ; add prm, bit_R
307 xor offs, bit
308 add match, match
309
310 NORM_CALC x1
311
312 cmovae offs, bit
313 mov bit, match
314 cmovae range, t0
315 mov t0, kBitModelOffset
316 cmovb cod, t1
317 cmovb t0, kBitModelTotal_reg
318 mov sym, 0 ; mov (not xor) so CF survives for the sbb in PUP_SUB
319 PUP_SUB x1, prm, -2-1
320endm
321
322
; LITM: middle bit of a matched literal.
; bit is masked with offs, prm = probs + offs + bit, and the probability
; is read from prm + sym_R * PMULT. sym and match are doubled before
; NORM_CALC. Afterwards offs becomes bit when cod >= bound, otherwise
; (offs XOR bit); bit is reloaded from match.
; Out: sym = 2 * sym + 1 - CF; probability stored at prm + t1_R * PMULT_HALF.
323LITM macro
324 and bit, offs
325 lea prm, [probs + offs_R * 1]
326 add prm, bit_R
327 PLOAD x1, prm + sym_R * PMULT
328 xor offs, bit
329 add sym, sym
330 add match, match
331
332 NORM_CALC x1
333
334 cmovae offs, bit
335 mov bit, match
336 cmovae range, t0
337 PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
338endm
339
340
; LITM_2: last bit of a matched literal; offs/bit/match are not advanced.
; Out: sym = 2 * sym - 255 - CF, i.e. (2 * sym + 1 - CF) - 256.
341LITM_2 macro
342 and bit, offs
343 lea prm, [probs + offs_R * 1]
344 add prm, bit_R
345 PLOAD x1, prm + sym_R * PMULT
346 add sym, sym
347
348 NORM_CALC x1
349
350 cmovae range, t0
351 PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
352endm
353
354
355; ---------- REVERSE BITS ----------
356
; REV_0: first step of the reverse bit-tree decode.
; Expects prob to be loaded already and sym2_R to point at the next
; element (the invoking code loads element 1 and sets sym2_R to
; probs + 2 * PMULT beforehand).
; When cod >= bound: probNext and sym2_R switch to probs + 3 * PMULT.
; The probability is stored at probs + 1 * PMULT. Clobbers t0, t1.
357REV_0 macro prob:req, probNext:req
358 ; PLOAD prob, probs + 1 * PMULT
359 ; lea sym2_R, [probs + 2 * PMULT]
360 ; PLOAD probNext, probs + 2 * PMULT
361 PLOAD probNext, sym2_R
362
363 NORM_CALC prob
364
365 cmovae range, t0
366 PLOAD t0, probs + 3 * PMULT
367 cmovae probNext, t0
368 cmovb cod, t1
369 mov t0, kBitModelOffset
370 cmovb t0, kBitModelTotal_reg
371 lea t1_R, [probs + 3 * PMULT]
372 cmovae sym2_R, t1_R
373 PUP prob, probs + 1 * PMULT
374endm
375
376
; REV_1: middle step of the reverse bit-tree decode.
; Advances sym2_R by step * PMULT and loads probNext from it; when
; cod >= bound, probNext and sym2_R move a further step * PMULT.
; The store address t1_R - step * PMULT_2 equals the value sym2_R had on
; entry, i.e. the element that prob was loaded from.
377REV_1 macro prob:req, probNext:req, step:req
378 add sym2_R, step * PMULT
379 PLOAD probNext, sym2_R
380
381 NORM_CALC prob
382
383 cmovae range, t0
384 PLOAD t0, sym2_R + step * PMULT
385 cmovae probNext, t0
386 cmovb cod, t1
387 mov t0, kBitModelOffset
388 cmovb t0, kBitModelTotal_reg
389 lea t1_R, [sym2_R + step * PMULT]
390 cmovae sym2_R, t1_R
391 PUP prob, t1_R - step * PMULT_2
392endm
393
394
; REV_2: last step of the reverse bit-tree decode.
; Converts sym2_R from a pointer into an element index
; ((sym2_R - probs) >> PSHIFT) and ORs it into sym. When cod < bound,
; step is subtracted from sym again. The probability is stored at
; probs + sym2_R * PMULT.
395REV_2 macro prob:req, step:req
396 sub sym2_R, probs
397 shr sym2, PSHIFT
398 or sym, sym2
399
400 NORM_CALC prob
401
402 cmovae range, t0
403 lea t0, [sym - step]
404 cmovb sym, t0
405 cmovb cod, t1
406 mov t0, kBitModelOffset
407 cmovb t0, kBitModelTotal_reg
408 PUP prob, probs + sym2_R * PMULT
409endm
410
411
; REV_1_VAR: one step of the variable-length reverse decode (spec_loop).
; sym_R is used as a pointer to the current probability and sym2 as the
; current step in bytes. The pointer advances by sym2_R, and by sym2_R
; once more when cod >= bound; sym2 is doubled for the next step.
; NOTE: probs is overwritten with the store address, so the invoking code
; has to reload probs afterwards.
412REV_1_VAR macro prob:req
413 PLOAD prob, sym_R
414 mov probs, sym_R
415 add sym_R, sym2_R
416
417 NORM_CALC prob
418
419 cmovae range, t0
420 lea t0_R, [sym_R + 1 * sym2_R]
421 cmovae sym_R, t0_R
422 mov t0, kBitModelOffset
423 cmovb cod, t1
424 ; mov t1, kBitModelTotal
425 ; cmovb t0, t1
426 cmovb t0, kBitModelTotal_reg
427 add sym2, sym2
428 PUP prob, probs
429endm
430
431
432
433
; LIT_PROBS: select the literal probability table and finish the pending
; IsMatch "bit 0" update.
;   sym    = ((processedPos << 8) + sym) AND lpMaskParam
;   sym    = (sym * 3) << lc2        (only the low byte of lc2 is used as the count)
;   probs += Literal * PMULT + sym
; probs_state_R gets pbPos_R added and UPDATE_0 is applied at its IsMatch
; entry; processedPos is incremented.
; On entry sym is expected to hold the previous output byte (TODO confirm
; against every path that reaches this macro).
434LIT_PROBS macro lpMaskParam:req
435 ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
436 mov t0, processedPos
437 shl t0, 8
438 add sym, t0
439 and sym, lpMaskParam
440 add probs_state_R, pbPos_R
441 mov x1, LOC lc2
442 lea sym, dword ptr[sym_R + 2 * sym_R]
443 add probs, Literal * PMULT
444 shl sym, x1_L
445 add probs, sym_R
446 UPDATE_0 probs_state_R, 0, IsMatch
447 inc processedPos
448endm
449
450
451
; Sizes of the LZMA probability sub-tables and their offsets.
; All offsets are counted in probability elements (the code multiplies
; them by PMULT) and are relative to a base placed kStartOffset elements
; into the array, so the tables before kAlign have negative offsets.
; The two "if ... .err" checks at the end enforce kAlign == 0 and a total
; of 1984 base probabilities.
452kNumPosBitsMax equ 4
453kNumPosStatesMax equ (1 SHL kNumPosBitsMax)
454
; Length coder geometry.
455kLenNumLowBits equ 3
456kLenNumLowSymbols equ (1 SHL kLenNumLowBits)
457kLenNumHighBits equ 8
458kLenNumHighSymbols equ (1 SHL kLenNumHighBits)
459kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)
460
; Offsets inside one length coder.
461LenLow equ 0
462LenChoice equ LenLow
463LenChoice2 equ (LenLow + kLenNumLowSymbols)
464LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)
465
466kNumStates equ 12
467kNumStates2 equ 16
468kNumLitStates equ 7
469
470kStartPosModelIndex equ 4
471kEndPosModelIndex equ 14
472kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1))
473
474kNumPosSlotBits equ 6
475kNumLenToPosStates equ 4
476
477kNumAlignBits equ 4
478kAlignTableSize equ (1 SHL kNumAlignBits)
479
480kMatchMinLen equ 2
481kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
482
; Table offsets relative to the shifted base (see header comment).
483kStartOffset equ 1664
484SpecPos equ (-kStartOffset)
485IsRep0Long equ (SpecPos + kNumFullDistances)
486RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
487LenCoder equ (RepLenCoder + kNumLenProbs)
488IsMatch equ (LenCoder + kNumLenProbs)
489kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
490IsRep equ (kAlign + kAlignTableSize)
491IsRepG0 equ (IsRep + kNumStates)
492IsRepG1 equ (IsRepG0 + kNumStates)
493IsRepG2 equ (IsRepG1 + kNumStates)
494PosSlot equ (IsRepG2 + kNumStates)
495Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
496NUM_BASE_PROBS equ (Literal + kStartOffset)
497
; Assembly-time consistency checks for the layout above.
498if kAlign ne 0
499 .err <Stop_Compiling_Bad_LZMA_kAlign>
500endif
501
502if NUM_BASE_PROBS ne 1984
503 .err <Stop_Compiling_Bad_LZMA_PROBS>
504endif
505
506
; PTR_FIELD: an 8-byte field, used for pointer-sized structure members.
507PTR_FIELD equ dq ?
508
; CLzmaDec_Asm: assembler view of the decoder object passed as the first
; argument. Per the file header, its layout must be equal to the CLzmaDec
; structure used by the C code.
509CLzmaDec_Asm struct
510 lc db ?
511 lp db ?
512 pb db ?
513 _pad_ db ?
514 dicSize dd ?
515
516 probs_Spec PTR_FIELD
517 probs_1664 PTR_FIELD ; base pointer that the procedure loads into probs
518 dic_Spec PTR_FIELD
519 dicBufSize PTR_FIELD
520 dicPos_Spec PTR_FIELD
521 buf_Spec PTR_FIELD
522
523 range_Spec dd ?
524 code_Spec dd ?
525 processedPos_Spec dd ?
526 checkDicSize dd ?
527 rep0 dd ?
528 rep1 dd ?
529 rep2 dd ?
530 rep3 dd ?
531 state_Spec dd ?
532 remainLen dd ?
533CLzmaDec_Asm ends
534
535
; CLzmaDec_Asm_Loc: local frame of LzmaDec_DecodeReal_3, addressed through
; LOC (RSP-based) and LOC_0 (r0-based). It keeps the saved stack pointer,
; the decoder object pointer and working copies of decoder fields.
536CLzmaDec_Asm_Loc struct
537 OLD_RSP PTR_FIELD ; RSP value on entry to the frame setup, restored at exit
538 lzmaPtr PTR_FIELD ; first argument (decoder object)
539 _pad0_ PTR_FIELD
540 _pad1_ PTR_FIELD
541 _pad2_ PTR_FIELD
542 dicBufSize PTR_FIELD
543 probs_Spec PTR_FIELD ; copy of probs_1664, used to reload probs
544 dic_Spec PTR_FIELD
545
546 limit PTR_FIELD ; dic + limit argument
547 bufLimit PTR_FIELD
548 lc2 dd ? ; lc + PSHIFT in the low byte
549 lpMask dd ?
550 pbMask dd ?
551 checkDicSize dd ?
552
553 _pad_ dd ?
554 remainLen dd ?
555 dicPos_Spec PTR_FIELD ; absolute dicPos pointer (dic + dicPos)
556 rep0 dd ?
557 rep1 dd ?
558 rep2 dd ?
559 rep3 dd ?
560CLzmaDec_Asm_Loc ends
561
562
; Field-access prefixes; a field name is written directly after them.
;   GLOB_2 : CLzmaDec_Asm     via sym_R (used while setting up the frame)
;   GLOB   : CLzmaDec_Asm     via r1    (used when writing results back)
;   LOC_0  : CLzmaDec_Asm_Loc via r0    (used before RSP is final)
;   LOC    : CLzmaDec_Asm_Loc via RSP
563GLOB_2 equ [sym_R].CLzmaDec_Asm.
564GLOB equ [r1].CLzmaDec_Asm.
565LOC_0 equ [r0].CLzmaDec_Asm_Loc.
566LOC equ [RSP].CLzmaDec_Asm_Loc.
567
568
; COPY_VAR: copy one 32-bit field from the decoder object (GLOB_2) into
; the field of the same name in the local frame (LOC_0). Clobbers t0.
COPY_VAR macro fieldName
        mov     t0, GLOB_2 fieldName
        mov     LOC_0 fieldName, t0
endm
573
574
; RESTORE_VAR: copy one 32-bit field from the local frame (LOC) back into
; the field of the same name in the decoder object (GLOB). Clobbers t0.
RESTORE_VAR macro fieldName
        mov     t0, LOC fieldName
        mov     GLOB fieldName, t0
endm
579
580
581
; IsMatchBranch_Pre: prepare the operands of the next IsMatch test.
;   pbPos         = (processedPos AND pbMask) << (kLenNumLowBits + 1 + PSHIFT)
;   probs_state_R = probs + state_R
; The "reg" parameter is not used in the body.
582IsMatchBranch_Pre macro reg
583 ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
584 mov pbPos, LOC pbMask
585 and pbPos, processedPos
586 shl pbPos, (kLenNumLowBits + 1 + PSHIFT)
587 lea probs_state_R, [probs + 1 * state_R]
588endm
589
590
; IsMatchBranch: compute the IsMatch operands and test the bit.
; Jumps to IsMatch_label when cod >= bound; otherwise the probability is
; updated (UPDATE_0 inside IF_BIT_1) and execution falls through.
; The "reg" parameter is not used in the body.
IsMatchBranch macro reg
        ; pbPos = (processedPos AND pbMask) << (kLenNumLowBits + 1 + PSHIFT)
        mov     pbPos, LOC pbMask
        and     pbPos, processedPos
        shl     pbPos, (kLenNumLowBits + 1 + PSHIFT)
        ; probs_state_R = probs + state_R
        lea     probs_state_R, [probs + 1 * state_R]
        IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
endm
595
596
; CheckLimits: leave the decode loop through fin_OK when the input pointer
; has reached bufLimit or the output pointer has reached limit.
; The "reg" parameter is not used in the body.
597CheckLimits macro reg
598 cmp buf, LOC bufLimit
599 jae fin_OK
600 cmp dicPos, LOC limit
601 jae fin_OK
602endm
603
604
605
606; RSP is (16x + 8) bytes aligned in WIN64-x64
607; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)
608
; Names for the three procedure arguments. REG_ABI_PARAM_* are not defined
; in this file (presumably ABI-dependent argument registers from
; 7zAsm.asm - TODO confirm).
609PARAM_lzma equ REG_ABI_PARAM_0
610PARAM_limit equ REG_ABI_PARAM_1
611PARAM_bufLimit equ REG_ABI_PARAM_2
612
613; MY_ALIGN_64
; LzmaDec_DecodeReal_3 (PARAM_lzma, PARAM_limit, PARAM_bufLimit)
; Entry part: builds a 128-byte aligned CLzmaDec_Asm_Loc frame below the
; current stack pointer, copies the decoder fields it needs into the
; frame and into registers, then dispatches on the saved state.
; Returns in x0 the value of sym at "fin": 0 via fin_OK, 1 via
; fin_ERROR_MATCH_DIST.
614MY_PROC LzmaDec_DecodeReal_3, 3
615MY_PUSH_PRESERVED_ABI_REGS
616
617 lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
618 and r0, -128
619 mov r5, RSP
620 mov RSP, r0
621 mov LOC_0 Old_RSP, r5
622 mov LOC_0 lzmaPtr, PARAM_lzma
623
624 mov LOC_0 remainLen, 0 ; remainLen must be ZERO
625
626 mov LOC_0 bufLimit, PARAM_bufLimit
627 mov sym_R, PARAM_lzma ; CLzmaDec_Asm pointer: base register of GLOB_2
628 mov dic, GLOB_2 dic_Spec
629 add PARAM_limit, dic
630 mov LOC_0 limit, PARAM_limit
631
632 COPY_VAR(rep0)
633 COPY_VAR(rep1)
634 COPY_VAR(rep2)
635 COPY_VAR(rep3)
636
 ; dicPos is kept as an absolute pointer (dic + dicPos) inside the procedure
637 mov dicPos, GLOB_2 dicPos_Spec
638 add dicPos, dic
639 mov LOC_0 dicPos_Spec, dicPos
640 mov LOC_0 dic_Spec, dic
641
642 mov x1_L, GLOB_2 pb
643 mov t0, 1
644 shl t0, x1_L
645 dec t0
646 mov LOC_0 pbMask, t0
647
648 ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
649 ; unsigned lc = p->prop.lc;
650 ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
651
652 mov x1_L, GLOB_2 lc
653 mov x2, 100h
654 mov t0, x2
655 shr x2, x1_L
656 ; inc x1
657 add x1_L, PSHIFT
 ; only the low byte of x1 was written above; lc2 is later consumed
 ; through x1_L only (see LIT_PROBS)
658 mov LOC_0 lc2, x1
659 mov x1_L, GLOB_2 lp
660 shl t0, x1_L
661 sub t0, x2
662 mov LOC_0 lpMask, t0
663 mov lpMask_reg, t0
664
665 ; mov probs, GLOB_2 probs_Spec
666 ; add probs, kStartOffset SHL PSHIFT
667 mov probs, GLOB_2 probs_1664
668 mov LOC_0 probs_Spec, probs
669
670 mov t0_R, GLOB_2 dicBufSize
671 mov LOC_0 dicBufSize, t0_R
672
673 mov x1, GLOB_2 checkDicSize
674 mov LOC_0 checkDicSize, x1
675
676 mov processedPos, GLOB_2 processedPos_Spec
677
 ; state is kept pre-multiplied by PMULT while decoding
678 mov state, GLOB_2 state_Spec
679 shl state, PSHIFT
680
681 mov buf, GLOB_2 buf_Spec
682 mov range, GLOB_2 range_Spec
683 mov cod, GLOB_2 code_Spec
684 mov kBitModelTotal_reg, kBitModelTotal
685 xor sym, sym
686
687 ; if (processedPos != 0 || checkDicSize != 0)
688 or x1, processedPos
689 jz @f
690
 ; sym = byte before dicPos; when dicPos == dic the byte is taken from
 ; the end of the buffer (dic + dicBufSize - 1)
691 add t0_R, dic
692 cmp dicPos, dic
693 cmovnz t0_R, dicPos
694 movzx sym, byte ptr[t0_R - 1]
695
696@@:
 ; dispatch on the saved state: < 4, < kNumLitStates, otherwise
697 IsMatchBranch_Pre
698 cmp state, 4 * PMULT
699 jb lit_end
700 cmp state, kNumLitStates * PMULT
701 jb lit_matched_end
702 jmp lz_end
703
704
705
706
707; ---------- LITERAL ----------
; Decodes one literal with the bit-tree macros (seven unrolled steps plus
; BIT_2, or a loop when _LZMA_SIZE_OPT is defined), stores it at [dicPos]
; and repeats while the next IsMatch test at lit_end sees cod < bound.
; lit_start clears state; lit_start_2 is the entry that keeps state.
; Falls through into IsMatch_label when cod >= bound.
708MY_ALIGN_64
709lit_start:
710 xor state, state
711lit_start_2:
712 LIT_PROBS lpMask_reg
713
714 ifdef _LZMA_SIZE_OPT
715
716 PLOAD x1, probs + 1 * PMULT
717 mov sym, 1
718MY_ALIGN_16
719lit_loop:
720 BIT_1 x1, x2
721 mov x1, x2
722 cmp sym, 127
723 jbe lit_loop
724
725 else
726
 ; x1 / x2 alternate as "current" and "next" probability
727 BIT_0 x1, x2
728 BIT_1 x2, x1
729 BIT_1 x1, x2
730 BIT_1 x2, x1
731 BIT_1 x1, x2
732 BIT_1 x2, x1
733 BIT_1 x1, x2
734
735 endif
736
737 BIT_2 x2, 256 - 1
738
739 ; mov dic, LOC dic_Spec
 ; LIT_PROBS advanced probs into the literal table: reload the base
740 mov probs, LOC probs_Spec
741 IsMatchBranch_Pre
742 mov byte ptr[dicPos], sym_L
743 inc dicPos
744
745 CheckLimits
746lit_end:
747 IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start
748
749 ; jmp IsMatch_label
750
751; ---------- MATCHES ----------
; Reached when an IsMatch test found cod >= bound. Applies the pending
; UPDATE_1 for IsMatch, then tests IsRep. On the fall-through path probs
; is moved to LenCoder and kNumStates * PMULT is added to state; after
; the length decode that marker is tested with
; "cmp state, kNumStates * PMULT / jb copy_match".
752; MY_ALIGN_32
753IsMatch_label:
754 UPDATE_1 probs_state_R, pbPos_R, IsMatch
755 IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label
756
757 add probs, LenCoder * PMULT
758 add state, kNumStates * PMULT
759
760; ---------- LEN DECODE ----------
; Expects probs to point at a length coder (LenCoder or RepLenCoder).
; Two choice bits select one of three paths; len_temp holds the value
; later passed to BIT_2 as symSub:
;   first choice cod < bound  : 3-bit tree at probs + pbPos (len_mid_0)
;   second choice cod < bound : 3-bit tree one low-table further
;   otherwise                 : 8-bit tree at LenHigh (len8_loop)
; Afterwards probs is reloaded and, unless state carries the
; kNumStates * PMULT marker, control goes to copy_match.
761len_decode:
762 mov len_temp, 8 - 1 - kMatchMinLen
763 IF_BIT_0_NOUP probs, 0, 0, len_mid_0
764 UPDATE_1 probs, 0, 0
765 add probs, (1 SHL (kLenNumLowBits + PSHIFT))
766 mov len_temp, -1 - kMatchMinLen
767 IF_BIT_0_NOUP probs, 0, 0, len_mid_0
768 UPDATE_1 probs, 0, 0
769 add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
770 mov sym, 1
771 PLOAD x1, probs + 1 * PMULT
772
773MY_ALIGN_32
774len8_loop:
775 BIT_1 x1, x2
776 mov x1, x2
777 cmp sym, 64
778 jb len8_loop
779
780 mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
781 jmp short len_mid_2 ; explicit SHORT: MASM does not shorten this jump by itself the way other assemblers do
782
783MY_ALIGN_32
784len_mid_0:
785 UPDATE_0 probs, 0, 0
786 add probs, pbPos_R
787 BIT_0 x2, x1
788len_mid_2:
789 BIT_1 x1, x2
790 BIT_2 x2, len_temp
791 mov probs, LOC probs_Spec
792 cmp state, kNumStates * PMULT
793 jb copy_match
794
795
796; ---------- DECODE DISTANCE ----------
; Decodes the 6-bit position slot from the PosSlot table selected by the
; length in sym (clamped to 3 + kMatchMinLen); the length is parked in
; len_temp. x1 receives a copy of sym taken just before the final BIT_2,
; and sym keeps the low two bits of the BIT_2 result. Small x1 values go
; to short_dist; otherwise sym is preloaded for the direct-bit loop and
; x2 / sym2_R for the REV_* macros that follow it.
797 ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
798
799 mov t0, 3 + kMatchMinLen
800 cmp sym, 3 + kMatchMinLen
801 cmovb t0, sym
802 add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
803 shl t0, (kNumPosSlotBits + PSHIFT)
804 add probs, t0_R
805
806 ; sym = Len
807 ; mov LOC remainLen, sym
808 mov len_temp, sym
809
810 ifdef _LZMA_SIZE_OPT
811
812 PLOAD x1, probs + 1 * PMULT
813 mov sym, 1
814MY_ALIGN_16
815slot_loop:
816 BIT_1 x1, x2
817 mov x1, x2
818 cmp sym, 32
819 jb slot_loop
820
821 else
822
823 BIT_0 x1, x2
824 BIT_1 x2, x1
825 BIT_1 x1, x2
826 BIT_1 x2, x1
827 BIT_1 x1, x2
828
829 endif
830
831 mov x1, sym
832 BIT_2 x2, 64-1
833
834 and sym, 3
835 mov probs, LOC probs_Spec
836 cmp x1, 32 + kEndPosModelIndex / 2
837 jb short_dist
838
839 ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
840 sub x1, (32 + 1 + kNumAlignBits)
841 ; distance = (2 | (distance & 1));
842 or sym, 2
 ; operands expected by REV_0 at direct_end: x2 = element 1, sym2_R -> element 2
843 PLOAD x2, probs + 1 * PMULT
844 shl sym, kNumAlignBits + 1
845 lea sym2_R, [probs + 2 * PMULT]
846
847 jmp direct_norm
848 ; lea t1, [sym_R + (1 SHL kNumAlignBits)]
849 ; cmp range, kTopValue
850 ; jb direct_norm
851
852; ---------- DIRECT DISTANCE ----------
; Decodes x1 direct (fixed-probability) bits. Each iteration halves range
; and tentatively subtracts it from cod: if the result is negative cod is
; restored, otherwise sym takes t1 (= sym + (1 SHL kNumAlignBits), set at
; direct_norm). sym is doubled between iterations. direct_end then
; decodes the four align bits with the REV_* macros.
853MY_ALIGN_32
854direct_loop:
855 shr range, 1
856 mov t0, cod
857 sub cod, range
858 cmovs cod, t0
859 cmovns sym, t1
860
861 comment ~
862 sub cod, range
863 mov x2, cod
864 sar x2, 31
865 lea sym, dword ptr [r2 + sym_R * 2 + 1]
866 and x2, range
867 add cod, x2
868 ~
869 dec x1
870 je direct_end
871
872 add sym, sym
873direct_norm:
874 lea t1, [sym_R + (1 SHL kNumAlignBits)]
875 cmp range, kTopValue
876 jae near ptr direct_loop
877 ; we align for 32 here with "near ptr" command above
878 NORM_2
879 jmp direct_loop
880
881MY_ALIGN_32
882direct_end:
883 ; prob = + kAlign;
884 ; distance <<= kNumAlignBits;
885 REV_0 x2, x1
886 REV_1 x1, x2, 2
887 REV_1 x2, x1, 4
888 REV_2 x1, 8
889
; decode_dist_end: sym holds the decoded distance.
; Loads the old rep0..rep2 into t1 / x1 / x2, then compares the distance
; with checkDicSize (or with processedPos when checkDicSize is 0); a
; distance that is not below that bound leaves through end_of_payload.
; Otherwise the rep values are rotated, the length is restored from
; len_temp into sym, and state is set for the copy.
890decode_dist_end:
891
892 ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
893
894 mov t1, LOC rep0
895 mov x1, LOC rep1
896 mov x2, LOC rep2
897
898 mov t0, LOC checkDicSize
899 test t0, t0
900 cmove t0, processedPos
901 cmp sym, t0
902 jae end_of_payload
903 ; jmp end_of_payload ; for debug
904
905 ; rep3 = rep2;
906 ; rep2 = rep1;
907 ; rep1 = rep0;
908 ; rep0 = distance + 1;
909
910 inc sym
911 mov LOC rep0, sym
912 ; mov sym, LOC remainLen
913 mov sym, len_temp
914 mov LOC rep1, t1
915 mov LOC rep2, x1
916 mov LOC rep3, x2
917
918 ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
 ; the two mov instructions keep the flags of the cmp for cmovae
919 cmp state, (kNumStates + kNumLitStates) * PMULT
920 mov state, kNumLitStates * PMULT
921 mov t0, (kNumLitStates + 3) * PMULT
922 cmovae state, t0
923
924
925; ---------- COPY MATCH ----------
; Copies min(limit - dicPos, sym) bytes from distance rep0 behind dicPos.
; The part of the length that does not fit is stored in remainLen.
; If the source position wraps below dic, dicBufSize is added; when the
; copy would then run past the end of the buffer, copy_match_cross
; handles the first part and rejoins at copy_common.
; The loop runs with a negative counter (cnt_R) that counts up to zero;
; the final byte is written at lz_end_match.
926copy_match:
927
928 ; len += kMatchMinLen;
929 ; add sym, kMatchMinLen
930
931 ; if ((rem = limit - dicPos) == 0)
932 ; {
933 ; p->dicPos = dicPos;
934 ; return SZ_ERROR_DATA;
935 ; }
 ; NOTE: unlike the C snippet quoted above, this path does not return an
 ; error: fin_dicPos_LIMIT stores sym in remainLen and continues at fin_OK.
936 mov cnt_R, LOC limit
937 sub cnt_R, dicPos
938 jz fin_dicPos_LIMIT
939
940 ; curLen = ((rem < len) ? (unsigned)rem : len);
941 cmp cnt_R, sym_R
942 ; cmovae cnt_R, sym_R ; 64-bit
943 cmovae cnt, sym ; 32-bit
944
945 mov dic, LOC dic_Spec
946 mov x1, LOC rep0
947
948 mov t0_R, dicPos
949 add dicPos, cnt_R
950 ; processedPos += curLen;
951 add processedPos, cnt
952 ; len -= curLen;
953 sub sym, cnt
954 mov LOC remainLen, sym
955
956 sub t0_R, dic
957
958 ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
959 sub t0_R, r1
960 jae @f
961
 ; wrapped source: r1 = dicBufSize - pos = bytes available before the end
962 mov r1, LOC dicBufSize
963 add t0_R, r1
964 sub r1, t0_R
965 cmp cnt_R, r1
966 ja copy_match_cross
967@@:
968 ; if (curLen <= dicBufSize - pos)
969
970; ---------- COPY MATCH FAST ----------
971 ; Byte *dest = dic + dicPos;
972 ; mov r1, dic
973 ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
974 ; sub t0_R, dicPos
975 ; dicPos += curLen;
976
977 ; const Byte *lim = dest + curLen;
978 add t0_R, dic
979 movzx sym, byte ptr[t0_R]
980 add t0_R, cnt_R
981 neg cnt_R
982 ; lea r1, [dicPos - 1]
983copy_common:
984 dec dicPos
985 ; cmp LOC rep0, 1
986 ; je rep0Label
987
988 ; t0_R - src_lim
989 ; r1 - dest_lim - 1
990 ; cnt_R - (-cnt)
991
992 IsMatchBranch_Pre
993 inc cnt_R
994 jz copy_end
995MY_ALIGN_16
996@@:
997 mov byte ptr[cnt_R * 1 + dicPos], sym_L
998 movzx sym, byte ptr[cnt_R * 1 + t0_R]
999 inc cnt_R
1000 jnz @b
1001
1002copy_end:
1003lz_end_match:
1004 mov byte ptr[dicPos], sym_L
1005 inc dicPos
1006
1007 ; IsMatchBranch_Pre
1008 CheckLimits
1009lz_end:
1010 IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
1011
1012
1013
1014; ---------- LITERAL MATCHED ----------
; Reached by falling through from lz_end (IsMatch test saw cod < bound).
; Decodes a literal with the LITM* macros, using the byte at distance
; rep0 behind dicPos as the match byte.
; dicPos is spilled to LOC dicPos_Spec because bit shares its register
; (x14 / r14); dic shares a register with offs (x12 / r12) and is not
; valid after LITM_0 / litm_loop.
1015
1016 LIT_PROBS LOC lpMask
1017
1018 ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
1019 mov x1, LOC rep0
1020 ; mov dic, LOC dic_Spec
1021 mov LOC dicPos_Spec, dicPos
1022
1023 ; state -= (state < 10) ? 3 : 6;
1024 lea t0, [state_R - 6 * PMULT]
1025 sub state, 3 * PMULT
1026 cmp state, 7 * PMULT
1027 cmovae state, t0
1028
1029 sub dicPos, dic
1030 sub dicPos, r1
1031 jae @f
1032 add dicPos, LOC dicBufSize
1033@@:
1034 comment ~
1035 xor t0, t0
1036 sub dicPos, r1
1037 cmovb t0_R, LOC dicBufSize
1038 ~
1039
1040 movzx match, byte ptr[dic + dicPos * 1]
1041
1042 ifdef _LZMA_SIZE_OPT
1043
1044 mov offs, 256 * PMULT
1045 shl match, (PSHIFT + 1)
1046 mov bit, match
1047 mov sym, 1
1048MY_ALIGN_16
1049litm_loop:
1050 LITM
1051 cmp sym, 256
1052 jb litm_loop
1053 sub sym, 256
1054
1055 else
1056
1057 LITM_0
1058 LITM
1059 LITM
1060 LITM
1061 LITM
1062 LITM
1063 LITM
1064 LITM_2
1065
1066 endif
1067
1068 mov probs, LOC probs_Spec
1069 IsMatchBranch_Pre
1070 ; mov dic, LOC dic_Spec
1071 mov dicPos, LOC dicPos_Spec
1072 mov byte ptr[dicPos], sym_L
1073 inc dicPos
1074
1075 CheckLimits
1076lit_matched_end:
1077 IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
1078 ; IsMatchBranch
 ; next symbol is a plain literal: restore lpMask_reg (x9 was used as
 ; match), lower state by 3 and continue at lit_start_2
1079 mov lpMask_reg, LOC lpMask
1080 sub state, 3 * PMULT
1081 jmp lit_start_2
1082
1083
1084
1085; ---------- REP 0 LITERAL ----------
; Single-byte copy from distance rep0. Reached from IsRep_label when the
; IsRep0Long test saw cod < bound. probs still has RepLenCoder * PMULT
; added at this point, so it is subtracted again. The byte is loaded into
; sym and written by the shared tail at lz_end_match.
1086MY_ALIGN_32
1087IsRep0Short_label:
1088 UPDATE_0 probs_state_R, pbPos_R, IsRep0Long
1089
1090 ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
1091 mov dic, LOC dic_Spec
1092 mov t0_R, dicPos
1093 mov probBranch, LOC rep0
1094 sub t0_R, dic
1095
1096 sub probs, RepLenCoder * PMULT
1097
1098 ; state = state < kNumLitStates ? 9 : 11;
 ; IsRep_label already set state to 8 or 11 (times PMULT); OR-ing in
 ; 1 * PMULT turns that into 9 or 11
1099 or state, 1 * PMULT
1100
1101 ; the caller doesn't allow (dicPos >= limit) case for REP_SHORT
1102 ; so we don't need the following (dicPos == limit) check here:
1103 ; cmp dicPos, LOC limit
1104 ; jae fin_dicPos_LIMIT_REP_SHORT
1105
1106 inc processedPos
1107
1108 IsMatchBranch_Pre
1109
1110; xor sym, sym
1111; sub t0_R, probBranch_R
1112; cmovb sym_R, LOC dicBufSize
1113; add t0_R, sym_R
1114 sub t0_R, probBranch_R
1115 jae @f
1116 add t0_R, LOC dicBufSize
1117@@:
1118 movzx sym, byte ptr[dic + t0_R * 1]
1119 jmp lz_end_match
1120
1121
; IsRep_label .. IsRepG2_label: repeated-match selection.
; Sets state to 8 or 11 (times PMULT), points probs at RepLenCoder and
; walks the IsRepG0 / IsRepG1 / IsRepG2 tests. Depending on where the
; chain stops, rep0 is kept, or rep1 / rep2 / rep3 becomes the new rep0
; with the skipped values shifted down one slot. Every path except
; IsRep0Short_label continues at len_decode.
1122MY_ALIGN_32
1123IsRep_label:
1124 UPDATE_1 probs_state_R, 0, IsRep
1125
1126 ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
1127 ; So we don't check it here.
1128
1129 ; mov t0, processedPos
1130 ; or t0, LOC checkDicSize
1131 ; jz fin_ERROR_2
1132
1133 ; state = state < kNumLitStates ? 8 : 11;
1134 cmp state, kNumLitStates * PMULT
1135 mov state, 8 * PMULT
1136 mov probBranch, 11 * PMULT
1137 cmovae state, probBranch
1138
1139 ; prob = probs + RepLenCoder;
1140 add probs, RepLenCoder * PMULT
1141
1142 IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
1143 IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
1144 UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
1145 jmp len_decode
1146
1147MY_ALIGN_32
1148IsRepG0_label:
1149 UPDATE_1 probs_state_R, 0, IsRepG0
 ; dist2 = old rep0, dist = old rep1; rep1 <- old rep0
1150 mov dist2, LOC rep0
1151 mov dist, LOC rep1
1152 mov LOC rep1, dist2
1153
1154 IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
1155 mov LOC rep0, dist
1156 jmp len_decode
1157
1158; MY_ALIGN_32
1159IsRepG1_label:
1160 UPDATE_1 probs_state_R, 0, IsRepG1
 ; dist2 = old rep2; rep2 <- old rep1
1161 mov dist2, LOC rep2
1162 mov LOC rep2, dist
1163
1164 IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
1165 mov LOC rep0, dist2
1166 jmp len_decode
1167
1168; MY_ALIGN_32
1169IsRepG2_label:
1170 UPDATE_1 probs_state_R, 0, IsRepG2
 ; rep0 <- old rep3; rep3 <- old rep2
1171 mov dist, LOC rep3
1172 mov LOC rep3, dist2
1173 mov LOC rep0, dist
1174 jmp len_decode
1175
1176
1177
1178; ---------- SPEC SHORT DISTANCE ----------
; Distance path taken when the copy of sym made before the last slot bit
; is below 32 + kEndPosModelIndex / 2. After "sub x1, 32 + 1", a result
; of zero or a borrow means sym is used as is. Otherwise x1 bits are
; decoded with REV_1_VAR, with sym_R acting as a pointer into the SpecPos
; area and sym2 as the byte step; afterwards the pointer is converted
; back into a number (see the sub / shr sequence below).
; REV_1_VAR overwrites probs, so it is reloaded from LOC probs_Spec.
1179
1180MY_ALIGN_32
1181short_dist:
1182 sub x1, 32 + 1
1183 jbe decode_dist_end
1184 or sym, 2
1185 shl sym, x1_L
1186 lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
1187 mov sym2, PMULT ; step
1188MY_ALIGN_32
1189spec_loop:
1190 REV_1_VAR x2
1191 dec x1
1192 jnz spec_loop
1193
1194 mov probs, LOC probs_Spec
1195 sub sym, sym2
1196 sub sym, SpecPos * PMULT
1197 sub sym_R, probs
1198 shr sym, PSHIFT
1199
1200 jmp decode_dist_end
1201
1202
1203; ---------- COPY MATCH CROSS ----------
; Match copy whose source runs past the end of the dictionary buffer.
; Copies byte by byte until the source index reaches dicBufSize, then
; restarts the source at dic and joins copy_common with sym preloaded
; from [dic] and t0_R adjusted for the remaining (negative) count.
1204copy_match_cross:
1205 ; t0_R - src pos
1206 ; r1 - len to dicBufSize
1207 ; cnt_R - total copy len
1208
1209 mov t1_R, t0_R ; srcPos
1210 mov t0_R, dic
1211 mov r1, LOC dicBufSize ;
1212 neg cnt_R
1213@@:
1214 movzx sym, byte ptr[t1_R * 1 + t0_R]
1215 inc t1_R
1216 mov byte ptr[cnt_R * 1 + dicPos], sym_L
1217 inc cnt_R
1218 cmp t1_R, r1
1219 jne @b
1220
1221 movzx sym, byte ptr[t0_R]
1222 sub t0_R, cnt_R
1223 jmp copy_common
1224
1225
1226
1227
; Exit paths of LzmaDec_DecodeReal_3.
;   fin_dicPos_LIMIT     : output limit reached before a match copy; sym is
;                          stored in remainLen, then fin_OK
;   fin_ERROR_MATCH_DIST : distance out of range; stores the length plus
;                          kMatchSpecLen_Error_Data in remainLen, updates
;                          rep0..rep3 and state, result sym = 1
;   end_of_payload       : inc sym gives zero only for distance 0FFFFFFFFh;
;                          that case stores kMatchSpecLenStart in remainLen,
;                          any other value is an error
;   fin_OK               : result sym = 0
;   fin                  : normalize, write registers and frame fields back
;                          to the decoder object, restore RSP, return sym in x0
1228; fin_dicPos_LIMIT_REP_SHORT:
1229 ; mov sym, 1
1230
1231fin_dicPos_LIMIT:
1232 mov LOC remainLen, sym
1233 jmp fin_OK
1234 ; For more strict mode we can stop decoding with error
1235 ; mov sym, 1
1236 ; jmp fin
1237
1238
1239fin_ERROR_MATCH_DIST:
1240
1241 ; rep3 = rep2;
1242 ; rep2 = rep1;
1243 ; rep1 = rep0;
1244 ; rep0 = distance + 1;
1245
1246 add len_temp, kMatchSpecLen_Error_Data
1247 mov LOC remainLen, len_temp
1248
 ; sym was already incremented at end_of_payload (distance + 1)
1249 mov LOC rep0, sym
1250 mov LOC rep1, t1
1251 mov LOC rep2, x1
1252 mov LOC rep3, x2
1253
1254 ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
1255 cmp state, (kNumStates + kNumLitStates) * PMULT
1256 mov state, kNumLitStates * PMULT
1257 mov t0, (kNumLitStates + 3) * PMULT
1258 cmovae state, t0
1259
1260 ; jmp fin_OK
1261 mov sym, 1
1262 jmp fin
1263
1264end_of_payload:
1265 inc sym
1266 jnz fin_ERROR_MATCH_DIST
1267
1268 mov LOC remainLen, kMatchSpecLenStart
 ; remove the kNumStates * PMULT marker added at IsMatch_label
1269 sub state, kNumStates * PMULT
1270
1271fin_OK:
1272 xor sym, sym
1273
1274fin:
1275 NORM
1276
 ; r1 becomes the base register of GLOB
1277 mov r1, LOC lzmaPtr
1278
 ; convert the absolute dicPos pointer back into an offset from dic
1279 sub dicPos, LOC dic_Spec
1280 mov GLOB dicPos_Spec, dicPos
1281 mov GLOB buf_Spec, buf
1282 mov GLOB range_Spec, range
1283 mov GLOB code_Spec, cod
 ; undo the PSHIFT scaling applied to state at entry
1284 shr state, PSHIFT
1285 mov GLOB state_Spec, state
1286 mov GLOB processedPos_Spec, processedPos
1287
1288 RESTORE_VAR(remainLen)
1289 RESTORE_VAR(rep0)
1290 RESTORE_VAR(rep1)
1291 RESTORE_VAR(rep2)
1292 RESTORE_VAR(rep3)
1293
1294 mov x0, sym
1295
1296 mov RSP, LOC Old_RSP
1297
1298MY_POP_PRESERVED_ABI_REGS
1299MY_ENDP
1300
1301_TEXT$LZMADECOPT ENDS
1302
1303end