1 // LzmaDecOpt.S -- ARM64-ASM version of LzmaDec_DecodeReal_3() function
2 // 2021-04-25 : Igor Pavlov : Public domain
5 // 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
6 // function for check at link time.
7 // That code is tightly coupled with LzmaDec_TryDummy()
8 // and with other functions in the LzmaDec.c file.
9 // CLzmaDec structure, (probs) array layout, input and output of
10 // LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
// --- Symbol export and build-time configuration ---
// NOTE(review): this chunk is a sparse extract of the original file;
// intervening lines are not visible, so comments are kept cautious.
17 // .file "LzmaDecOpt.c"
// Entry point exported under both the Mach-O spelling (leading underscore)
// and the ELF spelling -- presumably selected/guarded by conditionals not
// visible in this extract; TODO confirm.
22 .globl _LzmaDec_DecodeReal_3
24 .global LzmaDec_DecodeReal_3
26 // .type LzmaDec_DecodeReal_3, %function
// Feature toggles: only the 4-byte match-fill path is enabled here; the
// commented-out defines document the available alternative code paths.
28 // #define _LZMA_SIZE_OPT 1
30 #define LZMA_USE_4BYTES_FILL 1
31 // #define LZMA_USE_2BYTES_COPY 1
32 // #define LZMA_USE_CMOV_LZ_WRAP 1
33 // #define _LZMA_PROB32 1
// Alignment helpers; MY_ALIGN_32 / MY_ALIGN_16 presumably come from a
// shared asm macro header that is not part of this extract.
35 #define MY_ALIGN_FOR_ENTRY MY_ALIGN_32
36 #define MY_ALIGN_FOR_LOOP MY_ALIGN_32
37 #define MY_ALIGN_FOR_LOOP_16 MY_ALIGN_16
// --- prob-array accessors: 32-bit prob mode (_LZMA_PROB32) ---
// Each probability model element is accessed with full-word ldr/str;
// PSHIFT scales a prob index into a byte offset.
// NOTE(review): macro bodies and .endm lines are only partially visible
// in this extract.
41 .macro PLOAD dest:req, mem:req
44 .macro PLOAD_PREINDEXED dest:req, mem:req, offset:req
45 ldr \dest, [\mem, \offset]!
47 .macro PLOAD_2 dest:req, mem1:req, mem2:req
48 ldr \dest, [\mem1, \mem2]
50 .macro PLOAD_LSL dest:req, mem1:req, mem2:req
51 ldr \dest, [\mem1, \mem2, lsl #PSHIFT]
53 .macro PSTORE src:req, mem:req
56 .macro PSTORE_2 src:req, mem1:req, mem2:req
57 str \src, [\mem1, \mem2]
59 .macro PSTORE_LSL src:req, mem1:req, mem2:req
60 str \src, [\mem1, \mem2, lsl #PSHIFT]
// Store variant that needs a scratch register to form the address.
62 .macro PSTORE_LSL_M1 src:req, mem1:req, mem2:req, temp_reg:req
63 // you must check that temp_reg is a free register when the macro is used
64 add \temp_reg, \mem1, \mem2
65 str \src, [\temp_reg, \mem2]
// --- prob-array accessors: 16-bit prob mode (default; CLzmaProb is UInt16) ---
// Same interface as the 32-bit variants above, but using halfword ldrh/strh.
// NOTE(review): bodies/.endm lines only partially visible in this extract.
70 .macro PLOAD dest:req, mem:req
73 .macro PLOAD_PREINDEXED dest:req, mem:req, offset:req
74 ldrh \dest, [\mem, \offset]!
76 .macro PLOAD_2 dest:req, mem1:req, mem2:req
77 ldrh \dest, [\mem1, \mem2]
79 .macro PLOAD_LSL dest:req, mem1:req, mem2:req
80 ldrh \dest, [\mem1, \mem2, lsl #PSHIFT]
82 .macro PSTORE src:req, mem:req
85 .macro PSTORE_2 src:req, mem1:req, mem2:req
86 strh \src, [\mem1, \mem2]
88 .macro PSTORE_LSL src:req, mem1:req, mem2:req
89 strh \src, [\mem1, \mem2, lsl #PSHIFT]
91 .macro PSTORE_LSL_M1 src:req, mem1:req, mem2:req, temp_reg:req
92 strh \src, [\mem1, \mem2]
// PMULT = byte size of one prob element (1 << PSHIFT); PMULT_2 = two elements.
96 .equ PMULT , (1 << PSHIFT)
97 .equ PMULT_2 , (2 << PSHIFT)
// Sentinel added to len to flag a match-distance error (see fin_ERROR_MATCH_DIST).
99 .equ kMatchSpecLen_Error_Data , (1 << 9)
// --- Register allocation map: physical reg / temp alias / role per phase ---
101 # x7 t0 : NORM_CALC : prob2 (IF_BIT_1)
102 # x6 t1 : NORM_CALC : probs_state
103 # x8 t2 : (LITM) temp : (TREE) temp
104 # x4 t3 : (LITM) bit : (TREE) temp : UPDATE_0/UPDATE_0 temp
105 # x10 t4 : (LITM) offs : (TREE) probs_PMULT : numBits
106 # x9 t5 : (LITM) match : sym2 (ShortDist)
107 # x1 t6 : (LITM) litm_prob : (TREE) prob_reg : pbPos
108 # x2 t7 : (LITM) prm : probBranch : cnt
// Aliases for readability; rN/wN naming presumably mapped by a shared
// macro header not visible in this extract.
121 #define litm_prob prob_reg
124 #define probBranch w2
136 #define update_temp_reg r4
142 #define probs_state t1_R
173 #define dicBufSize r17
182 #define probs_IsMatch r25
183 #define probs_Spec r26
184 #define checkDicSize w27
185 #define processedPos w28
187 #define lc2_lpMask w30
// --- Range-coder bit-model constants (must match LzmaDec.c) ---
190 .equ kNumBitModelTotalBits , 11
191 .equ kBitModelTotal , (1 << kNumBitModelTotalBits)
192 .equ kNumMoveBits , 5
193 .equ kBitModelOffset , (kBitModelTotal - (1 << kNumMoveBits) + 1)
// Normalization step: shift the next input byte (t0) into the code word.
198 orr cod, t0, cod, lsl 8
207 .macro TEST_HIGH_BYTE_range macro
// Range needs renormalization when its top byte is zero.
208 tst range, 0xFF000000
219 # ---------- Branch MACROS ----------
// UPDATE_0 path: prob += (kBitModelTotal - prob) >> kNumMoveBits,
// expressed via kBitModelOffset so one sub/asr/sub sequence suffices.
222 sub prob2, probBranch, kBitModelOffset
226 sub probBranch, probBranch, prob2, asr #(kNumMoveBits)
// Store the updated prob back; the .if/.elseif chain (partially visible)
// picks the cheapest addressing mode for the given offset/displacement.
229 .macro UPDATE_0__2 probsArray:req, probOffset:req, probDisp:req
231 PSTORE_2 probBranch, \probsArray, \probOffset
232 .elseif \probOffset == 0
233 PSTORE_2 probBranch, \probsArray, \probDisp * PMULT
236 // add update_temp_reg, \probsArray, \probOffset
237 PSTORE_2 probBranch, update_temp_reg, \probDisp * PMULT
241 .macro UPDATE_0 probsArray:req, probOffset:req, probDisp:req
244 UPDATE_0__2 \probsArray, \probOffset, \probDisp
// UPDATE_1 path: prob -= prob >> kNumMoveBits; range/cod adjusted for the
// bit==1 branch.
248 .macro UPDATE_1 probsArray:req, probOffset:req, probDisp:req
249 // sub cod, cod, prob2
250 // sub range, range, prob2
252 sub range, prob2, range
253 sub prob2, probBranch, probBranch, lsr #(kNumMoveBits)
255 PSTORE_2 prob2, \probsArray, \probOffset
256 .elseif \probOffset == 0
257 PSTORE_2 prob2, \probsArray, \probDisp * PMULT
260 // add update_temp_reg, \probsArray, \probOffset
261 PSTORE_2 prob2, update_temp_reg, \probDisp * PMULT
268 // lsr prob2, range, kNumBitModelTotalBits
269 // imul prob2, probBranch
// bound = (range >> kNumBitModelTotalBits) * prob; shr/imul are presumably
// x86-style aliases provided by the shared macro header -- TODO confirm.
272 shr range, kNumBitModelTotalBits
273 imul range, probBranch
// Load a prob and compare against cod (bit decision); _1 and _3 variants
// differ only in how the prob address is formed.
277 .macro CMP_COD_1 probsArray:req
278 PLOAD probBranch, \probsArray
282 .macro CMP_COD_3 probsArray:req, probOffset:req, probDisp:req
284 PLOAD_2 probBranch, \probsArray, \probOffset
285 .elseif \probOffset == 0
286 PLOAD_2 probBranch, \probsArray, \probDisp * PMULT
289 add update_temp_reg, \probsArray, \probOffset
290 PLOAD_2 probBranch, update_temp_reg, \probDisp * PMULT
// IF_BIT_* : decode one binary decision; _NOUP skips the prob update
// (caller does it), IF_BIT_1 jumps to toLabel on bit==1 after UPDATE_0.
296 .macro IF_BIT_1_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
297 CMP_COD_3 \probsArray, \probOffset, \probDisp
302 .macro IF_BIT_1 probsArray:req, probOffset:req, probDisp:req, toLabel:req
303 IF_BIT_1_NOUP \probsArray, \probOffset, \probDisp, \toLabel
304 UPDATE_0 \probsArray, \probOffset, \probDisp
308 .macro IF_BIT_0_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
309 CMP_COD_3 \probsArray, \probOffset, \probDisp
313 .macro IF_BIT_0_NOUP_1 probsArray:req, toLabel:req
314 CMP_COD_1 \probsArray
// Branch-free (conditional-select) decode helpers for bit-tree symbols.
319 # ---------- CMOV MACROS ----------
323 lsr t0, range, #kNumBitModelTotalBits
331 .macro RANGE_IMUL prob:req
335 .macro NORM_CALC prob:req
// Precompute prob - kBitModelOffset for the later PUP_BASE_2 update.
349 .macro CMOV_code_Model_Pre prob:req
350 sub t0, \prob, kBitModelOffset
// Shared prob-update: dest = prob - (dest >> kNumMoveBits) (arithmetic
// shift so the same code serves both 16- and 32-bit prob modes).
356 .macro PUP_BASE_2 prob:req, dest_reg:req
357 # only sar works for both 16/32 bit prob modes
358 sub \dest_reg, \prob, \dest_reg, asr #(kNumMoveBits)
361 .macro PUP prob:req, probPtr:req, mem2:req
363 PSTORE_2 t0, \probPtr, \mem2
368 #define probs_PMULT t4_R
371 add probs_PMULT, probs, PMULT
// BIT_0_R / BIT_1_R / BIT_2_R: first / middle / last step of an 8-bit
// literal bit-tree; csel picks the next prob for bit==0 vs bit==1.
375 .macro BIT_0_R prob:req
376 PLOAD_2 \prob, probs, 1 * PMULT
378 sub t3, \prob, kBitModelOffset
380 PLOAD_2 t2, probs, 1 * PMULT_2
384 PLOAD_2 t0, probs, 1 * PMULT_2 + PMULT
386 csel \prob, t2, t0, lo
389 PSTORE_2 t3, probs, 1 * PMULT
394 .macro BIT_1_R prob:req
397 sub t3, \prob, kBitModelOffset
399 PLOAD_LSL t2, probs, sym_R
403 PLOAD_LSL t0, probs_PMULT, sym_R
405 csel \prob, t2, t0, lo
407 PSTORE_LSL_M1 t3, probs, sym_R, t2_R
412 .macro BIT_2_R prob:req
415 sub t3, \prob, kBitModelOffset
422 PSTORE_LSL_M1 t3, probs, sym_R, t2_R
// Literal decode using the match byte as context: each step selects
// between two prob sub-tables (offs) depending on the match bit.
427 # ---------- MATCHED LITERAL ----------
430 shl match, (PSHIFT + 1)
431 and bit, match, 256 * PMULT
432 add prm, probs, 256 * PMULT + 1 * PMULT
435 eor offs, bit, 256 * PMULT
// Standard bit-model update folded into the loop (PUP_BASE_2 above).
439 sub t2, litm_prob, kBitModelOffset
448 PUP_BASE_2 litm_prob, t2
450 add prm, probs, offs_R
457 PLOAD_LSL litm_prob, prm, sym_R
461 sub t2, litm_prob, kBitModelOffset
469 PUP_BASE_2 litm_prob, t2
470 PSTORE_LSL t2, prm, sym_R
471 add prm, probs, offs_R
478 PLOAD_LSL litm_prob, prm, sym_R
481 sub t2, litm_prob, kBitModelOffset
487 PUP_BASE_2 litm_prob, t2
488 PSTORE_LSL t2, prm, sym_R
// Reverse bit-tree decode (used for align bits / low distance bits):
// bits are produced LSB-first, sym2_R walks the prob tree.
493 # ---------- REVERSE BITS ----------
495 .macro REV_0 prob:req
499 PLOAD_2 t3, probs, 3 * PMULT
500 CMOV_code_Model_Pre \prob
501 add t1_R, probs, 3 * PMULT
503 PUP \prob, probs, 1 * PMULT
504 csel \prob, t2, t3, lo
// REV_1: fixed-step middle iteration; step doubles each level.
508 .macro REV_1 prob:req, step:req
510 PLOAD_PREINDEXED t2, sym2_R, (\step * PMULT)
514 PLOAD_2 t3, sym2_R, (\step * PMULT)
515 sub t0, \prob, kBitModelOffset
517 add t1_R, sym2_R, \step * PMULT
521 csel \prob, t2, t3, lo
522 PSTORE_2 t0, t1_R, 0 - \step * PMULT_2
// REV_2: final iteration; recovers the decoded symbol from the tree walk.
526 .macro REV_2 prob:req, step:req
527 sub t1_R, sym2_R, probs
529 orr sym, sym, t1, lsr #PSHIFT
535 CMOV_code_Model_Pre \prob
// REV_1_VAR: variable-step variant for the SpecPos distance bits.
540 .macro REV_1_VAR prob:req
545 add t2_R, sym_R, sym2_R
550 CMOV_code_Model_Pre \prob
// add/sub with a constant that may exceed the 12-bit immediate field of
// AArch64 add/sub: split into two halves when needed.
// NOTE(review): "unexpcted" below is a typo in the original diagnostic
// string; left unchanged here because it is emitted text, not a comment.
556 .macro add_big dest:req, src:req, param:req
557 .if (\param) < (1 << 12)
558 add \dest, \src, \param
561 .error "unexpcted add_big expansion"
563 add \dest, \src, (\param) / 2
564 add \dest, \dest, (\param) - (\param) / 2
568 .macro sub_big dest:req, src:req, param:req
569 .if (\param) < (1 << 12)
570 sub \dest, \src, \param
573 .error "unexpcted sub_big expansion"
575 sub \dest, \src, (\param) / 2
576 sub \dest, \dest, (\param) - (\param) / 2
// Point `probs` at a sub-table, expressed relative to the cached
// probs_IsMatch base so the displacement stays small.
581 .macro SET_probs offset:req
582 // add_big probs, probs_Spec, (\offset) * PMULT
583 add probs, probs_IsMatch, ((\offset) - IsMatch) * PMULT
588 add sym, sym, processedPos, lsl 8
593 p2_and sym, lc2_lpMask
594 // p2_add probs_state, pbPos_R
597 add probs, probs, sym_R, lsl 1
598 UPDATE_0__2 probs_state, pbPos_R, 0
// --- LZMA model constants; must match the C decoder (LzmaDec.c) ---
603 .equ kNumPosBitsMax , 4
604 .equ kNumPosStatesMax , (1 << kNumPosBitsMax)
606 .equ kLenNumLowBits , 3
607 .equ kLenNumLowSymbols , (1 << kLenNumLowBits)
608 .equ kLenNumHighBits , 8
609 .equ kLenNumHighSymbols , (1 << kLenNumHighBits)
610 .equ kNumLenProbs , (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)
613 .equ LenChoice , LenLow
614 .equ LenChoice2 , (LenLow + kLenNumLowSymbols)
615 .equ LenHigh , (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)
618 .equ kNumStates2 , 16
619 .equ kNumLitStates , 7
621 .equ kStartPosModelIndex , 4
622 .equ kEndPosModelIndex , 14
623 .equ kNumFullDistances , (1 << (kEndPosModelIndex >> 1))
625 .equ kNumPosSlotBits , 6
626 .equ kNumLenToPosStates , 4
628 .equ kNumAlignBits , 4
629 .equ kAlignTableSize , (1 << kNumAlignBits)
631 .equ kMatchMinLen , 2
632 .equ kMatchSpecLenStart , (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
// --- Layout of the probs array (offsets in prob elements) ---
634 // .equ kStartOffset , 1408
635 .equ kStartOffset , 0
636 .equ SpecPos , (-kStartOffset)
637 .equ IsRep0Long , (SpecPos + kNumFullDistances)
638 .equ RepLenCoder , (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
639 .equ LenCoder , (RepLenCoder + kNumLenProbs)
640 .equ IsMatch , (LenCoder + kNumLenProbs)
641 .equ kAlign , (IsMatch + (kNumStates2 << kNumPosBitsMax))
642 .equ IsRep , (kAlign + kAlignTableSize)
643 .equ IsRepG0 , (IsRep + kNumStates)
644 .equ IsRepG1 , (IsRepG0 + kNumStates)
645 .equ IsRepG2 , (IsRepG1 + kNumStates)
646 .equ PosSlot , (IsRepG2 + kNumStates)
647 .equ Literal , (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
648 .equ NUM_BASE_PROBS , (Literal + kStartOffset)
// Compile-time layout checks against the C side.
650 .if kStartOffset != 0 // && IsMatch != 0
651 .error "Stop_Compiling_Bad_StartOffset"
654 .if NUM_BASE_PROBS != 1984
655 .error "Stop_Compiling_Bad_LZMA_PROBS"
// Byte offsets of fields inside the CLzmaDec structure, built cumulatively;
// the final .if asserts the layout matches the C struct (96 bytes covered).
661 .equ offset_dicSize , 4
662 .equ offset_probs , 4 + offset_dicSize
663 .equ offset_probs_1664 , 8 + offset_probs
664 .equ offset_dic , 8 + offset_probs_1664
665 .equ offset_dicBufSize , 8 + offset_dic
666 .equ offset_dicPos , 8 + offset_dicBufSize
667 .equ offset_buf , 8 + offset_dicPos
668 .equ offset_range , 8 + offset_buf
669 .equ offset_code , 4 + offset_range
670 .equ offset_processedPos , 4 + offset_code
671 .equ offset_checkDicSize , 4 + offset_processedPos
672 .equ offset_rep0 , 4 + offset_checkDicSize
673 .equ offset_rep1 , 4 + offset_rep0
674 .equ offset_rep2 , 4 + offset_rep1
675 .equ offset_rep3 , 4 + offset_rep2
676 .equ offset_state , 4 + offset_rep3
677 .equ offset_remainLen , 4 + offset_state
678 .equ offset_TOTAL_SIZE , 4 + offset_remainLen
680 .if offset_TOTAL_SIZE != 96
681 .error "Incorrect offset_TOTAL_SIZE"
// Precompute the IsMatch prob address for the next decision:
// posState from processedPos & pbMask, plus the state-indexed base.
685 .macro IsMatchBranch_Pre
686 # prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
687 and pbPos, pbMask, processedPos, lsl #(kLenNumLowBits + 1 + PSHIFT)
688 add probs_state, probs_IsMatch, state_R
695 IF_BIT_1 probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
706 #define CheckLimits_lit CheckLimits
708 .macro CheckLimits_lit
// Function parameters per AAPCS64; REG_ABI_PARAM_* presumably defined in
// the shared macro header (x0..x2) -- TODO confirm.
717 #define PARAM_lzma REG_ABI_PARAM_0
718 #define PARAM_limit REG_ABI_PARAM_1
719 #define PARAM_bufLimit REG_ABI_PARAM_2
// Typed loads from the CLzmaDec structure using the offsets above.
722 .macro LOAD_LZMA_VAR reg:req, struct_offs:req
723 ldr \reg, [PARAM_lzma, \struct_offs]
726 .macro LOAD_LZMA_BYTE reg:req, struct_offs:req
727 ldrb \reg, [PARAM_lzma, \struct_offs]
730 .macro LOAD_LZMA_PAIR reg0:req, reg1:req, struct_offs:req
731 ldp \reg0, \reg1, [PARAM_lzma, \struct_offs]
// int LzmaDec_DecodeReal_3(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
// Prologue: 128-byte frame; saves callee-saved x19-x28 plus x29/x30
// (AAPCS64), and stashes the CLzmaDec pointer at [sp, 120] for the
// epilogue's state write-back.
735 LzmaDec_DecodeReal_3:
736 _LzmaDec_DecodeReal_3:
742 stp x19, x20, [sp, -128]!
743 stp x21, x22, [sp, 16]
744 stp x23, x24, [sp, 32]
745 stp x25, x26, [sp, 48]
746 stp x27, x28, [sp, 64]
747 stp x29, x30, [sp, 80]
749 str PARAM_lzma, [sp, 120]
751 mov bufLimit, PARAM_bufLimit
752 mov limit, PARAM_limit
// Load decoder state from the CLzmaDec structure.
754 LOAD_LZMA_PAIR dic, dicBufSize, offset_dic
755 LOAD_LZMA_PAIR dicPos, buf, offset_dicPos
756 LOAD_LZMA_PAIR rep0, rep1, offset_rep0
757 LOAD_LZMA_PAIR rep2, rep3, offset_rep2
// pbMask is pre-shifted so pbPos can be extracted with a single AND.
759 mov t0, 1 << (kLenNumLowBits + 1 + PSHIFT)
760 LOAD_LZMA_BYTE pbMask, offset_pb
762 mov len, wzr // we can set it in all required branches instead
763 lsl pbMask, t0, pbMask
// Build lc2_lpMask from the lc/lp properties for literal context calc.
767 LOAD_LZMA_BYTE lc2_lpMask, offset_lc
768 mov t0, 256 << PSHIFT
769 LOAD_LZMA_BYTE t1, offset_lp
770 p2_add t1, lc2_lpMask
771 p2_sub lc2_lpMask, (256 << PSHIFT) - PSHIFT
773 p2_add lc2_lpMask, t0
775 LOAD_LZMA_VAR probs_Spec, offset_probs
776 LOAD_LZMA_VAR checkDicSize, offset_checkDicSize
777 LOAD_LZMA_VAR processedPos, offset_processedPos
778 LOAD_LZMA_VAR state, offset_state
779 // range is r0 : this load must be last, don't move
780 LOAD_LZMA_PAIR range, cod, offset_range
// Cache probs + IsMatch (large displacement, hence add_big).
784 add_big probs_IsMatch, probs_Spec, ((IsMatch - SpecPos) << PSHIFT)
786 // if (processedPos != 0 || checkDicSize != 0)
787 orr t0, checkDicSize, processedPos
789 add t0_R, dicBufSize, dic
// state < kNumLitStates selects the plain-literal path.
797 cmp state, kNumLitStates * PMULT
803 #define BIT_0 BIT_0_R prob_reg
804 #define BIT_1 BIT_1_R prob_reg
805 #define BIT_2 BIT_2_R prob_reg
807 # ---------- LITERAL ----------
814 #ifdef _LZMA_SIZE_OPT
816 PLOAD_2 prob_reg, probs, 1 * PMULT
// Emit the decoded literal byte and advance dicPos.
838 strb sym, [dicPos], 1
843 IF_BIT_0_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), lit_start
// High bit in `state` marks "real match" vs "rep match" (cleared later).
848 #define FLAG_STATE_BITS (4 + PSHIFT)
850 # ---------- MATCHES ----------
853 UPDATE_1 probs_state, pbPos_R, (IsMatch - IsMatch)
854 IF_BIT_1 probs_state, 0, (IsRep - IsMatch), IsRep_label
857 or state, (1 << FLAG_STATE_BITS)
// Three-level length coder: low / mid / high symbol ranges, with len
// biased so the final add produces the true length.
859 # ---------- LEN DECODE ----------
861 mov len, 8 - kMatchMinLen
862 IF_BIT_0_NOUP_1 probs, len_mid_0
864 p2_add probs, (1 << (kLenNumLowBits + PSHIFT))
865 mov len, 0 - kMatchMinLen
866 IF_BIT_0_NOUP_1 probs, len_mid_0
868 p2_add probs, LenHigh * PMULT - (1 << (kLenNumLowBits + PSHIFT))
878 PLOAD_2 prob_reg, probs, 1 * PMULT
884 tbz sym, 6, len8_loop
887 mov len, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - kMatchMinLen
893 p2_add probs, pbPos_R
899 tbz state, FLAG_STATE_BITS, copy_match
901 # ---------- DECODE DISTANCE ----------
902 // probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
// Clamp the len-to-pos-state index to kNumLenToPosStates - 1.
904 mov t0, 3 + kMatchMinLen
905 cmp len, 3 + kMatchMinLen
907 SET_probs PosSlot - (kMatchMinLen << (kNumPosSlotBits))
908 add probs, probs, t0_R, lsl #(kNumPosSlotBits + PSHIFT)
910 #ifdef _LZMA_SIZE_OPT
912 PLOAD_2 prob_reg, probs, 1 * PMULT
918 tbz sym, 5, slot_loop
933 // we need only low bits
935 cmp numBits, 32 + kEndPosModelIndex / 2
940 # unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
941 p2_sub numBits, (32 + 1 + kNumAlignBits)
942 # distance = (2 | (distance & 1));
944 PLOAD_2 prob_reg, probs, 1 * PMULT
945 add sym2_R, probs, 2 * PMULT
// Direct bits decoded branch-free via csel/csinc on the range compare.
947 # ---------- DIRECT DISTANCE ----------
954 csel cod, cod, t0, mi
955 csinc sym, sym, sym, mi
956 // csel sym, t1, sym, pl
957 // adc sym, sym, sym // not 100% compatible for "corrupted-allowed" LZMA streams
962 #ifdef _LZMA_SIZE_OPT
1007 shl sym, kNumAlignBits
// Distance validity check against the dictionary window.
1015 // if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
1017 tst checkDicSize, checkDicSize
1018 csel t0, processedPos, checkDicSize, eq
1021 // jmp end_of_payload # for debug
// Branch-free state transition after a match; the FLAG_STATE_BITS bias
// folds the flag into the compare.
1028 .macro STATE_UPDATE_FOR_MATCH
1029 // state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
1030 // cmp state, (kNumStates + kNumLitStates) * PMULT
1031 cmp state, kNumLitStates * PMULT + (1 << FLAG_STATE_BITS)
1032 mov state, kNumLitStates * PMULT
1033 mov t0, (kNumLitStates + 3) * PMULT
1036 STATE_UPDATE_FOR_MATCH
// Copy `len` bytes from distance rep0 back in the dictionary, clamped to
// the output limit; wraps across the circular dictionary boundary.
1038 # ---------- COPY MATCH ----------
1041 // if ((rem = limit - dicPos) == 0) break // return SZ_ERROR_DATA;
1042 subs cnt_R, limit, dicPos
1043 // jz fin_dicPos_LIMIT
1046 // curLen = ((rem < len) ? (unsigned)rem : len);
1050 sub t0_R, dicPos, dic
1051 p2_add dicPos, cnt_R
1052 p2_add processedPos, cnt
1055 // pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
1056 p2_sub_s t0_R, rep0_R
1060 p2_add t0_R, dicBufSize
// Fast path: source and destination both stay inside the buffer.
// cnt_R counts up through zero; loads/stores are negative-offset indexed.
1063 # ---------- COPY MATCH FAST ----------
1073 # dicPos : (ptr_to_last_dest_BYTE)
1085 #ifdef LZMA_USE_2BYTES_COPY
1086 strb sym, [dicPos, cnt_R]
1088 # dicPos : (ptr_to_last_dest_16bitWORD)
// 2-byte-per-iteration variant (disabled by default, see config above).
1090 ldrh sym, [t0_R, cnt_R]
1091 adds cnt_R, cnt_R, 2
1096 strh sym, [dicPos, cnt_R]
1097 ldrh sym, [t0_R, cnt_R]
1098 adds cnt_R, cnt_R, 2
1102 strh sym, [dicPos, cnt_R]
1103 ldrh sym, [t0_R, cnt_R]
1104 adds cnt_R, cnt_R, 2
1109 // for universal little/big endian code, but slow
1112 ldrb sym, [t0_R, -1]
1115 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1116 // we must improve big-endian detection for other compilers
1117 // for big-endian we need to revert bytes
1121 // (sym) must represent as little-endian here:
1122 strb sym, [dicPos], 1
// Default byte-at-a-time copy loop (two bytes unrolled per iteration).
1129 strb sym, [dicPos, cnt_R]
1130 ldrb sym, [t0_R, cnt_R]
1134 strb sym, [dicPos, cnt_R]
1135 ldrb sym, [t0_R, cnt_R]
1142 strb sym, [dicPos], 1
1147 IF_BIT_1_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
// Literal decoded with match-byte context (state >= kNumLitStates).
1151 # ---------- LITERAL MATCHED ----------
1155 // matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
1157 sub t0_R, dicPos, dic
1158 p2_sub_s t0_R, rep0_R
// Two strategies for the circular-buffer wrap: csel-based or branchy.
1160 #ifdef LZMA_USE_CMOV_LZ_WRAP
1161 add t1_R, t0_R, dicBufSize
1165 p2_add t0_R, dicBufSize
1169 ldrb match, [dic, t0_R]
// Branch-free state -= (state < 10) ? 3 : 6 (values scaled by PMULT).
1171 // state -= (state < 10) ? 3 : 6;
1172 sub sym, state, 6 * PMULT
1173 cmp state, 10 * PMULT
1174 p2_sub state, 3 * PMULT
1177 #ifdef _LZMA_SIZE_OPT
1179 mov offs, 256 * PMULT
1180 shl match, (PSHIFT + 1)
1182 and bit, match, offs
1183 add prm, probs, offs_R
1188 tbz sym, 8, litm_loop
1204 strb sym, [dicPos], 1
1207 // mov len, wzr // LITM uses same register (len / offs). So we clear it
1210 IF_BIT_1_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
1212 p2_sub state, 3 * PMULT
// Rep0 short match of length 1: copy a single byte from distance rep0.
1217 # ---------- REP 0 LITERAL ----------
1220 UPDATE_0 probs_state, pbPos_R, 0
1222 // dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
1223 sub t0_R, dicPos, dic
1225 // state = state < kNumLitStates ? 9 : 11;
1228 # the caller doesn't allow (dicPos >= limit) case for REP_SHORT
1229 # so we don't need the following (dicPos == limit) check here:
1231 # jae fin_dicPos_LIMIT_REP_SHORT
1232 # // jmp fin_dicPos_LIMIT_REP_SHORT // for testing/debug purposes
1238 p2_sub_s t0_R, rep0_R
1239 #ifdef LZMA_USE_CMOV_LZ_WRAP
1240 add sym_R, t0_R, dicBufSize
1244 p2_add t0_R, dicBufSize
1248 ldrb sym, [dic, t0_R]
// Rep-match decision chain: IsRep -> IsRepG0 -> IsRepG1 -> IsRepG2,
// selecting which of rep0..rep3 supplies the distance.
1254 UPDATE_1 probs_state, 0, (IsRep - IsMatch)
1256 # The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
1257 # So we don't check it here.
1259 # mov t0, processedPos
1260 # or t0, checkDicSize
1263 // state = state < kNumLitStates ? 8 : 11;
1264 cmp state, kNumLitStates * PMULT
1265 mov state, 8 * PMULT
1266 mov probBranch, 11 * PMULT
// cmovae is presumably an x86-style conditional-move alias (csel on hs)
// from the shared macro header, not a native A64 mnemonic -- TODO confirm.
1267 cmovae state, probBranch
1269 SET_probs RepLenCoder
1271 IF_BIT_1 probs_state, 0, (IsRepG0 - IsMatch), IsRepG0_label
1272 sub_big probs_state, probs_state, (IsMatch - IsRep0Long) << PSHIFT
1273 IF_BIT_0_NOUP probs_state, pbPos_R, 0, IsRep0Short_label
1274 UPDATE_1 probs_state, pbPos_R, 0
1279 UPDATE_1 probs_state, 0, (IsRepG0 - IsMatch)
1280 IF_BIT_1 probs_state, 0, (IsRepG1 - IsMatch), IsRepG1_label
1286 # MY_ALIGN_FOR_ENTRY
1288 UPDATE_1 probs_state, 0, (IsRepG1 - IsMatch)
1289 IF_BIT_1 probs_state, 0, (IsRepG2 - IsMatch), IsRepG2_label
1296 # MY_ALIGN_FOR_ENTRY
1298 UPDATE_1 probs_state, 0, (IsRepG2 - IsMatch)
// Short distances decoded with the reverse bit-tree over SpecPos probs.
1308 # ---------- SPEC SHORT DISTANCE ----------
1312 p2_sub_s numBits, 32 + 1
1316 add sym_R, probs_Spec, sym_R, lsl #PSHIFT
1317 p2_add sym_R, SpecPos * PMULT + 1 * PMULT
1318 mov sym2, PMULT // # step
1325 p2_add sym2_R, probs_Spec
1327 p2_add sym2_R, SpecPos * PMULT
1329 p2_sub sym_R, sym2_R
// Match with distance 1: a run of one repeated byte. With
// LZMA_USE_4BYTES_FILL the byte is replicated into a word (orr/orr)
// and stored 4 bytes at a time after a small head of byte stores.
1336 # ---------- COPY MATCH 0 ----------
1339 #ifdef LZMA_USE_4BYTES_FILL
1340 strb sym, [dicPos, cnt_R]
1344 strb sym, [dicPos, cnt_R]
1348 strb sym, [dicPos, cnt_R]
1352 orr t3, sym, sym, lsl 8
1354 orr t3, t3, t3, lsl 16
1355 MY_ALIGN_FOR_LOOP_16
1358 str t3, [dicPos, cnt_R]
1359 adds cnt_R, cnt_R, 4
1363 str t3, [dicPos, cnt_R]
1364 adds cnt_R, cnt_R, 4
1372 strb sym, [dicPos, cnt_R]
1376 strb sym, [dicPos, cnt_R]
// Slow path: the copy source wraps around the end of the dictionary.
1384 # ---------- COPY MATCH CROSS ----------
1387 # cnt_R - total copy len
1391 ldrb sym, [dic, t0_R]
1393 strb sym, [dicPos, cnt_R]
1395 cmp t0_R, dicBufSize
1399 sub t0_R, dic, cnt_R
1406 fin_dicPos_LIMIT_REP_SHORT:
1414 # For more strict mode we can stop decoding with error
// Error exit: flag bad distance by biasing len with the sentinel value.
1419 fin_ERROR_MATCH_DIST:
1420 # rep0 = distance + 1;
1421 p2_add len, kMatchSpecLen_Error_Data
1426 STATE_UPDATE_FOR_MATCH
1433 jnz fin_ERROR_MATCH_DIST
1435 mov len, kMatchSpecLenStart
1436 xor state, (1 << FLAG_STATE_BITS)
// Function exit: write the live decoder state back into the CLzmaDec
// structure (pointer reloaded from [sp, 120], saved in the prologue),
// then restore callee-saved registers and pop the 128-byte frame.
1450 #define fin_lzma_reg t0_R
1452 .macro STORE_LZMA_VAR reg:req, struct_offs:req
1453 str \reg, [fin_lzma_reg, \struct_offs]
1456 .macro STORE_LZMA_PAIR reg0:req, reg1:req, struct_offs:req
1457 stp \reg0, \reg1, [fin_lzma_reg, \struct_offs]
1460 ldr fin_lzma_reg, [sp, 120]
1464 STORE_LZMA_PAIR dicPos, buf, offset_dicPos
1465 STORE_LZMA_PAIR range, cod, offset_range
1466 STORE_LZMA_VAR processedPos, offset_processedPos
1467 STORE_LZMA_PAIR rep0, rep1, offset_rep0
1468 STORE_LZMA_PAIR rep2, rep3, offset_rep2
1469 STORE_LZMA_PAIR state, len, offset_state
// Restore pairs in reverse order of the prologue.
1473 ldp x29, x30, [sp, 80]
1474 ldp x27, x28, [sp, 64]
1475 ldp x25, x26, [sp, 48]
1476 ldp x23, x24, [sp, 32]
1477 ldp x21, x22, [sp, 16]
1478 ldp x19, x20, [sp], 128
1484 .size LzmaDec_DecodeReal_3, .-LzmaDec_DecodeReal_3
// Mark the stack non-executable for GNU toolchains.
1486 .section .note.GNU-stack,"",@progbits