// LzmaDecOpt.S -- ARM64-ASM version of LzmaDec_DecodeReal_3() function
// 2021-04-25 : Igor Pavlov : Public domain

/*
; 3 is the code compatibility version of the LzmaDec_DecodeReal_*()
; function, for checking at link time.
; That code is tightly coupled with LzmaDec_TryDummy()
; and with other functions in the LzmaDec.c file.
; The CLzmaDec structure, the (probs) array layout, and the input and output
; of LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
*/

#include "7zAsm.S"

// .arch armv8-a
// .file "LzmaDecOpt.c"

        .text
        .align 2
        .p2align 4,,15

#ifdef __APPLE__
        .globl _LzmaDec_DecodeReal_3
#else
        .global LzmaDec_DecodeReal_3
#endif
// .type LzmaDec_DecodeReal_3, %function

// #define _LZMA_SIZE_OPT 1

#define LZMA_USE_4BYTES_FILL 1
// #define LZMA_USE_2BYTES_COPY 1
// #define LZMA_USE_CMOV_LZ_WRAP 1
// #define _LZMA_PROB32 1

#define MY_ALIGN_FOR_ENTRY    MY_ALIGN_32
#define MY_ALIGN_FOR_LOOP     MY_ALIGN_32
#define MY_ALIGN_FOR_LOOP_16  MY_ALIGN_16
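
/*
  Editor's note: the PLOAD/PSTORE macros below abstract the element type of
  the probability array. A rough C view, assuming the CLzmaProb typedef from
  LzmaDec.h:

      #ifdef _LZMA_PROB32
        typedef UInt32 CLzmaProb;   // PSHIFT = 2, stride PMULT = 4 bytes
      #else
        typedef UInt16 CLzmaProb;   // PSHIFT = 1, stride PMULT = 2 bytes
      #endif
      // PLOAD  dst, mem   ~   dst = *(const CLzmaProb *)(mem);
      // PSTORE src, mem   ~   *(CLzmaProb *)(mem) = src;

  All prob offsets in this file are pre-scaled by PMULT, so the same code
  serves both element sizes.
*/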
#ifdef _LZMA_PROB32

.equ PSHIFT , 2

.macro PLOAD dest:req, mem:req
        ldr     \dest, [\mem]
.endm

.macro PLOAD_PREINDEXED dest:req, mem:req, offset:req
        ldr     \dest, [\mem, \offset]!
.endm

.macro PLOAD_2 dest:req, mem1:req, mem2:req
        ldr     \dest, [\mem1, \mem2]
.endm

.macro PLOAD_LSL dest:req, mem1:req, mem2:req
        ldr     \dest, [\mem1, \mem2, lsl #PSHIFT]
.endm

.macro PSTORE src:req, mem:req
        str     \src, [\mem]
.endm

.macro PSTORE_2 src:req, mem1:req, mem2:req
        str     \src, [\mem1, \mem2]
.endm

.macro PSTORE_LSL src:req, mem1:req, mem2:req
        str     \src, [\mem1, \mem2, lsl #PSHIFT]
.endm

.macro PSTORE_LSL_M1 src:req, mem1:req, mem2:req, temp_reg:req
        // you must check that temp_reg is a free register when this macro is used
        add     \temp_reg, \mem1, \mem2
        str     \src, [\temp_reg, \mem2]
.endm

#else

// .equ PSHIFT , 1
#define PSHIFT 1

.macro PLOAD dest:req, mem:req
        ldrh    \dest, [\mem]
.endm

.macro PLOAD_PREINDEXED dest:req, mem:req, offset:req
        ldrh    \dest, [\mem, \offset]!
.endm

.macro PLOAD_2 dest:req, mem1:req, mem2:req
        ldrh    \dest, [\mem1, \mem2]
.endm

.macro PLOAD_LSL dest:req, mem1:req, mem2:req
        ldrh    \dest, [\mem1, \mem2, lsl #PSHIFT]
.endm

.macro PSTORE src:req, mem:req
        strh    \src, [\mem]
.endm

.macro PSTORE_2 src:req, mem1:req, mem2:req
        strh    \src, [\mem1, \mem2]
.endm

.macro PSTORE_LSL src:req, mem1:req, mem2:req
        strh    \src, [\mem1, \mem2, lsl #PSHIFT]
.endm

.macro PSTORE_LSL_M1 src:req, mem1:req, mem2:req, temp_reg:req
        strh    \src, [\mem1, \mem2]
.endm

#endif

.equ PMULT , (1 << PSHIFT)
.equ PMULT_2 , (2 << PSHIFT)

.equ kMatchSpecLen_Error_Data , (1 << 9)

#  x7   t0 : NORM_CALC : prob2 (IF_BIT_1)
#  x6   t1 : NORM_CALC : probs_state
#  x8   t2 : (LITM) temp : (TREE) temp
#  x4   t3 : (LITM) bit : (TREE) temp : UPDATE_0/UPDATE_1 temp
#  x10  t4 : (LITM) offs : (TREE) probs_PMULT : numBits
#  x9   t5 : (LITM) match : sym2 (ShortDist)
#  x1   t6 : (LITM) litm_prob : (TREE) prob_reg : pbPos
#  x2   t7 : (LITM) prm : probBranch : cnt
#  x3   sym : dist
#  x12  len
#  x0   range
#  x5   cod

#define range     w0

// t6
#define pbPos     w1
#define pbPos_R   r1
#define prob_reg  w1
#define litm_prob prob_reg

// t7
#define probBranch w2
#define cnt        w2
#define cnt_R      r2
#define prm        r2

#define sym    w3
#define sym_R  r3
#define dist   sym

#define t3     w4
#define bit    w4
#define bit_R  r4
#define update_temp_reg r4

#define cod    w5

#define t1     w6
#define t1_R   r6
#define probs_state t1_R

#define t0     w7
#define t0_R   r7
#define prob2  t0

#define t2     w8
#define t2_R   r8

// t5
#define match   w9
#define sym2    w9
#define sym2_R  r9

#define t4     w10
#define t4_R   r10
#define offs   w10
#define offs_R r10

#define probs  r11

#define len    w12
#define len_R  x12

#define state   w13
#define state_R r13

#define dicPos     r14
#define buf        r15
#define bufLimit   r16
#define dicBufSize r17
#define limit      r19

#define rep0    w20
#define rep0_R  r20
#define rep1    w21
#define rep2    w22
#define rep3    w23

#define dic           r24
#define probs_IsMatch r25
#define probs_Spec    r26
#define checkDicSize  w27
#define processedPos  w28
#define pbMask        w29
#define lc2_lpMask    w30

.equ kNumBitModelTotalBits , 11
.equ kBitModelTotal , (1 << kNumBitModelTotalBits)
.equ kNumMoveBits , 5
.equ kBitModelOffset , (kBitModelTotal - (1 << kNumMoveBits) + 1)

.macro NORM_2 macro
        ldrb    t0, [buf], 1
        shl     range, 8
        orr     cod, t0, cod, lsl 8
        /*
        mov     t0, cod
        ldrb    cod, [buf], 1
        shl     range, 8
        bfi     cod, t0, #8, #24
        */
.endm

.macro TEST_HIGH_BYTE_range macro
        tst     range, 0xFF000000
.endm

.macro NORM macro
        TEST_HIGH_BYTE_range
        jnz     1f
        NORM_2
1:
.endm
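
/*
  Editor's sketch: the branch macros below implement the usual LZMA
  range-decoder bit step, approximately as in LzmaDec.c (not verbatim):

      bound = (range >> kNumBitModelTotalBits) * prob;
      if (code < bound)
      {
        // bit == 0 : UPDATE_0 path
        range = bound;
        prob += (kBitModelTotal - prob) >> kNumMoveBits;
      }
      else
      {
        // bit == 1 : UPDATE_1 path
        code  -= bound;
        range -= bound;
        prob  -= prob >> kNumMoveBits;
      }
      // NORM: if the top byte of range is empty, shift in one more input byte
      if (range < (1 << 24)) { range <<= 8; code = (code << 8) | *buf++; }

  UPDATE_0__0/__1 compute the same "prob +=" update with a single subtract
  of the pre-biased kBitModelOffset and an arithmetic shift.
*/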

# ---------- Branch MACROS ----------

.macro UPDATE_0__0
        sub     prob2, probBranch, kBitModelOffset
.endm

.macro UPDATE_0__1
        sub     probBranch, probBranch, prob2, asr #(kNumMoveBits)
.endm

.macro UPDATE_0__2 probsArray:req, probOffset:req, probDisp:req
        .if \probDisp == 0
                PSTORE_2 probBranch, \probsArray, \probOffset
        .elseif \probOffset == 0
                PSTORE_2 probBranch, \probsArray, \probDisp * PMULT
        .else
                .error "unsupported"
                // add     update_temp_reg, \probsArray, \probOffset
                PSTORE_2 probBranch, update_temp_reg, \probDisp * PMULT
        .endif
.endm

.macro UPDATE_0 probsArray:req, probOffset:req, probDisp:req
        UPDATE_0__0
        UPDATE_0__1
        UPDATE_0__2 \probsArray, \probOffset, \probDisp
.endm

.macro UPDATE_1 probsArray:req, probOffset:req, probDisp:req
        // sub     cod, cod, prob2
        // sub     range, range, prob2
        p2_sub  cod, range
        sub     range, prob2, range
        sub     prob2, probBranch, probBranch, lsr #(kNumMoveBits)
        .if \probDisp == 0
                PSTORE_2 prob2, \probsArray, \probOffset
        .elseif \probOffset == 0
                PSTORE_2 prob2, \probsArray, \probDisp * PMULT
        .else
                .error "unsupported"
                // add     update_temp_reg, \probsArray, \probOffset
                PSTORE_2 prob2, update_temp_reg, \probDisp * PMULT
        .endif
.endm

.macro CMP_COD_BASE
        NORM
        // lsr     prob2, range, kNumBitModelTotalBits
        // imul    prob2, probBranch
        // cmp     cod, prob2
        mov     prob2, range
        shr     range, kNumBitModelTotalBits
        imul    range, probBranch
        cmp     cod, range
.endm

.macro CMP_COD_1 probsArray:req
        PLOAD   probBranch, \probsArray
        CMP_COD_BASE
.endm

.macro CMP_COD_3 probsArray:req, probOffset:req, probDisp:req
        .if \probDisp == 0
                PLOAD_2 probBranch, \probsArray, \probOffset
        .elseif \probOffset == 0
                PLOAD_2 probBranch, \probsArray, \probDisp * PMULT
        .else
                .error "unsupported"
                add     update_temp_reg, \probsArray, \probOffset
                PLOAD_2 probBranch, update_temp_reg, \probDisp * PMULT
        .endif
        CMP_COD_BASE
.endm

.macro IF_BIT_1_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD_3 \probsArray, \probOffset, \probDisp
        jae     \toLabel
.endm

.macro IF_BIT_1 probsArray:req, probOffset:req, probDisp:req, toLabel:req
        IF_BIT_1_NOUP \probsArray, \probOffset, \probDisp, \toLabel
        UPDATE_0 \probsArray, \probOffset, \probDisp
.endm

.macro IF_BIT_0_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD_3 \probsArray, \probOffset, \probDisp
        jb      \toLabel
.endm

.macro IF_BIT_0_NOUP_1 probsArray:req, toLabel:req
        CMP_COD_1 \probsArray
        jb      \toLabel
.endm


# ---------- CMOV MACROS ----------

.macro NORM_LSR
        NORM
        lsr     t0, range, #kNumBitModelTotalBits
.endm

.macro COD_RANGE_SUB
        subs    t1, cod, t0
        p2_sub  range, t0
.endm

.macro RANGE_IMUL prob:req
        imul    t0, \prob
.endm

.macro NORM_CALC prob:req
        NORM_LSR
        RANGE_IMUL \prob
        COD_RANGE_SUB
.endm

.macro CMOV_range
        cmovb   range, t0
.endm

.macro CMOV_code
        cmovae  cod, t1
.endm

.macro CMOV_code_Model_Pre prob:req
        sub     t0, \prob, kBitModelOffset
        CMOV_code
        cmovae  t0, \prob
.endm

.macro PUP_BASE_2 prob:req, dest_reg:req
        # only sar works for both 16/32 bit prob modes
        sub     \dest_reg, \prob, \dest_reg, asr #(kNumMoveBits)
.endm

.macro PUP prob:req, probPtr:req, mem2:req
        PUP_BASE_2 \prob, t0
        PSTORE_2 t0, \probPtr, \mem2
.endm

#define probs_PMULT t4_R

.macro BIT_01
        add     probs_PMULT, probs, PMULT
.endm

.macro BIT_0_R prob:req
        PLOAD_2 \prob, probs, 1 * PMULT
        NORM_LSR
        sub     t3, \prob, kBitModelOffset
        RANGE_IMUL \prob
        PLOAD_2 t2, probs, 1 * PMULT_2
        COD_RANGE_SUB
        CMOV_range
        cmovae  t3, \prob
        PLOAD_2 t0, probs, 1 * PMULT_2 + PMULT
        PUP_BASE_2 \prob, t3
        csel    \prob, t2, t0, lo
        CMOV_code
        mov     sym, 2
        PSTORE_2 t3, probs, 1 * PMULT
        adc     sym, sym, wzr
        BIT_01
.endm

.macro BIT_1_R prob:req
        NORM_LSR
        p2_add  sym, sym
        sub     t3, \prob, kBitModelOffset
        RANGE_IMUL \prob
        PLOAD_LSL t2, probs, sym_R
        COD_RANGE_SUB
        CMOV_range
        cmovae  t3, \prob
        PLOAD_LSL t0, probs_PMULT, sym_R
        PUP_BASE_2 \prob, t3
        csel    \prob, t2, t0, lo
        CMOV_code
        PSTORE_LSL_M1 t3, probs, sym_R, t2_R
        adc     sym, sym, wzr
.endm

.macro BIT_2_R prob:req
        NORM_LSR
        p2_add  sym, sym
        sub     t3, \prob, kBitModelOffset
        RANGE_IMUL \prob
        COD_RANGE_SUB
        CMOV_range
        cmovae  t3, \prob
        CMOV_code
        PUP_BASE_2 \prob, t3
        PSTORE_LSL_M1 t3, probs, sym_R, t2_R
        adc     sym, sym, wzr
.endm
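
/*
  Editor's sketch: BIT_0/BIT_1/BIT_2 above walk a bit tree one bit at a
  time, roughly as LzmaDec.c's TREE_GET_BIT does:

      symbol = 1;
      do
      {
        bit = RangeDecodeBit(&probs[symbol]);   // hypothetical helper name
        symbol = (symbol << 1) + bit;
      }
      while (symbol < (1 << numBits));
      // BIT_0 starts the chain, BIT_2 finishes it

  The LITM_* macros below perform the same step for "matched" literals,
  where a mask derived from the match byte selects between two halves of
  the probability table.
*/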

# ---------- MATCHED LITERAL ----------

.macro LITM_0 macro
        shl     match, (PSHIFT + 1)
        and     bit, match, 256 * PMULT
        add     prm, probs, 256 * PMULT + 1 * PMULT
        p2_add  match, match
        p2_add  prm, bit_R
        eor     offs, bit, 256 * PMULT
        PLOAD   litm_prob, prm
        NORM_LSR
        sub     t2, litm_prob, kBitModelOffset
        RANGE_IMUL litm_prob
        COD_RANGE_SUB
        cmovae  offs, bit
        CMOV_range
        and     bit, match, offs
        cmovae  t2, litm_prob
        CMOV_code
        mov     sym, 2
        PUP_BASE_2 litm_prob, t2
        PSTORE  t2, prm
        add     prm, probs, offs_R
        adc     sym, sym, wzr
.endm

.macro LITM macro
        p2_add  prm, bit_R
        xor     offs, bit
        PLOAD_LSL litm_prob, prm, sym_R
        NORM_LSR
        p2_add  match, match
        sub     t2, litm_prob, kBitModelOffset
        RANGE_IMUL litm_prob
        COD_RANGE_SUB
        cmovae  offs, bit
        CMOV_range
        and     bit, match, offs
        cmovae  t2, litm_prob
        CMOV_code
        PUP_BASE_2 litm_prob, t2
        PSTORE_LSL t2, prm, sym_R
        add     prm, probs, offs_R
        adc     sym, sym, sym
.endm

.macro LITM_2 macro
        p2_add  prm, bit_R
        PLOAD_LSL litm_prob, prm, sym_R
        NORM_LSR
        sub     t2, litm_prob, kBitModelOffset
        RANGE_IMUL litm_prob
        COD_RANGE_SUB
        CMOV_range
        cmovae  t2, litm_prob
        CMOV_code
        PUP_BASE_2 litm_prob, t2
        PSTORE_LSL t2, prm, sym_R
        adc     sym, sym, sym
.endm


# ---------- REVERSE BITS ----------

.macro REV_0 prob:req
        NORM_CALC \prob
        CMOV_range
        PLOAD   t2, sym2_R
        PLOAD_2 t3, probs, 3 * PMULT
        CMOV_code_Model_Pre \prob
        add     t1_R, probs, 3 * PMULT
        cmovae  sym2_R, t1_R
        PUP     \prob, probs, 1 * PMULT
        csel    \prob, t2, t3, lo
.endm

.macro REV_1 prob:req, step:req
        NORM_LSR
        PLOAD_PREINDEXED t2, sym2_R, (\step * PMULT)
        RANGE_IMUL \prob
        COD_RANGE_SUB
        CMOV_range
        PLOAD_2 t3, sym2_R, (\step * PMULT)
        sub     t0, \prob, kBitModelOffset
        CMOV_code
        add     t1_R, sym2_R, \step * PMULT
        cmovae  t0, \prob
        cmovae  sym2_R, t1_R
        PUP_BASE_2 \prob, t0
        csel    \prob, t2, t3, lo
        PSTORE_2 t0, t1_R, 0 - \step * PMULT_2
.endm

.macro REV_2 prob:req, step:req
        sub     t1_R, sym2_R, probs
        NORM_LSR
        orr     sym, sym, t1, lsr #PSHIFT
        RANGE_IMUL \prob
        COD_RANGE_SUB
        sub     t2, sym, \step
        CMOV_range
        cmovb   sym, t2
        CMOV_code_Model_Pre \prob
        PUP     \prob, sym2_R, 0
.endm

.macro REV_1_VAR prob:req
        PLOAD   \prob, sym_R
        mov     probs, sym_R
        p2_add  sym_R, sym2_R
        NORM_LSR
        add     t2_R, sym_R, sym2_R
        RANGE_IMUL \prob
        COD_RANGE_SUB
        cmovae  sym_R, t2_R
        CMOV_range
        CMOV_code_Model_Pre \prob
        p2_add  sym2, sym2
        PUP     \prob, probs, 0
.endm
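
/*
  Editor's sketch: the REV_* macros above decode a reverse bit tree (used
  for the align bits and, via REV_1_VAR, for short distances), roughly as
  the REV_BIT ladder in LzmaDec.c:

      m = 1;          // tree index
      dist = 0;
      for (i = 0; i < numBits; i++)
      {
        bit = RangeDecodeBit(&probs[m]);        // hypothetical helper name
        m = (m << 1) + bit;
        dist |= (UInt32)bit << i;               // bits come out low-to-high
      }
*/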

.macro add_big dest:req, src:req, param:req
        .if (\param) < (1 << 12)
                add     \dest, \src, \param
        .else
                #ifndef _LZMA_PROB32
                        .error "unexpected add_big expansion"
                #endif
                add     \dest, \src, (\param) / 2
                add     \dest, \dest, (\param) - (\param) / 2
        .endif
.endm

.macro sub_big dest:req, src:req, param:req
        .if (\param) < (1 << 12)
                sub     \dest, \src, \param
        .else
                #ifndef _LZMA_PROB32
                        .error "unexpected sub_big expansion"
                #endif
                sub     \dest, \src, (\param) / 2
                sub     \dest, \dest, (\param) - (\param) / 2
        .endif
.endm

.macro SET_probs offset:req
        // add_big probs, probs_Spec, (\offset) * PMULT
        add     probs, probs_IsMatch, ((\offset) - IsMatch) * PMULT
.endm

.macro LIT_PROBS
        add     sym, sym, processedPos, lsl 8
        inc     processedPos
        UPDATE_0__0
        shl     sym, lc2_lpMask
        SET_probs Literal
        p2_and  sym, lc2_lpMask
        // p2_add  probs_state, pbPos_R
        p2_add  probs, sym_R
        UPDATE_0__1
        add     probs, probs, sym_R, lsl 1
        UPDATE_0__2 probs_state, pbPos_R, 0
.endm

.equ kNumPosBitsMax , 4
.equ kNumPosStatesMax , (1 << kNumPosBitsMax)

.equ kLenNumLowBits , 3
.equ kLenNumLowSymbols , (1 << kLenNumLowBits)
.equ kLenNumHighBits , 8
.equ kLenNumHighSymbols , (1 << kLenNumHighBits)
.equ kNumLenProbs , (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)

.equ LenLow , 0
.equ LenChoice , LenLow
.equ LenChoice2 , (LenLow + kLenNumLowSymbols)
.equ LenHigh , (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)

.equ kNumStates , 12
.equ kNumStates2 , 16
.equ kNumLitStates , 7

.equ kStartPosModelIndex , 4
.equ kEndPosModelIndex , 14
.equ kNumFullDistances , (1 << (kEndPosModelIndex >> 1))

.equ kNumPosSlotBits , 6
.equ kNumLenToPosStates , 4

.equ kNumAlignBits , 4
.equ kAlignTableSize , (1 << kNumAlignBits)

.equ kMatchMinLen , 2
.equ kMatchSpecLenStart , (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)

// .equ kStartOffset , 1408
.equ kStartOffset , 0
.equ SpecPos , (-kStartOffset)
.equ IsRep0Long , (SpecPos + kNumFullDistances)
.equ RepLenCoder , (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
.equ LenCoder , (RepLenCoder + kNumLenProbs)
.equ IsMatch , (LenCoder + kNumLenProbs)
.equ kAlign , (IsMatch + (kNumStates2 << kNumPosBitsMax))
.equ IsRep , (kAlign + kAlignTableSize)
.equ IsRepG0 , (IsRep + kNumStates)
.equ IsRepG1 , (IsRepG0 + kNumStates)
.equ IsRepG2 , (IsRepG1 + kNumStates)
.equ PosSlot , (IsRepG2 + kNumStates)
.equ Literal , (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
.equ NUM_BASE_PROBS , (Literal + kStartOffset)

.if kStartOffset != 0 // && IsMatch != 0
        .error "Stop_Compiling_Bad_StartOffset"
.endif

.if NUM_BASE_PROBS != 1984
        .error "Stop_Compiling_Bad_LZMA_PROBS"
.endif

.equ offset_lc , 0
.equ offset_lp , 1
.equ offset_pb , 2
.equ offset_dicSize , 4
.equ offset_probs , 4 + offset_dicSize
.equ offset_probs_1664 , 8 + offset_probs
.equ offset_dic , 8 + offset_probs_1664
.equ offset_dicBufSize , 8 + offset_dic
.equ offset_dicPos , 8 + offset_dicBufSize
.equ offset_buf , 8 + offset_dicPos
.equ offset_range , 8 + offset_buf
.equ offset_code , 4 + offset_range
.equ offset_processedPos , 4 + offset_code
.equ offset_checkDicSize , 4 + offset_processedPos
.equ offset_rep0 , 4 + offset_checkDicSize
.equ offset_rep1 , 4 + offset_rep0
.equ offset_rep2 , 4 + offset_rep1
.equ offset_rep3 , 4 + offset_rep2
.equ offset_state , 4 + offset_rep3
.equ offset_remainLen , 4 + offset_state
.equ offset_TOTAL_SIZE , 4 + offset_remainLen

.if offset_TOTAL_SIZE != 96
        .error "Incorrect offset_TOTAL_SIZE"
.endif

.macro IsMatchBranch_Pre
        # prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
        and     pbPos, pbMask, processedPos, lsl #(kLenNumLowBits + 1 + PSHIFT)
        add     probs_state, probs_IsMatch, state_R
.endm

/*
.macro IsMatchBranch
        IsMatchBranch_Pre
        IF_BIT_1 probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
.endm
*/

.macro CheckLimits
        cmp     buf, bufLimit
        jae     fin_OK
        cmp     dicPos, limit
        jae     fin_OK
.endm

#define CheckLimits_lit CheckLimits
/*
.macro CheckLimits_lit
        cmp     buf, bufLimit
        jae     fin_OK_lit
        cmp     dicPos, limit
        jae     fin_OK_lit
.endm
*/

#define PARAM_lzma     REG_ABI_PARAM_0
#define PARAM_limit    REG_ABI_PARAM_1
#define PARAM_bufLimit REG_ABI_PARAM_2

.macro LOAD_LZMA_VAR reg:req, struct_offs:req
        ldr     \reg, [PARAM_lzma, \struct_offs]
.endm

.macro LOAD_LZMA_BYTE reg:req, struct_offs:req
        ldrb    \reg, [PARAM_lzma, \struct_offs]
.endm

.macro LOAD_LZMA_PAIR reg0:req, reg1:req, struct_offs:req
        ldp     \reg0, \reg1, [PARAM_lzma, \struct_offs]
.endm
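
/*
  Editor's note: approximate C-side view of this entry point, matching the
  C version in LzmaDec.c (the exact calling-convention macro spelling may
  differ between 7-Zip versions):

      int LzmaDec_DecodeReal_3(CLzmaDec *p, SizeT limit, const Byte *bufLimit);

  PARAM_lzma / PARAM_limit / PARAM_bufLimit map these three arguments to the
  first three ABI parameter registers (x0-x2 on AArch64).
*/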
LzmaDec_DecodeReal_3:
_LzmaDec_DecodeReal_3:
/*
.LFB0:
        .cfi_startproc
*/
        stp     x19, x20, [sp, -128]!
        stp     x21, x22, [sp, 16]
        stp     x23, x24, [sp, 32]
        stp     x25, x26, [sp, 48]
        stp     x27, x28, [sp, 64]
        stp     x29, x30, [sp, 80]

        str     PARAM_lzma, [sp, 120]

        mov     bufLimit, PARAM_bufLimit
        mov     limit, PARAM_limit

        LOAD_LZMA_PAIR dic, dicBufSize, offset_dic
        LOAD_LZMA_PAIR dicPos, buf, offset_dicPos
        LOAD_LZMA_PAIR rep0, rep1, offset_rep0
        LOAD_LZMA_PAIR rep2, rep3, offset_rep2

        mov     t0, 1 << (kLenNumLowBits + 1 + PSHIFT)
        LOAD_LZMA_BYTE pbMask, offset_pb
        p2_add  limit, dic
        mov     len, wzr // we could set it in all required branches instead
        lsl     pbMask, t0, pbMask
        p2_add  dicPos, dic
        p2_sub  pbMask, t0

        LOAD_LZMA_BYTE lc2_lpMask, offset_lc
        mov     t0, 256 << PSHIFT
        LOAD_LZMA_BYTE t1, offset_lp
        p2_add  t1, lc2_lpMask
        p2_sub  lc2_lpMask, (256 << PSHIFT) - PSHIFT
        shl     t0, t1
        p2_add  lc2_lpMask, t0

        LOAD_LZMA_VAR probs_Spec, offset_probs
        LOAD_LZMA_VAR checkDicSize, offset_checkDicSize
        LOAD_LZMA_VAR processedPos, offset_processedPos
        LOAD_LZMA_VAR state, offset_state
        // range is r0 : this load must be last; don't move it
        LOAD_LZMA_PAIR range, cod, offset_range
        mov     sym, wzr
        shl     state, PSHIFT

        add_big probs_IsMatch, probs_Spec, ((IsMatch - SpecPos) << PSHIFT)

        // if (processedPos != 0 || checkDicSize != 0)
        orr     t0, checkDicSize, processedPos
        cbz     t0, 1f
        add     t0_R, dicBufSize, dic
        cmp     dicPos, dic
        cmovne  t0_R, dicPos
        ldrb    sym, [t0_R, -1]
1:
        IsMatchBranch_Pre
        cmp     state, 4 * PMULT
        jb      lit_end
        cmp     state, kNumLitStates * PMULT
        jb      lit_matched_end
        jmp     lz_end


#define BIT_0 BIT_0_R prob_reg
#define BIT_1 BIT_1_R prob_reg
#define BIT_2 BIT_2_R prob_reg

# ---------- LITERAL ----------
MY_ALIGN_64
lit_start:
        mov     state, wzr
lit_start_2:
        LIT_PROBS

#ifdef _LZMA_SIZE_OPT
        PLOAD_2 prob_reg, probs, 1 * PMULT
        mov     sym, 1
        BIT_01
MY_ALIGN_FOR_LOOP
lit_loop:
        BIT_1
        tbz     sym, 7, lit_loop
#else
        BIT_0
        BIT_1
        BIT_1
        BIT_1
        BIT_1
        BIT_1
        BIT_1
#endif

        BIT_2
        IsMatchBranch_Pre
        strb    sym, [dicPos], 1
        p2_and  sym, 255

        CheckLimits_lit
lit_end:
        IF_BIT_0_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), lit_start
        # jmp     IsMatch_label

#define FLAG_STATE_BITS (4 + PSHIFT)

# ---------- MATCHES ----------
# MY_ALIGN_FOR_ENTRY
IsMatch_label:
        UPDATE_1 probs_state, pbPos_R, (IsMatch - IsMatch)
        IF_BIT_1 probs_state, 0, (IsRep - IsMatch), IsRep_label

        SET_probs LenCoder
        or      state, (1 << FLAG_STATE_BITS)

# ---------- LEN DECODE ----------
len_decode:
        mov     len, 8 - kMatchMinLen
        IF_BIT_0_NOUP_1 probs, len_mid_0
        UPDATE_1 probs, 0, 0
        p2_add  probs, (1 << (kLenNumLowBits + PSHIFT))
        mov     len, 0 - kMatchMinLen
        IF_BIT_0_NOUP_1 probs, len_mid_0
        UPDATE_1 probs, 0, 0
        p2_add  probs, LenHigh * PMULT - (1 << (kLenNumLowBits + PSHIFT))

#if 0 == 1
        BIT_0
        BIT_1
        BIT_1
        BIT_1
        BIT_1
        BIT_1
#else
        PLOAD_2 prob_reg, probs, 1 * PMULT
        mov     sym, 1
        BIT_01
MY_ALIGN_FOR_LOOP
len8_loop:
        BIT_1
        tbz     sym, 6, len8_loop
#endif

        mov     len, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - kMatchMinLen
        jmp     len_mid_2

MY_ALIGN_FOR_ENTRY
len_mid_0:
        UPDATE_0 probs, 0, 0
        p2_add  probs, pbPos_R
        BIT_0
len_mid_2:
        BIT_1
        BIT_2
        sub     len, sym, len
        tbz     state, FLAG_STATE_BITS, copy_match
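
/*
  Editor's sketch: the len_decode block above implements the LZMA length
  coder; roughly, in C:

      if (bit0(LenChoice))        len = kMatchMinLen      + Tree3(LenLow + posState);
      else if (bit0(LenChoice2))  len = kMatchMinLen + 8  + Tree3(LenLow + posState + 8);
      else                        len = kMatchMinLen + 16 + Tree8(LenHigh);

  where TreeN() is an N-bit forward tree decode (bit0()/TreeN() are
  shorthand, not LzmaDec.c names). The "mov len, ..." constants fold the
  leading 1 bit of the tree value into the final "sub len, sym, len".
*/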

# ---------- DECODE DISTANCE ----------
        // probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);

        mov     t0, 3 + kMatchMinLen
        cmp     len, 3 + kMatchMinLen
        cmovb   t0, len
        SET_probs PosSlot - (kMatchMinLen << (kNumPosSlotBits))
        add     probs, probs, t0_R, lsl #(kNumPosSlotBits + PSHIFT)

#ifdef _LZMA_SIZE_OPT
        PLOAD_2 prob_reg, probs, 1 * PMULT
        mov     sym, 1
        BIT_01
MY_ALIGN_FOR_LOOP
slot_loop:
        BIT_1
        tbz     sym, 5, slot_loop
#else
        BIT_0
        BIT_1
        BIT_1
        BIT_1
        BIT_1
#endif

#define numBits t4
        mov     numBits, sym
        BIT_2
        // we need only the low bits
        p2_and  sym, 3
        cmp     numBits, 32 + kEndPosModelIndex / 2
        jb      short_dist

        SET_probs kAlign

        # unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
        p2_sub  numBits, (32 + 1 + kNumAlignBits)
        # distance = (2 | (distance & 1));
        or      sym, 2

        PLOAD_2 prob_reg, probs, 1 * PMULT
        add     sym2_R, probs, 2 * PMULT

# ---------- DIRECT DISTANCE ----------

.macro DIRECT_1
        shr     range, 1
        subs    t0, cod, range
        p2_add  sym, sym
        // add     t1, sym, 1
        csel    cod, cod, t0, mi
        csinc   sym, sym, sym, mi
        // csel    sym, t1, sym, pl
        // adc     sym, sym, sym // not 100% compatible for "corrupted-allowed" LZMA streams
        dec_s   numBits
        je      direct_end
.endm

#ifdef _LZMA_SIZE_OPT

        jmp     direct_norm
MY_ALIGN_FOR_ENTRY
direct_loop:
        DIRECT_1
direct_norm:
        TEST_HIGH_BYTE_range
        jnz     direct_loop
        NORM_2
        jmp     direct_loop

#else

.macro DIRECT_2
        TEST_HIGH_BYTE_range
        jz      direct_unroll
        DIRECT_1
.endm

        DIRECT_2
        DIRECT_2
        DIRECT_2
        DIRECT_2
        DIRECT_2
        DIRECT_2
        DIRECT_2
        DIRECT_2

direct_unroll:
        NORM_2
        DIRECT_1
        DIRECT_1
        DIRECT_1
        DIRECT_1
        DIRECT_1
        DIRECT_1
        DIRECT_1
        DIRECT_1
        jmp     direct_unroll

#endif

MY_ALIGN_FOR_ENTRY
direct_end:
        shl     sym, kNumAlignBits
        REV_0   prob_reg
        REV_1   prob_reg, 2
        REV_1   prob_reg, 4
        REV_2   prob_reg, 8

decode_dist_end:
        // if (distance >= (checkDicSize == 0 ? processedPos : checkDicSize))
        tst     checkDicSize, checkDicSize
        csel    t0, processedPos, checkDicSize, eq
        cmp     sym, t0
        jae     end_of_payload
        // jmp     end_of_payload # for debug

        mov     rep3, rep2
        mov     rep2, rep1
        mov     rep1, rep0
        add     rep0, sym, 1

.macro STATE_UPDATE_FOR_MATCH
        // state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
        // cmp     state, (kNumStates + kNumLitStates) * PMULT
        cmp     state, kNumLitStates * PMULT + (1 << FLAG_STATE_BITS)
        mov     state, kNumLitStates * PMULT
        mov     t0, (kNumLitStates + 3) * PMULT
        cmovae  state, t0
.endm
        STATE_UPDATE_FOR_MATCH
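
/*
  Editor's sketch: the copy_match code below follows the C logic from
  LzmaDec.c, approximately:

      rem = limit - dicPos;                 // 0 -> output space exhausted
      curLen = (rem < len) ? (unsigned)rem : len;
      pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
      if (pos + curLen <= dicBufSize)
        do { dic[dicPos++] = dic[pos++]; } while (--curLen);  // fast path
      else
        ;  // source range wraps at dicBufSize -> byte loop (copy_match_cross)

  The asm additionally special-cases rep0 == 1 (copy_match_0: a run fill of
  one repeated byte).
*/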

# ---------- COPY MATCH ----------
copy_match:
        // if ((rem = limit - dicPos) == 0) break // return SZ_ERROR_DATA;
        subs    cnt_R, limit, dicPos
        // jz      fin_dicPos_LIMIT
        jz      fin_OK

        // curLen = ((rem < len) ? (unsigned)rem : len);
        cmp     cnt_R, len_R
        cmovae  cnt, len

        sub     t0_R, dicPos, dic
        p2_add  dicPos, cnt_R
        p2_add  processedPos, cnt
        p2_sub  len, cnt

        // pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
        p2_sub_s t0_R, rep0_R
        jae     1f
        cmn     t0_R, cnt_R
        p2_add  t0_R, dicBufSize
        ja      copy_match_cross
1:
# ---------- COPY MATCH FAST ----------
        # t0_R : src_pos
        p2_add  t0_R, dic
        ldrb    sym, [t0_R]
        p2_add  t0_R, cnt_R
        p1_neg  cnt_R

copy_common:
        dec     dicPos

        # dicPos : (ptr_to_last_dest_BYTE)
        # t0_R   : (src_lim)
        # cnt_R  : (-curLen)

        IsMatchBranch_Pre

        inc_s   cnt_R
        jz      copy_end

        cmp     rep0, 1
        je      copy_match_0

#ifdef LZMA_USE_2BYTES_COPY
        strb    sym, [dicPos, cnt_R]
        dec     dicPos
        # dicPos : (ptr_to_last_dest_16bitWORD)
        p2_and  cnt_R, -2
        ldrh    sym, [t0_R, cnt_R]
        adds    cnt_R, cnt_R, 2
        jz      2f
MY_ALIGN_FOR_LOOP
1:
        /*
        strh    sym, [dicPos, cnt_R]
        ldrh    sym, [t0_R, cnt_R]
        adds    cnt_R, cnt_R, 2
        jz      2f
        */
        strh    sym, [dicPos, cnt_R]
        ldrh    sym, [t0_R, cnt_R]
        adds    cnt_R, cnt_R, 2
        jnz     1b
2:
        /*
        // for universal little/big endian code, but slow
        strh    sym, [dicPos]
        inc     dicPos
        ldrb    sym, [t0_R, -1]
        */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        // we must improve big-endian detection for other compilers
        // for big-endian we need to reverse the bytes
        rev16   sym, sym
#endif
        // (sym) must be represented as little-endian here:
        strb    sym, [dicPos], 1
        shr     sym, 8
#else
MY_ALIGN_FOR_LOOP
1:
        strb    sym, [dicPos, cnt_R]
        ldrb    sym, [t0_R, cnt_R]
        inc_s   cnt_R
        jz      copy_end
        strb    sym, [dicPos, cnt_R]
        ldrb    sym, [t0_R, cnt_R]
        inc_s   cnt_R
        jnz     1b
#endif

copy_end:
lz_end_match:
        strb    sym, [dicPos], 1

        # IsMatchBranch_Pre
        CheckLimits
lz_end:
        IF_BIT_1_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label

# ---------- LITERAL MATCHED ----------

        LIT_PROBS

        // matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        sub     t0_R, dicPos, dic
        p2_sub_s t0_R, rep0_R

#ifdef LZMA_USE_CMOV_LZ_WRAP
        add     t1_R, t0_R, dicBufSize
        cmovb   t0_R, t1_R
#else
        jae     1f
        p2_add  t0_R, dicBufSize
1:
#endif

        ldrb    match, [dic, t0_R]

        // state -= (state < 10) ? 3 : 6;
        sub     sym, state, 6 * PMULT
        cmp     state, 10 * PMULT
        p2_sub  state, 3 * PMULT
        cmovae  state, sym

#ifdef _LZMA_SIZE_OPT
        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)
        mov     sym, 1
        and     bit, match, offs
        add     prm, probs, offs_R
MY_ALIGN_FOR_LOOP
litm_loop:
        LITM
        tbz     sym, 8, litm_loop
#else
        LITM_0
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM_2
#endif

        IsMatchBranch_Pre
        strb    sym, [dicPos], 1
        p2_and  sym, 255
        // mov     len, wzr // LITM uses the same register (len / offs), so we clear it

        CheckLimits_lit
lit_matched_end:
        IF_BIT_1_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
        # IsMatchBranch
        p2_sub  state, 3 * PMULT
        jmp     lit_start_2

# ---------- REP 0 LITERAL ----------
MY_ALIGN_FOR_ENTRY
IsRep0Short_label:
        UPDATE_0 probs_state, pbPos_R, 0

        // dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        sub     t0_R, dicPos, dic

        // state = state < kNumLitStates ? 9 : 11;
        or      state, 1 * PMULT

        # the caller doesn't allow the (dicPos >= limit) case for REP_SHORT
        # so we don't need the following (dicPos == limit) check here:
        #       cmp     dicPos, limit
        #       jae     fin_dicPos_LIMIT_REP_SHORT
        # // jmp     fin_dicPos_LIMIT_REP_SHORT // for testing/debug purposes

        inc     processedPos

        IsMatchBranch_Pre

        p2_sub_s t0_R, rep0_R
#ifdef LZMA_USE_CMOV_LZ_WRAP
        add     sym_R, t0_R, dicBufSize
        cmovb   t0_R, sym_R
#else
        jae     1f
        p2_add  t0_R, dicBufSize
1:
#endif

        ldrb    sym, [dic, t0_R]
        // mov     len, wzr
        jmp     lz_end_match
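
/*
  Editor's sketch: the IsRep* ladder below selects one of the four most
  recent distances, as in LzmaDec.c:

      if      (bit0(IsRepG0)) distance = rep0;  // incl. the IsRep0Long
                                                // "short rep" literal case
      else if (bit0(IsRepG1)) distance = rep1;
      else if (bit0(IsRepG2)) distance = rep2;
      else                    distance = rep3;
      // the chosen distance moves to rep0 and the others shift down

  (bit0() is shorthand for "the range-decoded bit was 0", not an LzmaDec.c
  name.)
*/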
MY_ALIGN_FOR_ENTRY
IsRep_label:
        UPDATE_1 probs_state, 0, (IsRep - IsMatch)

        # The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
        # So we don't check it here.
        # mov     t0, processedPos
        # or      t0, checkDicSize
        # jz      fin_ERROR_2

        // state = state < kNumLitStates ? 8 : 11;
        cmp     state, kNumLitStates * PMULT
        mov     state, 8 * PMULT
        mov     probBranch, 11 * PMULT
        cmovae  state, probBranch

        SET_probs RepLenCoder

        IF_BIT_1 probs_state, 0, (IsRepG0 - IsMatch), IsRepG0_label
        sub_big probs_state, probs_state, (IsMatch - IsRep0Long) << PSHIFT
        IF_BIT_0_NOUP probs_state, pbPos_R, 0, IsRep0Short_label
        UPDATE_1 probs_state, pbPos_R, 0
        jmp     len_decode

MY_ALIGN_FOR_ENTRY
IsRepG0_label:
        UPDATE_1 probs_state, 0, (IsRepG0 - IsMatch)
        IF_BIT_1 probs_state, 0, (IsRepG1 - IsMatch), IsRepG1_label
        mov     dist, rep1
        mov     rep1, rep0
        mov     rep0, dist
        jmp     len_decode

# MY_ALIGN_FOR_ENTRY
IsRepG1_label:
        UPDATE_1 probs_state, 0, (IsRepG1 - IsMatch)
        IF_BIT_1 probs_state, 0, (IsRepG2 - IsMatch), IsRepG2_label
        mov     dist, rep2
        mov     rep2, rep1
        mov     rep1, rep0
        mov     rep0, dist
        jmp     len_decode

# MY_ALIGN_FOR_ENTRY
IsRepG2_label:
        UPDATE_1 probs_state, 0, (IsRepG2 - IsMatch)
        mov     dist, rep3
        mov     rep3, rep2
        mov     rep2, rep1
        mov     rep1, rep0
        mov     rep0, dist
        jmp     len_decode

# ---------- SPEC SHORT DISTANCE ----------

MY_ALIGN_FOR_ENTRY
short_dist:
        p2_sub_s numBits, 32 + 1
        jbe     decode_dist_end
        or      sym, 2
        shl     sym, numBits
        add     sym_R, probs_Spec, sym_R, lsl #PSHIFT
        p2_add  sym_R, SpecPos * PMULT + 1 * PMULT
        mov     sym2, PMULT // # step
MY_ALIGN_FOR_LOOP
spec_loop:
        REV_1_VAR prob_reg
        dec_s   numBits
        jnz     spec_loop

        p2_add  sym2_R, probs_Spec
        .if SpecPos != 0
                p2_add  sym2_R, SpecPos * PMULT
        .endif
        p2_sub  sym_R, sym2_R
        shr     sym, PSHIFT
        jmp     decode_dist_end

# ---------- COPY MATCH 0 ----------
MY_ALIGN_FOR_ENTRY
copy_match_0:
#ifdef LZMA_USE_4BYTES_FILL
        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end

        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end

        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end

        orr     t3, sym, sym, lsl 8
        p2_and  cnt_R, -4
        orr     t3, t3, t3, lsl 16
MY_ALIGN_FOR_LOOP_16
1:
        /*
        str     t3, [dicPos, cnt_R]
        adds    cnt_R, cnt_R, 4
        jz      2f
        */
        str     t3, [dicPos, cnt_R]
        adds    cnt_R, cnt_R, 4
        jnz     1b
2:
        // p2_and  sym, 255
#else
MY_ALIGN_FOR_LOOP
1:
        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end
        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jnz     1b
#endif
        jmp     copy_end

# ---------- COPY MATCH CROSS ----------
copy_match_cross:
        # t0_R  - src pos
        # cnt_R - total copy len

        p1_neg  cnt_R
1:
        ldrb    sym, [dic, t0_R]
        inc     t0_R
        strb    sym, [dicPos, cnt_R]
        inc     cnt_R
        cmp     t0_R, dicBufSize
        jne     1b

        ldrb    sym, [dic]
        sub     t0_R, dic, cnt_R
        jmp     copy_common

/*
fin_dicPos_LIMIT_REP_SHORT:
        mov     len, 1
        jmp     fin_OK
*/

/*
fin_dicPos_LIMIT:
        jmp     fin_OK
        # For a stricter mode we could stop decoding with an error:
        # mov     sym, 1
        # jmp     fin
*/

fin_ERROR_MATCH_DIST:
        # rep0 = distance + 1;
        p2_add  len, kMatchSpecLen_Error_Data
        mov     rep3, rep2
        mov     rep2, rep1
        mov     rep1, rep0
        mov     rep0, sym
        STATE_UPDATE_FOR_MATCH
        # jmp     fin_OK
        mov     sym, 1
        jmp     fin

end_of_payload:
        inc_s   sym
        jnz     fin_ERROR_MATCH_DIST

        mov     len, kMatchSpecLenStart
        xor     state, (1 << FLAG_STATE_BITS)
        jmp     fin_OK

/*
fin_OK_lit:
        mov     len, wzr
*/

fin_OK:
        mov     sym, wzr

fin:
        NORM

#define fin_lzma_reg t0_R

.macro STORE_LZMA_VAR reg:req, struct_offs:req
        str     \reg, [fin_lzma_reg, \struct_offs]
.endm

.macro STORE_LZMA_PAIR reg0:req, reg1:req, struct_offs:req
        stp     \reg0, \reg1, [fin_lzma_reg, \struct_offs]
.endm

        ldr     fin_lzma_reg, [sp, 120]
        p2_sub  dicPos, dic
        shr     state, PSHIFT

        STORE_LZMA_PAIR dicPos, buf, offset_dicPos
        STORE_LZMA_PAIR range, cod, offset_range
        STORE_LZMA_VAR  processedPos, offset_processedPos
        STORE_LZMA_PAIR rep0, rep1, offset_rep0
        STORE_LZMA_PAIR rep2, rep3, offset_rep2
        STORE_LZMA_PAIR state, len, offset_state

        mov     w0, sym

        ldp     x29, x30, [sp, 80]
        ldp     x27, x28, [sp, 64]
        ldp     x25, x26, [sp, 48]
        ldp     x23, x24, [sp, 32]
        ldp     x21, x22, [sp, 16]
        ldp     x19, x20, [sp], 128
        ret
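
/*
  Editor's note: as in the C LzmaDec_DecodeReal_3(), the result in w0 is
  0 (SZ_OK) on success and 1 (SZ_ERROR_DATA) for a broken stream; the
  decoder state (dicPos, buf, range/code, reps, state, remainLen) has been
  stored back to the CLzmaDec structure just above.
*/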

/*
        .cfi_endproc
.LFE0:
        .size LzmaDec_DecodeReal_3, .-LzmaDec_DecodeReal_3
        .ident "TAG_LZMA"
        .section .note.GNU-stack,"",@progbits
*/