; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2021-02-06 : Igor Pavlov : Public domain
;
; Defines one procedure, XzCrc64UpdateT4, in two variants selected at
; assembly time: a 64-bit variant (ifdef x64) and a 32-bit x86 variant.
;
; Both variants consume four arguments -- crc, data, size, table -- and
; index the table as  rT + index * 8 + 0800h * t  with t = 0..3, i.e. as
; four consecutive sub-tables of 8-byte entries.
;
; Both variants follow the same plan:
;   1. fold single bytes until the data pointer is a multiple of 4
;      (or the size runs out);
;   2. if fewer than 8 bytes remain, skip to step 4;
;   3. main loop: fold 4 bytes per iteration using all four sub-tables;
;   4. tail: fold the remaining bytes one at a time.
;
; The register aliases (r0.., x0.., x0_L, x0_H, ...), the MY_* helpers and
; the REG_ABI_PARAM_n / REG_SIZE / IS_CDECL / IS_LINUX symbols are not
; defined in this file; they presumably come from 7zAsm.asm.
; NOTE(review): the result is presumably returned in r0 (r0 / r2 pair on
; x86), because that is where the CRC is left when the epilog runs --
; confirm against the calling convention set up by MY_PROC / MY_ENDP.

include 7zAsm.asm

MY_ASM_START

ifdef x64

; ---------------------------------------------------------------------
; x64 variant
; ---------------------------------------------------------------------
;
; Register roles in this variant:
;   r0      - running CRC (64 bits)
;   rD      - data pointer; from the end of MY_PROLOG until MY_EPILOG it
;             is instead a byte offset relative to rN that counts up
;             towards zero
;   rN      - number of bytes remaining; from the end of MY_PROLOG until
;             MY_EPILOG it is instead the base pointer that rD is
;             relative to
;   rT      - base address of the lookup table
;   num_VAR - keeps the end-of-data pointer while rN is used as a base

rD      equ  r9
rN      equ  r10
rT      equ  r5
num_VAR equ  r8

; The 32-bit data word addressed by rD + rN.
SRCDAT4 equ  dword ptr [rD + rN * 1]

; CRC_XOR dest, src, t
;   XORs dest with the 64-bit table entry number src of sub-table t.
;   Entries are 8 bytes wide and sub-tables are 0800h bytes apart,
;   which is 256 entries per sub-table.
CRC_XOR macro dest:req, src:req, t:req
    xor     dest, QWORD PTR [rT + src * 8 + 0800h * t]
endm

; CRC1b
;   Folds one data byte into the CRC:
;     index = (byte at rD) XOR x0_L
;     r0    = (r0 shr 8) XOR (entry index of sub-table 0)
;   and advances rD by one and decrements rN.
;   x0_L is presumably the low byte of r0 -- confirm in 7zAsm.asm.
;   dec rN is deliberately the last instruction: the flags it leaves
;   (ZF set when rN reached zero) are tested by the jnz that follows the
;   macro in MY_PROLOG.
;   Scratch: x3, x6 / r6.
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    shr     r0, 8
    CRC_XOR r0, r6, 0
    dec     rN
endm

; MY_PROLOG crc_end
;   Saves registers, loads the arguments, folds leading bytes until rD is
;   4-byte aligned and prepares the rD / rN pair for the main loop.
;   crc_end is the label (emitted by MY_EPILOG) of the byte-wise tail
;   loop; control is sent there when there is nothing for the main loop
;   to do.
MY_PROLOG macro crc_end:req
  ifdef ABI_LINUX
    MY_PUSH_2_REGS
  else
    MY_PUSH_4_REGS
  endif
    ; Argument pickup: param 0 is the crc, 1 the data pointer, 2 the
    ; size, 3 the table.
    mov     r0, REG_ABI_PARAM_0
    mov     rN, REG_ABI_PARAM_2
    mov     rT, REG_ABI_PARAM_3
    mov     rD, REG_ABI_PARAM_1
    test    rN, rN
    jz      crc_end                 ; size is 0: nothing to fold
  @@:
    ; Byte-wise loop, runs while the low two bits of rD are not zero.
    test    rD, 3
    jz      @F
    CRC1b
    jnz     @B                      ; flags come from dec rN inside CRC1b
  @@:
    cmp     rN, 8
    jb      crc_end                 ; fewer than 8 bytes left: tail only
    add     rN, rD                  ; rN = end-of-data pointer
    mov     num_VAR, rN             ; remember it for MY_EPILOG
    sub     rN, 4
    and     rN, NOT 3               ; rN = (end - 4) rounded down to 4
    sub     rD, rN                  ; rD = offset of current dword from rN
    mov     x1, SRCDAT4             ; first dword of the aligned data
    xor     r0, r1                  ; fold it into the CRC in advance
    add     rN, 4                   ; SRCDAT4 now addresses the next dword
endm

; MY_EPILOG crc_end
;   Runs after the main loop has left rD equal to zero. Undoes the
;   look-ahead XOR of the main loop, converts rD / rN back into
;   pointer / remaining-count form, folds the remaining bytes one at a
;   time and restores the saved registers.
;   Defines the label passed as crc_end.
MY_EPILOG macro crc_end:req
    sub     rN, 4                   ; back to the dword loaded last
    mov     x1, SRCDAT4
    xor     r0, r1                  ; XOR the same dword again: cancels it
    mov     rD, rN                  ; rD = pointer to first unprocessed byte
    mov     rN, num_VAR
    sub     rN, rD                  ; rN = end pointer - rD = bytes left
  crc_end:
    ; Byte-wise tail loop.
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end
  @@:
  ifdef ABI_LINUX
    MY_POP_2_REGS
  else
    MY_POP_4_REGS
  endif
endm

; XzCrc64UpdateT4 (x64)
;   Arguments, in parameter order: crc, data, size, table.
;   The second operand of MY_PROC is 4 here.
;   NOTE(review): presumably the number of parameters -- confirm in
;   7zAsm.asm.
MY_PROC XzCrc64UpdateT4, 4
    MY_PROLOG crc_end_4
    align 16
  main_loop_4:
    ; On entry to each iteration the low 32 bits of r0 already contain the
    ; current data dword XORed in. The iteration loads the following dword
    ; into r1, peels four index bytes off r0 and combines four table
    ; entries. x0_L / x0_H are presumably bits 0-7 / 8-15 of r0 --
    ; confirm in 7zAsm.asm.
    mov     x1, SRCDAT4             ; r1 = next dword (look-ahead)
    movzx   x2, x0_L                ; 1st index byte
    movzx   x3, x0_H                ; 2nd index byte
    shr     r0, 16
    movzx   x6, x0_L                ; 3rd index byte
    movzx   x7, x0_H                ; 4th index byte
    shr     r0, 16                  ; r0 = former bits 32..63 of the CRC
    ; 1st byte uses sub-table 3, 4th byte uses sub-table 0.
    ; The XORs alternate between r1 and r0 and are merged afterwards.
    CRC_XOR r1, r2, 3
    CRC_XOR r0, r3, 2
    CRC_XOR r1, r6, 1
    CRC_XOR r0, r7, 0
    xor     r0, r1
    add     rD, 4                   ; offset counts up; loop ends at zero
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP

else
; x86 (32-bit)

; ---------------------------------------------------------------------
; x86 variant
; ---------------------------------------------------------------------
;
; Register roles in this variant:
;   r0 / r2 - running CRC: r0 holds the low 32 bits, r2 the high 32 bits
;   rD      - data pointer, later an offset relative to rN (as in x64)
;   rN      - bytes remaining, later the base pointer for rD
;   rT      - base address of the lookup table
;   num_VAR - a stack slot that keeps the end-of-data pointer

rD      equ  r1
rN      equ  r7
rT      equ  r5

; Offset of the crc argument from r4 once MY_PUSH_4_REGS has run.
; NOTE(review): presumably 4 saved registers plus the return address --
; confirm against MY_PUSH_4_REGS in 7zAsm.asm.
crc_OFFS  equ (REG_SIZE * 5)

if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    ; cdecl or (GNU fastcall) stack:
    ;   (UInt32 *) table
    ;   size_t     size
    ;   void *     data
    ;   (UInt64)   crc
    ;   ret-ip <-(r4)
    data_OFFS   equ (8 + crc_OFFS)
    size_OFFS   equ (REG_SIZE + data_OFFS)
    table_OFFS  equ (REG_SIZE + size_OFFS)
    ; num_VAR reuses the stack slot of the size argument; MY_PROLOG reads
    ; size into rN before it stores to num_VAR.
    num_VAR     equ [r4 + size_OFFS]
    table_VAR   equ [r4 + table_OFFS]
else
    ; Windows fastcall:
    ;   r1 = data, r2 = size
    ; stack:
    ;   (UInt32 *) table
    ;   (UInt64)   crc
    ;   ret-ip <-(r4)
    table_OFFS  equ (8 + crc_OFFS)
    table_VAR   equ [r4 + table_OFFS]
    ; num_VAR reuses the stack slot of the table argument; MY_PROLOG
    ; loads rT from table_VAR before it stores to num_VAR.
    num_VAR     equ table_VAR
endif

; The 32-bit data word addressed by rD + rN.
SRCDAT4 equ  dword ptr [rD + rN * 1]

; CRC op0, op1, dest0, dest1, src, t
;   Applies op0 to dest0 with the low dword, and op1 to dest1 with the
;   high dword, of the 8-byte table entry number src of sub-table t.
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
    op0     dest0, DWORD PTR [rT + src * 8 + 0800h * t]
    op1     dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
endm

; CRC_XOR dest0, dest1, src, t
;   XORs the register pair dest0 (low) / dest1 (high) with the table
;   entry number src of sub-table t.
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
    CRC xor, xor, dest0, dest1, src, t
endm

; CRC1b
;   Folds one data byte into the 64-bit CRC held in r0 (low) / r2 (high):
;     index = (byte at rD) XOR x0_L
;     crc   = (crc shr 8) XOR (entry index of sub-table 0)
;   and advances rD by one and decrements rN.
;   x0_L is presumably the low byte of r0 -- confirm in 7zAsm.asm.
;   dec rN is deliberately the last instruction: the flags it leaves are
;   tested by the jnz that follows the macro in MY_PROLOG.
;   Scratch: x3, x6 / r6.
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    shrd    r0, r2, 8               ; low half takes 8 bits from the high half
    shr     r2, 8
    CRC_XOR r0, r2, r6, 0
    dec     rN
endm

; MY_PROLOG crc_end
;   Saves registers, loads the arguments, folds leading bytes until rD is
;   4-byte aligned and prepares the rD / rN pair for the main loop.
;   crc_end is the label (emitted by MY_EPILOG) of the byte-wise tail
;   loop.
MY_PROLOG macro crc_end:req
    MY_PUSH_4_REGS
  if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    ; NOTE(review): proc_numParams is maintained outside this file;
    ; presumably MY_ENDP uses it -- confirm in 7zAsm.asm.
    proc_numParams = proc_numParams + 2 ; for ABI_LINUX
    mov     rN, [r4 + size_OFFS]
    mov     rD, [r4 + data_OFFS]
  else
    ; data is already in r1, which is rD; only size has to be moved
    ; because r2 is about to receive the high half of the crc.
    mov     rN, r2
  endif
    mov     x0, [r4 + crc_OFFS]     ; crc, low 32 bits
    mov     x2, [r4 + crc_OFFS + 4] ; crc, high 32 bits
    mov     rT, table_VAR           ; must precede the store to num_VAR
    test    rN, rN
    jz      crc_end                 ; size is 0: nothing to fold
  @@:
    ; Byte-wise loop, runs while the low two bits of rD are not zero.
    test    rD, 3
    jz      @F
    CRC1b
    jnz     @B                      ; flags come from dec rN inside CRC1b
  @@:
    cmp     rN, 8
    jb      crc_end                 ; fewer than 8 bytes left: tail only
    add     rN, rD                  ; rN = end-of-data pointer
    mov     num_VAR, rN             ; remember it for MY_EPILOG
    sub     rN, 4
    and     rN, NOT 3               ; rN = (end - 4) rounded down to 4
    sub     rD, rN                  ; rD = offset of current dword from rN
    xor     r0, SRCDAT4             ; fold the first dword in advance
    add     rN, 4                   ; SRCDAT4 now addresses the next dword
endm

; MY_EPILOG crc_end
;   Runs after the main loop has left rD equal to zero. Undoes the
;   look-ahead XOR of the main loop, converts rD / rN back into
;   pointer / remaining-count form, folds the remaining bytes one at a
;   time and restores the saved registers.
;   Defines the label passed as crc_end.
MY_EPILOG macro crc_end:req
    sub     rN, 4                   ; back to the dword loaded last
    xor     r0, SRCDAT4             ; XOR the same dword again: cancels it
    mov     rD, rN                  ; rD = pointer to first unprocessed byte
    mov     rN, num_VAR
    sub     rN, rD                  ; rN = end pointer - rD = bytes left
  crc_end:
    ; Byte-wise tail loop.
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end
  @@:
    MY_POP_4_REGS
endm

; XzCrc64UpdateT4 (x86)
;   Arguments: crc (64-bit, on the stack), data, size, table; see the
;   stack layouts documented above for where each one arrives.
;   The second operand of MY_PROC is 5 here.
;   NOTE(review): presumably the parameter count in 32-bit units, with
;   the 64-bit crc counting twice -- confirm in 7zAsm.asm.
MY_PROC XzCrc64UpdateT4, 5
    MY_PROLOG crc_end_4
    movzx   x6, x0_L                ; 1st index byte for the first iteration
    align 16
  main_loop_4:
    ; r0 holds the low CRC half with the current data dword already XORed
    ; in; r2 holds the high CRC half; r6 holds the 1st index byte.
    ; The new CRC is accumulated in r3 (low) / r2 (high).
    mov     r3, SRCDAT4             ; next dword (look-ahead)
    xor     r3, r2                  ; combined with the old high half

    ; 1st byte, sub-table 3: the high half is overwritten with mov rather
    ; than XORed, since the old high half was consumed just above.
    CRC xor, mov, r3, r2, r6, 3
    movzx   x6, x0_H                ; 2nd index byte
    shr     r0, 16
    CRC_XOR r3, r2, r6, 2

    movzx   x6, x0_L                ; 3rd index byte
    movzx   x0, x0_H                ; 4th index byte, r0 is free afterwards
    CRC_XOR r3, r2, r6, 1
    CRC_XOR r3, r2, r0, 0
    movzx   x6, x3_L                ; 1st index byte for the next iteration
    mov     r0, r3                  ; r0 = new low half

    add     rD, 4                   ; offset counts up; loop ends at zero
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP

endif ; ! x64

end