--- /dev/null
+; 7zCrcOpt.asm -- CRC32 calculation : optimized version\r
+; 2021-02-07 : Igor Pavlov : Public domain\r
+\r
+include 7zAsm.asm\r
+\r
+MY_ASM_START\r
+\r
+; Register roles shared by every macro and routine below\r
+; (the r0..r9 / x0..x7 names themselves come from 7zAsm.asm):\r
+;   rD - pointer to the next input byte while bytes are handled one at a time;\r
+;        inside the main loops it is a negative byte offset relative to rN\r
+;   rN - number of bytes left; inside the main loops it is the loop limit address\r
+;   rT - base address of the lookup tables indexed by the CRC macro\r
+rD equ r2\r
+rN equ r7\r
+rT equ r5\r
+\r
+; num_VAR / table_VAR: where the size and table arguments live.\r
+; With x64 defined they are the registers r8 / r9; otherwise they are stack\r
+; slots addressed through r4. num_VAR is also reused by MY_PROLOG / MY_EPILOG\r
+; to hold the end-of-data address across the main loop.\r
+ifdef x64\r
+ num_VAR equ r8\r
+ table_VAR equ r9\r
+else\r
+ ; NOTE(review): the factor 5 presumably covers the return address plus the\r
+ ; four registers saved by MY_PUSH_4_REGS - confirm against 7zAsm.asm.\r
+ ; With IS_CDECL all four arguments (crc, data, size, table) are read from\r
+ ; the stack; otherwise only size and table are, starting at the same slot.\r
+ if (IS_CDECL gt 0)\r
+ crc_OFFS equ (REG_SIZE * 5)\r
+ data_OFFS equ (REG_SIZE + crc_OFFS)\r
+ size_OFFS equ (REG_SIZE + data_OFFS)\r
+ else\r
+ size_OFFS equ (REG_SIZE * 5)\r
+ endif\r
+ table_OFFS equ (REG_SIZE + size_OFFS)\r
+ num_VAR equ [r4 + size_OFFS]\r
+ table_VAR equ [r4 + table_OFFS]\r
+endif\r
+\r
+; SRCDAT is an intentionally unfinished expression: "[SRCDAT k]" expands to\r
+; [rD + rN * 1 + 4 * k], i.e. the k-th dword counted from address rD + rN.\r
+SRCDAT equ rD + rN * 1 + 4 *\r
+\r
+; CRC insn, acc, idx, tbl\r
+;   Emits "insn acc, <dword>", where <dword> is entry idx (scaled by 4 bytes)\r
+;   of lookup table number tbl. Tables are 0400h bytes apart, starting at rT.\r
+CRC macro insn:req, acc:req, idx:req, tbl:req\r
+ insn acc, DWORD PTR [rT + 0400h * tbl + idx * 4]\r
+endm\r
+\r
+; CRC_XOR acc, idx, tbl\r
+;   XORs entry idx of lookup table number tbl into acc (see CRC).\r
+CRC_XOR macro acc:req, idx:req, tbl:req\r
+ CRC xor, acc, idx, tbl\r
+endm\r
+\r
+; CRC_MOV acc, idx, tbl\r
+;   Loads entry idx of lookup table number tbl into acc (see CRC).\r
+CRC_MOV macro acc:req, idx:req, tbl:req\r
+ CRC mov, acc, idx, tbl\r
+endm\r
+\r
+; CRC1b\r
+;   Folds one input byte into the running CRC held in x0:\r
+;     x0 = (x0 >> 8) ^ table0[(x0 & 0FFh) ^ byte at rD]\r
+;   then advances rD by one and decrements rN.\r
+;   Scratch: x3, x6. "dec rN" is kept last so that ZF tells the code that\r
+;   follows the macro whether any bytes remain.\r
+CRC1b macro\r
+ movzx x3, x0_L\r
+ movzx x6, BYTE PTR [rD]\r
+ shr x0, 8\r
+ xor x6, x3\r
+ inc rD\r
+ CRC_XOR x0, r6, 0\r
+ dec rN\r
+endm\r
+\r
+; MY_PROLOG crc_end\r
+;   Shared entry sequence of the CRC routines in this file.\r
+;   1. Saves registers (MY_PUSH_*) and loads the arguments:\r
+;        x0 = running CRC, rD = data pointer, rN = byte count, rT = table base.\r
+;   2. Processes single bytes with CRC1b until rD is a multiple of 8.\r
+;   3. If at least 16 bytes remain, prepares the main loop state:\r
+;        num_VAR = end-of-data address\r
+;        rN      = (end - 8) rounded down to a multiple of 8 (loop limit)\r
+;        rD      = data - rN, a negative offset that the loop counts up to zero\r
+;        x0      = CRC XORed with the first dword of the data\r
+;   crc_end - label to jump to when the main loop has to be skipped\r
+;             (no bytes, or fewer than 16 bytes left). The routines in this\r
+;             file pass the same label to MY_EPILOG, which defines it.\r
+MY_PROLOG macro crc_end:req\r
+\r
+ ifdef x64\r
+ if (IS_LINUX gt 0)\r
+ MY_PUSH_2_REGS\r
+ ; Load order matters: parameter 0 arrives in x7 and rN is r7, parameter 2\r
+ ; arrives in r2 and rD is r2, so each source is read before the\r
+ ; register holding it is overwritten.\r
+ mov x0, REG_ABI_PARAM_0_x ; x0 = x7\r
+ mov rT, REG_ABI_PARAM_3 ; r5 = r1\r
+ mov rN, REG_ABI_PARAM_2 ; r7 = r2\r
+ mov rD, REG_ABI_PARAM_1 ; r2 = r6\r
+ else\r
+ MY_PUSH_4_REGS\r
+ mov x0, REG_ABI_PARAM_0_x ; x0 = x1\r
+ mov rT, REG_ABI_PARAM_3 ; r5 = r9\r
+ mov rN, REG_ABI_PARAM_2 ; r7 = r8\r
+ ; No move is needed for rD: parameter 1 already arrives in r2.\r
+ ; mov rD, REG_ABI_PARAM_1 ; r2 = r2\r
+ endif\r
+ else\r
+ MY_PUSH_4_REGS\r
+ if (IS_CDECL gt 0)\r
+ mov x0, [r4 + crc_OFFS]\r
+ mov rD, [r4 + data_OFFS]\r
+ else\r
+ ; Only the CRC is copied here; rD is not loaded on this path, so the\r
+ ; data pointer is expected to be in r2 already.\r
+ mov x0, REG_ABI_PARAM_0_x\r
+ endif\r
+ mov rN, num_VAR\r
+ mov rT, table_VAR\r
+ endif\r
+ \r
+ ; Empty input: go straight to the epilog.\r
+ test rN, rN\r
+ jz crc_end\r
+ ; Alignment loop: one byte at a time until the low 3 bits of rD are clear.\r
+ ; CRC1b ends with "dec rN", so jnz repeats only while bytes remain.\r
+ @@:\r
+ test rD, 7\r
+ jz @F\r
+ CRC1b\r
+ jnz @B\r
+ @@:\r
+ ; Fewer than 16 bytes left: leave them to the byte loop in the epilog.\r
+ cmp rN, 16\r
+ jb crc_end\r
+ ; rN = end-of-data address; remember it in num_VAR for the epilog.\r
+ add rN, rD\r
+ mov num_VAR, rN\r
+ ; rN = loop limit: 8 bytes before the end, rounded down to a multiple of 8.\r
+ sub rN, 8\r
+ and rN, NOT 7\r
+ ; rD becomes a negative offset from rN, so rD + rN is still the data address.\r
+ sub rD, rN\r
+ ; Pre-XOR the first dword into the CRC, as the main loops expect.\r
+ xor x0, [SRCDAT 0]\r
+endm\r
+\r
+; MY_EPILOG crc_end\r
+;   Shared exit sequence of the CRC routines in this file.\r
+;   Reached by fall-through from a main loop (rD = 0, rN = loop limit) or by a\r
+;   jump to crc_end from MY_PROLOG (rD = data pointer, rN = bytes left).\r
+;   Processes the remaining bytes one at a time and restores the registers\r
+;   saved by MY_PROLOG. The CRC stays in x0.\r
+;   crc_end - name of the label defined here as the entry of the byte loop.\r
+MY_EPILOG macro crc_end:req\r
+ ; The main loop XORed the next, still unprocessed dword into x0 ahead of\r
+ ; time; XORing the same dword again (rD is 0 here) cancels that.\r
+ xor x0, [SRCDAT 0]\r
+ ; Back to pointer / count form: rD = first unprocessed byte,\r
+ ; rN = end-of-data address (saved in num_VAR by MY_PROLOG) - rD.\r
+ mov rD, rN\r
+ mov rN, num_VAR\r
+ sub rN, rD\r
+ crc_end:\r
+ ; Byte loop for the remaining rN bytes.\r
+ test rN, rN\r
+ jz @F\r
+ CRC1b\r
+ jmp crc_end\r
+ @@:\r
+ ; Mirror of the MY_PUSH_* choice made in MY_PROLOG.\r
+ if (IS_X64 gt 0) and (IS_LINUX gt 0)\r
+ MY_POP_2_REGS\r
+ else\r
+ MY_POP_4_REGS\r
+ endif\r
+endm\r
+\r
+; CrcUpdateT8 (crc, data, size, table)\r
+;   Updates a 32-bit CRC over size bytes at data, consuming 8 input bytes per\r
+;   main-loop iteration with eight lookup tables (table numbers 0..7 of the CRC\r
+;   macro, 0400h bytes each, based at rT). Unaligned head bytes and the tail\r
+;   are handled one at a time by MY_PROLOG / MY_EPILOG using table 0.\r
+;   The updated CRC is accumulated in x0.\r
+;   NOTE(review): presumably x0 is the return register and the "4" passed to\r
+;   MY_PROC is the argument count - confirm against 7zAsm.asm.\r
+;   Scratch registers in the loop: x1, x3, x6.\r
+MY_PROC CrcUpdateT8, 4\r
+ MY_PROLOG crc_end_8\r
+ ; x0 already holds crc ^ dword 0 of the first block; x1 = dword 1.\r
+ mov x1, [SRCDAT 1]\r
+ align 16\r
+ main_loop_8:\r
+ ; x6 accumulates the new CRC. It starts as dword 0 of the NEXT block, so\r
+ ; the value written to x0 at the bottom is already pre-XORed for the next\r
+ ; iteration (MY_EPILOG cancels this after the last one).\r
+ mov x6, [SRCDAT 2]\r
+ ; Bytes 0..3 of x1 (input bytes 4..7) go through tables 3, 2, 1, 0.\r
+ movzx x3, x1_L\r
+ CRC_XOR x6, r3, 3\r
+ movzx x3, x1_H\r
+ CRC_XOR x6, r3, 2\r
+ shr x1, 16\r
+ movzx x3, x1_L\r
+ movzx x1, x1_H\r
+ CRC_XOR x6, r3, 1\r
+ ; x3 is reloaded with byte 0 of x0 before the table-0 lookup, which\r
+ ; indexes with r1 (the former top byte of x1).\r
+ movzx x3, x0_L\r
+ CRC_XOR x6, r1, 0\r
+\r
+ ; x1 is free now: fetch dword 1 of the next block.\r
+ mov x1, [SRCDAT 3]\r
+ ; Bytes 0..3 of x0 (crc ^ input bytes 0..3) go through tables 7, 6, 5, 4.\r
+ CRC_XOR x6, r3, 7\r
+ movzx x3, x0_H\r
+ shr x0, 16\r
+ CRC_XOR x6, r3, 6\r
+ movzx x3, x0_L\r
+ CRC_XOR x6, r3, 5\r
+ movzx x3, x0_H\r
+ CRC_MOV x0, r3, 4\r
+ xor x0, x6\r
+ ; rD is a negative offset from the loop limit; the loop ends when it hits 0.\r
+ add rD, 8\r
+ jnz main_loop_8\r
+\r
+ MY_EPILOG crc_end_8\r
+MY_ENDP\r
+\r
+; CrcUpdateT4 (crc, data, size, table)\r
+;   Updates a 32-bit CRC over size bytes at data with four lookup tables\r
+;   (table numbers 0..3 of the CRC macro, based at rT). Each main-loop\r
+;   iteration consumes 8 input bytes as two 4-byte steps. Unaligned head bytes\r
+;   and the tail are handled one at a time by MY_PROLOG / MY_EPILOG.\r
+;   The updated CRC is accumulated in x0.\r
+;   NOTE(review): presumably x0 is the return register and the "4" passed to\r
+;   MY_PROC is the argument count - confirm against 7zAsm.asm.\r
+;   Scratch registers in the loop: x1, x3, x6.\r
+MY_PROC CrcUpdateT4, 4\r
+ MY_PROLOG crc_end_4\r
+ align 16\r
+ main_loop_4:\r
+ ; Step 1: x0 = crc ^ dword 0. Split it into its four bytes:\r
+ ; x1 = byte 0, x3 = byte 1, x0 = byte 2, x6 = byte 3.\r
+ movzx x1, x0_L\r
+ movzx x3, x0_H\r
+ shr x0, 16\r
+ movzx x6, x0_H\r
+ and x0, 0FFh\r
+ ; x1 = T3[byte 0] ^ dword 1 ^ T2[byte 1] ^ T0[byte 3] ^ T1[byte 2]\r
+ CRC_MOV x1, r1, 3\r
+ xor x1, [SRCDAT 1]\r
+ CRC_XOR x1, r3, 2\r
+ CRC_XOR x1, r6, 0\r
+ CRC_XOR x1, r0, 1\r
+ \r
+ ; Step 2: same scheme with the roles of x0 and x1 swapped. The dword XORed\r
+ ; in is dword 0 of the NEXT block, so x0 is already pre-XORed for the next\r
+ ; iteration (MY_EPILOG cancels this after the last one).\r
+ movzx x0, x1_L\r
+ movzx x3, x1_H\r
+ shr x1, 16\r
+ movzx x6, x1_H\r
+ and x1, 0FFh\r
+ CRC_MOV x0, r0, 3\r
+ xor x0, [SRCDAT 2]\r
+ CRC_XOR x0, r3, 2\r
+ CRC_XOR x0, r6, 0\r
+ CRC_XOR x0, r1, 1\r
+ ; rD is a negative offset from the loop limit; the loop ends when it hits 0.\r
+ add rD, 8\r
+ jnz main_loop_4\r
+\r
+ MY_EPILOG crc_end_4\r
+MY_ENDP\r
+\r
+end\r