9e052883 |
1 | ; 7zCrcOpt.asm -- CRC32 calculation : optimized version\r |
2 | ; 2021-02-07 : Igor Pavlov : Public domain\r |
3 | \r |
4 | include 7zAsm.asm\r |
5 | \r |
; MY_ASM_START and the r0..r7 / x0..x7 register names used throughout this
; file are supplied by 7zAsm.asm (included above); their definitions are not
; visible here.
6 | MY_ASM_START\r |
7 | \r |
; Working-register roles, as used by the code below:
;   rD - data pointer while bytes are consumed one at a time (CRC1b);
;        inside the 8-byte main loops it is an offset that counts up to 0
;   rN - number of bytes remaining; inside the main loops it holds the
;        address at which the loop stops (rD + rN is the current address)
;   rT - base address of the CRC lookup tables (see the CRC macro)
8 | rD equ r2\r |
9 | rN equ r7\r |
10 | rT equ r5\r |
11 | \r |
; num_VAR / table_VAR: operand text for the size and table arguments.
;
; x64 build: MY_PROLOG takes its arguments from REG_ABI_PARAM_* and does not
; read num_VAR or table_VAR. num_VAR (r8) is only used as the place where the
; end address of the buffer is kept (written by MY_PROLOG, read back by
; MY_EPILOG).
12 | ifdef x64\r |
13 | num_VAR equ r8\r |
14 | table_VAR equ r9\r |
15 | else\r |
; Non-x64 build: the arguments are memory operands addressed from r4. They are
; read by MY_PROLOG after it has executed MY_PUSH_4_REGS.
; NOTE(review): REG_SIZE * 5 presumably skips the 4 pushed registers plus the
; return address -- confirm against 7zAsm.asm.
16 | if (IS_CDECL gt 0)\r |
; IS_CDECL: crc, data and size occupy three consecutive REG_SIZE slots.
17 | crc_OFFS equ (REG_SIZE * 5)\r |
18 | data_OFFS equ (REG_SIZE + crc_OFFS)\r |
19 | size_OFFS equ (REG_SIZE + data_OFFS)\r |
20 | else\r |
; Otherwise size is the first r4-relative slot; MY_PROLOG takes crc from
; REG_ABI_PARAM_0_x on this path.
21 | size_OFFS equ (REG_SIZE * 5)\r |
22 | endif\r |
; The table pointer is the slot right after size in both variants.
23 | table_OFFS equ (REG_SIZE + size_OFFS)\r |
; As on x64, MY_PROLOG later overwrites num_VAR with the buffer end address.
24 | num_VAR equ [r4 + size_OFFS]\r |
25 | table_VAR equ [r4 + table_OFFS]\r |
26 | endif\r |
27 | \r |
; Address prefix for reading input data: "[SRCDAT k]" expands to
; [rD + rN * 1 + 4 * k], i.e. the k-th 32-bit word starting at address rD + rN.
; The trailing "4 *" is intentional; the word index is supplied at the use site.
28 | SRCDAT equ rD + rN * 1 + 4 *\r |
29 | \r |
; CRC insn, dst, idx, tab
; Emits "insn dst, <32-bit table entry>". The entry is element idx (scaled by
; 4 bytes) of lookup table number tab; tables start at rT and are 0400h bytes
; apart (256 entries of 4 bytes each).
30 | CRC macro insn:req, dst:req, idx:req, tab:req\r |
31 | insn dst, DWORD PTR [rT + 0400h * tab + idx * 4]\r |
32 | endm\r |
33 | \r |
; CRC_XOR acc, idx, tab
; acc ^= entry idx of lookup table tab (expands through the CRC macro).
34 | CRC_XOR macro acc:req, idx:req, tab:req\r |
35 | CRC xor, acc, idx, tab\r |
36 | endm\r |
37 | \r |
; CRC_MOV out, idx, tab
; out = entry idx of lookup table tab (expands through the CRC macro).
38 | CRC_MOV macro out:req, idx:req, tab:req\r |
39 | CRC mov, out, idx, tab\r |
40 | endm\r |
41 | \r |
; CRC1b -- fold one input byte into the running CRC.
;   in/out : x0 = CRC value
;            rD = data pointer (advanced by 1)
;            rN = bytes remaining (decremented by 1)
;   scratch: x3, x6 (x6 is used as r6 for the table index)
;   flags  : left by the final "dec rN"; MY_PROLOG branches on ZF directly
;            after this macro, so the dec must stay the last instruction.
; Computes x0 = table0[(x0 XOR byte) AND 0FFh] XOR (x0 SHR 8).
; NOTE(review): relies on x0_L naming the low 8 bits of x0 and r6 naming the
; full-width form of x6, as defined in 7zAsm.asm -- confirm there.
42 | CRC1b macro\r |
; x6 = next input byte, zero-extended
43 | movzx x6, BYTE PTR [rD]\r |
44 | inc rD\r |
; x6 = table index = input byte XOR low byte of the CRC
45 | movzx x3, x0_L\r |
46 | xor x6, x3\r |
47 | shr x0, 8\r |
48 | CRC xor, x0, r6, 0\r |
49 | dec rN\r |
50 | endm\r |
51 | \r |
; MY_PROLOG crc_end
; Function entry shared by CrcUpdateT8 and CrcUpdateT4.
;   crc_end - label of the byte-wise tail loop (defined by MY_EPILOG); jumped
;             to whenever the 8-byte main loop cannot run.
; Steps:
;   1. save registers; load x0 = crc, rD = data, rN = size, rT = table
;   2. size == 0: go to crc_end
;   3. consume single bytes until rD is a multiple of 8 (or size reaches 0)
;   4. fewer than 16 bytes left: go to crc_end
;   5. main-loop setup:
;        num_VAR = end address (data + size)
;        rN      = (end - 8) AND NOT 7      ; address where the loop stops
;        rD      = data - rN                ; rD + rN = current address
;        x0     ^= first data dword
52 | MY_PROLOG macro crc_end:req\r |
53 | \r |
54 | ifdef x64\r |
55 | if (IS_LINUX gt 0)\r |
56 | MY_PUSH_2_REGS\r |
; Order matters here: per the register notes on these lines, the size arrives
; in r2, which is also rD, so rN must be loaded before rD is overwritten.
57 | mov x0, REG_ABI_PARAM_0_x ; x0 = x7\r |
58 | mov rT, REG_ABI_PARAM_3 ; r5 = r1\r |
59 | mov rN, REG_ABI_PARAM_2 ; r7 = r2\r |
60 | mov rD, REG_ABI_PARAM_1 ; r2 = r6\r |
61 | else\r |
62 | MY_PUSH_4_REGS\r |
63 | mov x0, REG_ABI_PARAM_0_x ; x0 = x1\r |
64 | mov rT, REG_ABI_PARAM_3 ; r5 = r9\r |
65 | mov rN, REG_ABI_PARAM_2 ; r7 = r8\r |
; The data pointer already arrives in rD on this path, so the move stays
; disabled.
66 | ; mov rD, REG_ABI_PARAM_1 ; r2 = r2\r |
67 | endif\r |
68 | else\r |
69 | MY_PUSH_4_REGS\r |
70 | if (IS_CDECL gt 0)\r |
71 | mov x0, [r4 + crc_OFFS]\r |
72 | mov rD, [r4 + data_OFFS]\r |
73 | else\r |
; NOTE(review): rD is not loaded on this path; presumably the data pointer is
; already in that register under this calling convention -- confirm.
74 | mov x0, REG_ABI_PARAM_0_x\r |
75 | endif\r |
76 | mov rN, num_VAR\r |
77 | mov rT, table_VAR\r |
78 | endif\r |
79 | \r |
; Nothing to do for an empty buffer.
80 | test rN, rN\r |
81 | jz crc_end\r |
; Alignment loop: one byte at a time until the low 3 bits of rD are clear.
82 | @@:\r |
83 | test rD, 7\r |
84 | jz @F\r |
85 | CRC1b\r |
; ZF comes from "dec rN" inside CRC1b: repeat while bytes remain.
86 | jnz @B\r |
; Reached with rD aligned or with rN == 0; both cases are covered by the
; "fewer than 16 bytes" check.
87 | @@:\r |
88 | cmp rN, 16\r |
89 | jb crc_end\r |
; rN = end address; it is parked in num_VAR for MY_EPILOG.
90 | add rN, rD\r |
91 | mov num_VAR, rN\r |
; rN = loop stop address; rD becomes a negative offset from it that the main
; loop advances by 8 until it reaches 0.
92 | sub rN, 8\r |
93 | and rN, NOT 7\r |
94 | sub rD, rN\r |
; Main-loop entry condition: x0 already has the current data dword XORed in.
95 | xor x0, [SRCDAT 0]\r |
96 | endm\r |
97 | \r |
; MY_EPILOG crc_end
; Function exit shared by CrcUpdateT8 and CrcUpdateT4. Execution falls in from
; the main loop with rD == 0; MY_PROLOG jumps straight to crc_end when the
; main loop is skipped.
;   crc_end - name of the label this macro defines at the byte-wise tail loop.
; The result is left in x0. No return instruction is emitted here.
; NOTE(review): the return is presumably emitted by MY_ENDP -- confirm in
; 7zAsm.asm.
98 | MY_EPILOG macro crc_end:req\r |
; The main loop leaves x0 XORed with the data dword that follows the last
; processed group; XOR it again to remove it. rD is 0, so this reads [rN].
99 | xor x0, [SRCDAT 0]\r |
; Back to pointer/count form: rD = current address, rN = end - current.
100 | mov rD, rN\r |
101 | mov rN, num_VAR\r |
102 | sub rN, rD\r |
; Tail loop: process the remaining bytes one at a time.
103 | crc_end:\r |
104 | test rN, rN\r |
105 | jz @F\r |
106 | CRC1b\r |
107 | jmp crc_end\r |
108 | @@:\r |
; Restore what MY_PROLOG pushed (2 registers on x64 Linux, 4 otherwise).
; NOTE(review): MY_PROLOG selects with "ifdef x64" while this tests
; "IS_X64 gt 0"; both symbols come from 7zAsm.asm and presumably always agree
; -- confirm.
109 | if (IS_X64 gt 0) and (IS_LINUX gt 0)\r |
110 | MY_POP_2_REGS\r |
111 | else\r |
112 | MY_POP_4_REGS\r |
113 | endif\r |
114 | endm\r |
115 | \r |
; CrcUpdateT8 -- CRC update that consumes 8 data bytes per main-loop pass and
; uses lookup tables 0..7.
; Arguments (loaded by MY_PROLOG): crc, data, size, table. Result in x0.
; NOTE(review): the second MY_PROC operand (4) is presumably the parameter
; count -- confirm in 7zAsm.asm.
;
; State at main_loop_8:
;   x0 = CRC XOR dword 0 of the current 8-byte group
;   x1 = dword 1 of the current group
; During a pass, x6 starts as dword 0 of the following group and collects the
; eight table entries, so the new x0 is again "CRC XOR next dword".
; Each pass reads [SRCDAT 2] and [SRCDAT 3], i.e. up to 16 bytes from the
; current position; MY_PROLOG therefore requires at least 16 bytes and places
; the loop stop address at (end - 8) rounded down to a multiple of 8.
; NOTE(review): the byte positions below assume xN_L / xN_H name bits 0..7 and
; 8..15 of xN, as defined in 7zAsm.asm.
116 | MY_PROC CrcUpdateT8, 4\r |
117 | MY_PROLOG crc_end_8\r |
118 | mov x1, [SRCDAT 1]\r |
119 | align 16\r |
120 | main_loop_8:\r |
121 | mov x6, [SRCDAT 2]\r |
; Bytes 0 and 1 of x1 -> tables 3 and 2.
122 | movzx x3, x1_L\r |
123 | CRC_XOR x6, r3, 3\r |
124 | movzx x3, x1_H\r |
125 | CRC_XOR x6, r3, 2\r |
; Bytes 2 and 3 of x1 -> tables 1 and 0. x1 is reduced to its own byte 3 and
; used directly as the index (r1) for table 0.
126 | shr x1, 16\r |
127 | movzx x3, x1_L\r |
128 | movzx x1, x1_H\r |
129 | CRC_XOR x6, r3, 1\r |
; x3 is loaded with byte 0 of x0 here and consumed by the table-7 lookup below.
130 | movzx x3, x0_L\r |
131 | CRC_XOR x6, r1, 0\r |
132 | \r |
; x1 is free again: fetch dword 1 of the following group for the next pass.
133 | mov x1, [SRCDAT 3]\r |
; Bytes 0..3 of x0 -> tables 7, 6, 5, 4.
134 | CRC_XOR x6, r3, 7\r |
135 | movzx x3, x0_H\r |
136 | shr x0, 16\r |
137 | CRC_XOR x6, r3, 6\r |
138 | movzx x3, x0_L\r |
139 | CRC_XOR x6, r3, 5\r |
140 | movzx x3, x0_H\r |
141 | CRC_MOV x0, r3, 4\r |
142 | xor x0, x6\r |
; rD is a negative offset from the stop address; the loop ends when it hits 0
; (ZF from this add).
143 | add rD, 8\r |
144 | jnz main_loop_8\r |
145 | \r |
146 | MY_EPILOG crc_end_8\r |
147 | MY_ENDP\r |
148 | \r |
; CrcUpdateT4 -- CRC update that uses lookup tables 0..3. Each main-loop pass
; consumes 8 data bytes as two 4-byte steps.
; Arguments (loaded by MY_PROLOG): crc, data, size, table. Result in x0.
; NOTE(review): the second MY_PROC operand (4) is presumably the parameter
; count -- confirm in 7zAsm.asm.
;
; State at main_loop_4: x0 = CRC XOR dword 0 of the current 8-byte group.
; Step 1 produces x1 = CRC XOR dword 1; step 2 produces x0 = CRC XOR dword 2,
; which is dword 0 of the following group once rD has advanced by 8.
; In each step bytes 0, 1, 2, 3 of the input go to tables 3, 2, 1, 0.
; NOTE(review): the byte positions assume xN_L / xN_H name bits 0..7 and
; 8..15 of xN, as defined in 7zAsm.asm.
149 | MY_PROC CrcUpdateT4, 4\r |
150 | MY_PROLOG crc_end_4\r |
151 | align 16\r |
152 | main_loop_4:\r |
; Step 1: split x0 into x1 = byte 0, x3 = byte 1, x0 = byte 2, x6 = byte 3.
153 | movzx x1, x0_L\r |
154 | movzx x3, x0_H\r |
155 | shr x0, 16\r |
156 | movzx x6, x0_H\r |
157 | and x0, 0FFh\r |
158 | CRC_MOV x1, r1, 3\r |
; Fold in the next data dword before the remaining table entries.
159 | xor x1, [SRCDAT 1]\r |
160 | CRC_XOR x1, r3, 2\r |
161 | CRC_XOR x1, r6, 0\r |
162 | CRC_XOR x1, r0, 1\r |
163 | \r |
; Step 2: same as step 1 with the roles of x0 and x1 swapped.
164 | movzx x0, x1_L\r |
165 | movzx x3, x1_H\r |
166 | shr x1, 16\r |
167 | movzx x6, x1_H\r |
168 | and x1, 0FFh\r |
169 | CRC_MOV x0, r0, 3\r |
170 | xor x0, [SRCDAT 2]\r |
171 | CRC_XOR x0, r3, 2\r |
172 | CRC_XOR x0, r6, 0\r |
173 | CRC_XOR x0, r1, 1\r |
; rD is a negative offset from the stop address; the loop ends when it hits 0
; (ZF from this add).
174 | add rD, 8\r |
175 | jnz main_loop_4\r |
176 | \r |
177 | MY_EPILOG crc_end_4\r |
178 | MY_ENDP\r |
179 | \r |
180 | end\r |