9e052883 |
1 | ; XzCrc64Opt.asm -- CRC64 calculation : optimized version\r |
2 | ; 2021-02-06 : Igor Pavlov : Public domain\r |
3 | \r |
4 | include 7zAsm.asm\r |
5 | \r |
6 | MY_ASM_START\r |
7 | \r |
8 | ifdef x64\r |
9 | \r |
; Working-register assignments for the 64-bit build. The right-hand names
; (r5, r8, r9, r10) are register names supplied by 7zAsm.asm (not shown here).
; rD: data cursor. Holds a byte address in the byte-at-a-time loops and a
;     byte offset (added to rN) in the 4-byte loop.
10 | rD equ r9\r |
; rN: remaining byte count in the byte-at-a-time loops; base address for
;     SRCDAT4 in the 4-byte loop.
11 | rN equ r10\r |
; rT: base address of the CRC lookup table.
12 | rT equ r5\r |
; num_VAR: scratch register in which MY_PROLOG stores the end-of-buffer address
;          and from which MY_EPILOG reads it back.
13 | num_VAR equ r8\r |
14 | \r |
; SRCDAT4: the 32-bit source word at address rD + rN.
15 | SRCDAT4 equ dword ptr [rD + rN * 1]\r |
16 | \r |
; CRC_XOR dest, src, t   (64-bit build)
;   XORs one 64-bit lookup-table entry into dest.
;   dest : 64-bit register that receives the XOR.
;   src  : register holding the entry index; it is scaled by 8 bytes per entry.
;   t    : sub-table number; consecutive sub-tables are 0800h bytes apart
;          (room for 256 entries of 8 bytes each).
;   The table base address is read from rT.
17 | CRC_XOR macro dest:req, src:req, t:req\r |
18 | xor dest, QWORD PTR [rT + src * 8 + 0800h * t]\r |
19 | endm\r |
20 | \r |
; CRC1b   (64-bit build)
;   Consumes one input byte and folds it into the running CRC held in r0.
;   In      : rD = address of the byte, rN = byte counter, r0 = running CRC,
;             rT = table base.
;   Out     : rD + 1, rN - 1, r0 updated.
;   Scratch : x3, x6.
;   Flags   : the last instruction is `dec rN`, so ZF on exit tells the caller
;             whether rN has reached zero (MY_PROLOG relies on this).
;   x3, x6 and x0_L are aliases from 7zAsm.asm (not shown); the comments below
;   assume xN is the 32-bit view of rN and x0_L is the low byte of r0 -- TODO confirm.
21 | CRC1b macro\r |
; x6 = next input byte, zero-extended.
22 | movzx x6, BYTE PTR [rD]\r |
23 | inc rD\r |
; x3 = low byte of the running CRC.
24 | movzx x3, x0_L\r |
; x6 = table index = input byte XOR low CRC byte.
25 | xor x6, x3\r |
; Drop the CRC byte that has just been consumed.
26 | shr r0, 8\r |
; r0 ^= entry r6 of sub-table 0.
27 | CRC_XOR r0, r6, 0\r |
; One byte fewer to go; this also sets ZF for the code that follows the macro.
28 | dec rN\r |
29 | endm\r |
30 | \r |
; MY_PROLOG crc_end   (64-bit build)
;   Entry sequence placed at the start of the CRC procedure.
;   crc_end : label to jump to when no 4-byte loop iterations are possible
;             (the label itself is emitted by MY_EPILOG).
;   Steps:
;     1. save registers and move the four ABI parameters into r0/rD/rN/rT;
;     2. process single bytes with CRC1b until rD is 4-byte aligned;
;     3. if fewer than 8 bytes remain, jump to crc_end;
;     4. otherwise convert rD/rN to the offset+base form used by SRCDAT4 and
;        XOR the first source dword into r0.
;   State on fall-through (L = (end address - 4) AND NOT 3):
;     num_VAR = end address, rN = L + 4, rD = (aligned data address) - L,
;     so SRCDAT4 addresses the dword after the one already XORed into r0.
31 | MY_PROLOG macro crc_end:req\r |
; Save registers; judging by the macro names, two on ABI_LINUX and four otherwise
; (the push macros live in 7zAsm.asm, not shown).
32 | ifdef ABI_LINUX\r |
33 | MY_PUSH_2_REGS\r |
34 | else\r |
35 | MY_PUSH_4_REGS\r |
36 | endif\r |
; Parameter 0 -> r0 (running CRC), parameter 2 -> rN (byte count),
; parameter 3 -> rT (table base), parameter 1 -> rD (data address).
; NOTE(review): rT is loaded before rD; the order may matter if an ABI parameter
; register is the same physical register as rD under some ABI -- confirm against 7zAsm.asm.
37 | mov r0, REG_ABI_PARAM_0\r |
38 | mov rN, REG_ABI_PARAM_2\r |
39 | mov rT, REG_ABI_PARAM_3\r |
40 | mov rD, REG_ABI_PARAM_1\r |
; Nothing to do for an empty buffer.
41 | test rN, rN\r |
42 | jz crc_end\r |
; Alignment loop: one byte at a time until the two low bits of rD are clear.
43 | @@:\r |
44 | test rD, 3\r |
45 | jz @F\r |
46 | CRC1b\r |
; ZF comes from the `dec rN` at the end of CRC1b: keep going while bytes remain.
47 | jnz @B\r |
48 | @@:\r |
; The 4-byte loop needs at least 8 bytes; smaller remainders go to the byte loop at crc_end.
49 | cmp rN, 8\r |
50 | jb crc_end\r |
; rN = end address (data address + remaining count); remember it in num_VAR.
51 | add rN, rD\r |
52 | mov num_VAR, rN\r |
; rN = L = (end - 4) rounded down to a multiple of 4.
53 | sub rN, 4\r |
54 | and rN, NOT 3\r |
; rD becomes a byte offset relative to L, so rD + rN still addresses the current data.
55 | sub rD, rN\r |
; XOR the first source dword into the CRC (the 32-bit load goes through x1, then r1 is XORed into r0).
56 | mov x1, SRCDAT4\r |
57 | xor r0, r1\r |
; Advance the base by 4 so SRCDAT4 now addresses the following dword.
58 | add rN, 4\r |
59 | endm\r |
60 | \r |
; MY_EPILOG crc_end   (64-bit build)
;   Exit sequence placed after the 4-byte loop.
;   crc_end : name of the label this macro defines at the start of the trailing
;             byte loop; MY_PROLOG jumps here when the 4-byte loop is skipped.
;   Steps: XOR the dword at SRCDAT4 into r0 once more (the same XOR that
;   MY_PROLOG / the main loop applied ahead of time, so it cancels out),
;   turn rD/rN back into address + remaining count, process the remaining bytes
;   with CRC1b, then restore the saved registers.
61 | MY_EPILOG macro crc_end:req\r |
; Step the base back by 4 so SRCDAT4 addresses the dword that was XORed in ahead of time.
62 | sub rN, 4\r |
63 | mov x1, SRCDAT4\r |
64 | xor r0, r1\r |
; rD = rN (used as the current address by the byte loop below);
; rN = saved end address - rD = bytes still to process.
65 | mov rD, rN\r |
66 | mov rN, num_VAR\r |
67 | sub rN, rD\r |
; Trailing byte loop: run CRC1b until rN is zero.
68 | crc_end:\r |
69 | test rN, rN\r |
70 | jz @F\r |
71 | CRC1b\r |
72 | jmp crc_end\r |
73 | @@:\r |
; Restore the registers saved by MY_PROLOG (same ABI split as there).
74 | ifdef ABI_LINUX\r |
75 | MY_POP_2_REGS\r |
76 | else\r |
77 | MY_POP_4_REGS\r |
78 | endif\r |
79 | endm\r |
80 | \r |
; XzCrc64UpdateT4   (64-bit build)
;   CRC64 update that consumes 4 input bytes per main-loop iteration using four
;   sub-tables (numbers 0..3, 0800h bytes apart, base in rT).
;   Parameters as consumed by MY_PROLOG: 0 = running CRC, 1 = data address,
;   2 = byte count, 3 = table base.
;   The updated CRC is left in r0 (presumably the ABI return register -- confirm in 7zAsm.asm).
;   The second MY_PROC argument (4) is presumably the parameter count -- TODO confirm.
;   Loop invariant: the low 32 bits of r0 already contain the current source
;   dword XORed in; rD is a byte offset that steps by 4 until it reaches zero;
;   SRCDAT4 addresses the dword after the current one.
81 | MY_PROC XzCrc64UpdateT4, 4\r |
82 | MY_PROLOG crc_end_4\r |
83 | align 16\r |
84 | main_loop_4:\r |
; r1 = next source dword (loaded one iteration ahead of its use).
85 | mov x1, SRCDAT4\r |
; Peel the four low bytes of r0 into x2, x3, x6, x7, shifting r0 right by 32 in total.
86 | movzx x2, x0_L\r |
87 | movzx x3, x0_H\r |
88 | shr r0, 16\r |
89 | movzx x6, x0_L\r |
90 | movzx x7, x0_H\r |
91 | shr r0, 16\r |
; The lowest byte uses sub-table 3, the highest of the four uses sub-table 0.
; The XORs alternate between r1 and r0, forming two independent chains.
92 | CRC_XOR r1, r2, 3\r |
93 | CRC_XOR r0, r3, 2\r |
94 | CRC_XOR r1, r6, 1\r |
95 | CRC_XOR r0, r7, 0\r |
; Merge the two chains; this also XORs the next source dword into the new CRC.
96 | xor r0, r1\r |
97 | \r |
; Advance the offset; the loop ends when rD reaches zero (ZF from this add).
98 | add rD, 4\r |
99 | jnz main_loop_4\r |
100 | \r |
101 | MY_EPILOG crc_end_4\r |
102 | MY_ENDP\r |
103 | \r |
104 | else\r |
105 | ; x86 (32-bit)\r |
106 | \r |
; Working-register assignments for the 32-bit build. The right-hand names
; (r1, r5, r7) are register names supplied by 7zAsm.asm (not shown here).
; rD: data cursor (address in the byte loops, byte offset in the 4-byte loop).
107 | rD equ r1\r |
; rN: remaining byte count in the byte loops; base address for SRCDAT4 in the 4-byte loop.
108 | rN equ r7\r |
; rT: base address of the CRC lookup table.
109 | rT equ r5\r |
110 | \r |
; Offset from r4 to the crc argument after MY_PROLOG has executed MY_PUSH_4_REGS:
; presumably 4 saved registers + the return address = 5 slots of REG_SIZE bytes
; -- TODO confirm against 7zAsm.asm.
111 | crc_OFFS equ (REG_SIZE * 5)\r |
112 | \r |
; Stack-argument offsets (relative to r4) for the two supported 32-bit calling
; conventions. All offsets build on crc_OFFS; the crc argument occupies 8 bytes.
113 | if (IS_CDECL gt 0) or (IS_LINUX gt 0)\r |
114 | ; cdecl or (GNU fastcall) stack:\r |
115 | ; (UInt32 *) table\r |
116 | ; size_t size\r |
117 | ; void * data\r |
118 | ; (UInt64) crc\r |
119 | ; ret-ip <-(r4)\r |
; data follows the 8-byte crc; size and table follow at REG_SIZE steps.
120 | data_OFFS equ (8 + crc_OFFS)\r |
121 | size_OFFS equ (REG_SIZE + data_OFFS)\r |
122 | table_OFFS equ (REG_SIZE + size_OFFS)\r |
; num_VAR reuses the stack slot of the size argument: MY_PROLOG first reads the
; size into rN and later overwrites the slot with the end-of-buffer address.
123 | num_VAR equ [r4 + size_OFFS]\r |
124 | table_VAR equ [r4 + table_OFFS]\r |
125 | else\r |
126 | ; Windows fastcall:\r |
127 | ; r1 = data, r2 = size\r |
128 | ; stack:\r |
129 | ; (UInt32 *) table\r |
130 | ; (UInt64) crc\r |
131 | ; ret-ip <-(r4)\r |
; Only crc and table are on the stack here, so table follows the 8-byte crc directly.
132 | table_OFFS equ (8 + crc_OFFS)\r |
133 | table_VAR equ [r4 + table_OFFS]\r |
; num_VAR reuses the table argument's stack slot: MY_PROLOG loads rT from it
; before overwriting it with the end-of-buffer address.
134 | num_VAR equ table_VAR\r |
135 | endif\r |
136 | \r |
; SRCDAT4: the 32-bit source word at address rD + rN (32-bit build).
137 | SRCDAT4 equ dword ptr [rD + rN * 1]\r |
138 | \r |
; CRC op0, op1, dest0, dest1, src, t   (32-bit build)
;   Applies two instructions to the two 32-bit halves of one 8-byte table entry.
;   op0, op1     : instruction mnemonics (this file passes xor or mov).
;   dest0, dest1 : destination registers; dest0 is combined with the dword at
;                  offset +0 of the entry, dest1 with the dword at offset +4.
;   src          : register holding the entry index (scaled by 8).
;   t            : sub-table number; sub-tables are 0800h bytes apart, base in rT.
139 | CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req\r |
140 | op0 dest0, DWORD PTR [rT + src * 8 + 0800h * t]\r |
141 | op1 dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]\r |
142 | endm\r |
143 | \r |
; CRC_XOR dest0, dest1, src, t   (32-bit build)
;   XORs the 8-byte table entry src of sub-table t into the register pair:
;   dest0 receives the dword at offset +0, dest1 the dword at offset +4.
;   Implemented as the CRC macro with xor for both halves.
144 | CRC_XOR macro dest0:req, dest1:req, src:req, t:req\r |
145 | CRC xor, xor, dest0, dest1, src, t\r |
146 | endm\r |
147 | \r |
148 | \r |
; CRC1b   (32-bit build)
;   Consumes one input byte and folds it into the running 64-bit CRC, which is
;   kept in the register pair r2:r0 (r0 = low dword, r2 = high dword).
;   In      : rD = address of the byte, rN = byte counter, rT = table base.
;   Out     : rD + 1, rN - 1, r2:r0 updated.
;   Scratch : x3, x6.
;   Flags   : the last instruction is `dec rN`, so ZF on exit tells the caller
;             whether rN has reached zero (MY_PROLOG relies on this).
;   x3, x6 and x0_L are aliases from 7zAsm.asm (not shown); x0_L is assumed to
;   be the low byte of r0 -- TODO confirm.
149 | CRC1b macro\r |
; x6 = next input byte, zero-extended.
150 | movzx x6, BYTE PTR [rD]\r |
151 | inc rD\r |
; x3 = low byte of the running CRC.
152 | movzx x3, x0_L\r |
; x6 = table index = input byte XOR low CRC byte.
153 | xor x6, x3\r |
; 64-bit right shift by 8 of r2:r0: shrd pulls the low 8 bits of r2 into the top of r0.
154 | shrd r0, r2, 8\r |
155 | shr r2, 8\r |
; r2:r0 ^= entry r6 of sub-table 0.
156 | CRC_XOR r0, r2, r6, 0\r |
; One byte fewer to go; this also sets ZF for the code that follows the macro.
157 | dec rN\r |
158 | endm\r |
159 | \r |
; MY_PROLOG crc_end   (32-bit build)
;   Entry sequence placed at the start of the CRC procedure.
;   crc_end : label to jump to when no 4-byte loop iterations are possible
;             (the label itself is emitted by MY_EPILOG).
;   Steps:
;     1. save four registers and load size/data (calling-convention dependent),
;        the 64-bit crc into r2:r0 and the table base into rT;
;     2. process single bytes with CRC1b until rD is 4-byte aligned;
;     3. if fewer than 8 bytes remain, jump to crc_end;
;     4. otherwise convert rD/rN to the offset+base form used by SRCDAT4 and
;        XOR the first source dword into r0.
;   State on fall-through (L = (end address - 4) AND NOT 3):
;     num_VAR = end address, rN = L + 4, rD = (aligned data address) - L,
;     so SRCDAT4 addresses the dword after the one already XORed into r0.
160 | MY_PROLOG macro crc_end:req\r |
161 | MY_PUSH_4_REGS\r |
162 | \r |
163 | if (IS_CDECL gt 0) or (IS_LINUX gt 0)\r |
; proc_numParams is an assembly-time variable defined outside this file;
; presumably the +2 accounts for data and size also being stack arguments here -- TODO confirm.
164 | proc_numParams = proc_numParams + 2 ; for ABI_LINUX\r |
; All arguments are on the stack: fetch size and data.
165 | mov rN, [r4 + size_OFFS]\r |
166 | mov rD, [r4 + data_OFFS]\r |
167 | else\r |
; Windows fastcall: data is already in r1 (= rD); copy size out of r2 before
; r2 is reused for the high half of the CRC below.
168 | mov rN, r2\r |
169 | endif\r |
170 | \r |
; Load the 64-bit crc argument: low dword -> x0, high dword -> x2.
171 | mov x0, [r4 + crc_OFFS]\r |
172 | mov x2, [r4 + crc_OFFS + 4]\r |
; Load the table base. This must happen before num_VAR is written, because in
; the Windows fastcall layout num_VAR is the same stack slot as table_VAR.
173 | mov rT, table_VAR\r |
; Nothing to do for an empty buffer.
174 | test rN, rN\r |
175 | jz crc_end\r |
; Alignment loop: one byte at a time until the two low bits of rD are clear.
176 | @@:\r |
177 | test rD, 3\r |
178 | jz @F\r |
179 | CRC1b\r |
; ZF comes from the `dec rN` at the end of CRC1b: keep going while bytes remain.
180 | jnz @B\r |
181 | @@:\r |
; The 4-byte loop needs at least 8 bytes; smaller remainders go to the byte loop at crc_end.
182 | cmp rN, 8\r |
183 | jb crc_end\r |
; rN = end address (data address + remaining count).
184 | add rN, rD\r |
185 | \r |
; Remember the end address in the reused stack slot.
186 | mov num_VAR, rN\r |
187 | \r |
; rN = L = (end - 4) rounded down to a multiple of 4.
188 | sub rN, 4\r |
189 | and rN, NOT 3\r |
; rD becomes a byte offset relative to L, so rD + rN still addresses the current data.
190 | sub rD, rN\r |
; XOR the first source dword into the low half of the CRC.
191 | xor r0, SRCDAT4\r |
; Advance the base by 4 so SRCDAT4 now addresses the following dword.
192 | add rN, 4\r |
193 | endm\r |
194 | \r |
; MY_EPILOG crc_end   (32-bit build)
;   Exit sequence placed after the 4-byte loop.
;   crc_end : name of the label this macro defines at the start of the trailing
;             byte loop; MY_PROLOG jumps here when the 4-byte loop is skipped.
;   Steps: XOR the dword at SRCDAT4 into r0 once more (the same XOR that was
;   applied ahead of time, so it cancels out), turn rD/rN back into address +
;   remaining count, process the remaining bytes with CRC1b, then restore the
;   four saved registers.
195 | MY_EPILOG macro crc_end:req\r |
; Step the base back by 4 so SRCDAT4 addresses the dword that was XORed in ahead of time.
196 | sub rN, 4\r |
197 | xor r0, SRCDAT4\r |
198 | \r |
; rD = rN (used as the current address by the byte loop below);
; rN = saved end address - rD = bytes still to process.
199 | mov rD, rN\r |
200 | mov rN, num_VAR\r |
201 | sub rN, rD\r |
; Trailing byte loop: run CRC1b until rN is zero.
202 | crc_end:\r |
203 | test rN, rN\r |
204 | jz @F\r |
205 | CRC1b\r |
206 | jmp crc_end\r |
207 | @@:\r |
208 | MY_POP_4_REGS\r |
209 | endm\r |
210 | \r |
; XzCrc64UpdateT4   (32-bit build)
;   CRC64 update that consumes 4 input bytes per main-loop iteration using four
;   sub-tables (numbers 0..3, 0800h bytes apart, base in rT).
;   Arguments (see the stack-layout comments earlier in this file): crc (UInt64),
;   data, size, table. The running CRC lives in r2:r0 (r0 = low dword, r2 = high dword).
;   The second MY_PROC argument (5) is presumably the number of argument slots -- TODO confirm.
;   Loop invariant: r0 already contains the current source dword XORed in, and
;   x6 already holds byte 0 of r0; rD is a byte offset that steps by 4 until it
;   reaches zero; SRCDAT4 addresses the dword after the current one.
211 | MY_PROC XzCrc64UpdateT4, 5\r |
212 | MY_PROLOG crc_end_4\r |
; Prime x6 with byte 0 of the low CRC dword for the first iteration.
213 | movzx x6, x0_L\r |
214 | align 16\r |
215 | main_loop_4:\r |
; r3 = next source dword XOR old high dword. After 4 bytes are consumed the old
; high dword becomes the new low dword, so r3 accumulates the new low half.
216 | mov r3, SRCDAT4\r |
217 | xor r3, r2\r |
218 | \r |
; Byte 0 -> sub-table 3. The high half is written with mov (not xor) because the
; new high dword starts from the table entry alone.
219 | CRC xor, mov, r3, r2, r6, 3\r |
; Byte 1 -> sub-table 2; then shift r0 so bytes 2 and 3 become addressable.
220 | movzx x6, x0_H\r |
221 | shr r0, 16\r |
222 | CRC_XOR r3, r2, r6, 2\r |
223 | \r |
; Byte 2 -> sub-table 1 (index in x6); byte 3 -> sub-table 0 (index replaces x0 itself).
224 | movzx x6, x0_L\r |
225 | movzx x0, x0_H\r |
226 | CRC_XOR r3, r2, r6, 1\r |
227 | CRC_XOR r3, r2, r0, 0\r |
; Prepare x6 = byte 0 of the new low dword for the next iteration, then move the
; new low dword into r0.
228 | movzx x6, x3_L\r |
229 | mov r0, r3\r |
230 | \r |
; Advance the offset; the loop ends when rD reaches zero (ZF from this add).
231 | add rD, 4\r |
232 | jnz main_loop_4\r |
233 | \r |
234 | MY_EPILOG crc_end_4\r |
235 | MY_ENDP\r |
236 | \r |
237 | endif ; ! x64\r |
238 | \r |
239 | end\r |