attempt to fix build
[pcsx_rearmed.git] / deps / libchdr / deps / lzma-22.01 / src / Asm / x86 / LzFindOpt.asm
CommitLineData
9e052883 1; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function\r
2; 2021-07-21: Igor Pavlov : Public domain\r
3;\r
4\r
5ifndef x64\r
6; x64=1\r
7; .err <x64_IS_REQUIRED>\r
8endif\r
9\r
10include 7zAsm.asm\r
11\r
12MY_ASM_START\r
13\r
14_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'\r
15\r
16MY_ALIGN macro num:req\r
17 align num\r
18endm\r
19\r
20MY_ALIGN_32 macro\r
21 MY_ALIGN 32\r
22endm\r
23\r
24MY_ALIGN_64 macro\r
25 MY_ALIGN 64\r
26endm\r
27\r
28\r
29t0_L equ x0_L\r
30t0_x equ x0\r
31t0 equ r0\r
32t1_x equ x3\r
33t1 equ r3\r
34\r
35cp_x equ t1_x\r
36cp_r equ t1\r
37m equ x5\r
38m_r equ r5\r
39len_x equ x6\r
40len equ r6\r
41diff_x equ x7\r
42diff equ r7\r
43len0 equ r10\r
44len1_x equ x11\r
45len1 equ r11\r
46maxLen_x equ x12\r
47maxLen equ r12\r
48d equ r13\r
49ptr0 equ r14\r
50ptr1 equ r15\r
51\r
52d_lim equ m_r\r
53cycSize equ len_x\r
54hash_lim equ len0\r
55delta1_x equ len1_x\r
56delta1_r equ len1\r
57delta_x equ maxLen_x\r
58delta_r equ maxLen\r
59hash equ ptr0\r
60src equ ptr1\r
61\r
62\r
63\r
64if (IS_LINUX gt 0)\r
65\r
66; r1 r2 r8 r9 : win32\r
67; r7 r6 r2 r1 r8 r9 : linux\r
68\r
69lenLimit equ r8\r
70lenLimit_x equ x8\r
71; pos_r equ r2\r
72pos equ x2\r
73cur equ r1\r
74son equ r9\r
75\r
76else\r
77\r
78lenLimit equ REG_ABI_PARAM_2\r
79lenLimit_x equ REG_ABI_PARAM_2_x\r
80pos equ REG_ABI_PARAM_1_x\r
81cur equ REG_ABI_PARAM_0\r
82son equ REG_ABI_PARAM_3\r
83\r
84endif\r
85\r
86\r
87if (IS_LINUX gt 0)\r
88 maxLen_OFFS equ (REG_SIZE * (6 + 1))\r
89else\r
90 cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4))\r
91 d_OFFS equ (REG_SIZE + cutValue_OFFS)\r
92 maxLen_OFFS equ (REG_SIZE + d_OFFS)\r
93endif\r
94 hash_OFFS equ (REG_SIZE + maxLen_OFFS)\r
95 limit_OFFS equ (REG_SIZE + hash_OFFS)\r
96 size_OFFS equ (REG_SIZE + limit_OFFS)\r
97 cycPos_OFFS equ (REG_SIZE + size_OFFS)\r
98 cycSize_OFFS equ (REG_SIZE + cycPos_OFFS)\r
99 posRes_OFFS equ (REG_SIZE + cycSize_OFFS)\r
100 \r
101if (IS_LINUX gt 0)\r
102else\r
103 cutValue_PAR equ [r0 + cutValue_OFFS]\r
104 d_PAR equ [r0 + d_OFFS]\r
105endif\r
106 maxLen_PAR equ [r0 + maxLen_OFFS]\r
107 hash_PAR equ [r0 + hash_OFFS]\r
108 limit_PAR equ [r0 + limit_OFFS]\r
109 size_PAR equ [r0 + size_OFFS]\r
110 cycPos_PAR equ [r0 + cycPos_OFFS]\r
111 cycSize_PAR equ [r0 + cycSize_OFFS]\r
112 posRes_PAR equ [r0 + posRes_OFFS]\r
113\r
114\r
115 cutValue_VAR equ DWORD PTR [r4 + 8 * 0]\r
116 cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4]\r
117 cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0]\r
118 cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4]\r
119 hash_VAR equ QWORD PTR [r4 + 8 * 2]\r
120 limit_VAR equ QWORD PTR [r4 + 8 * 3]\r
121 size_VAR equ QWORD PTR [r4 + 8 * 4]\r
122 distances equ QWORD PTR [r4 + 8 * 5]\r
123 maxLen_VAR equ QWORD PTR [r4 + 8 * 6]\r
124\r
125 Old_RSP equ QWORD PTR [r4 + 8 * 7]\r
126 LOCAL_SIZE equ 8 * 8\r
127\r
128COPY_VAR_32 macro dest_var, src_var\r
129 mov x3, src_var\r
130 mov dest_var, x3\r
131endm\r
132\r
133COPY_VAR_64 macro dest_var, src_var\r
134 mov r3, src_var\r
135 mov dest_var, r3\r
136endm\r
137\r
138\r
139; MY_ALIGN_64\r
140MY_PROC GetMatchesSpecN_2, 13\r
141MY_PUSH_PRESERVED_ABI_REGS\r
142 mov r0, RSP\r
143 lea r3, [r0 - LOCAL_SIZE]\r
144 and r3, -64\r
145 mov RSP, r3\r
146 mov Old_RSP, r0\r
147\r
148if (IS_LINUX gt 0)\r
149 mov d, REG_ABI_PARAM_5 ; r13 = r9\r
150 mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8\r
151 mov son, REG_ABI_PARAM_3 ; r9 = r1\r
152 mov r8, REG_ABI_PARAM_2 ; r8 = r2\r
153 mov pos, REG_ABI_PARAM_1_x ; r2 = x6\r
154 mov r1, REG_ABI_PARAM_0 ; r1 = r7\r
155else\r
156 COPY_VAR_32 cutValue_VAR, cutValue_PAR\r
157 mov d, d_PAR\r
158endif\r
159\r
160 COPY_VAR_64 limit_VAR, limit_PAR\r
161 \r
162 mov hash_lim, size_PAR\r
163 mov size_VAR, hash_lim\r
164 \r
165 mov cp_x, cycPos_PAR\r
166 mov hash, hash_PAR\r
167\r
168 mov cycSize, cycSize_PAR\r
169 mov cycSize_VAR, cycSize\r
170 \r
171 ; we want cur in (rcx). So we change the cur and lenLimit variables\r
172 sub lenLimit, cur\r
173 neg lenLimit_x\r
174 inc lenLimit_x\r
175 \r
176 mov t0_x, maxLen_PAR\r
177 sub t0, lenLimit\r
178 mov maxLen_VAR, t0\r
179\r
180 jmp main_loop\r
181\r
182MY_ALIGN_64\r
183fill_empty:\r
184 ; ptr0 = *ptr1 = kEmptyHashValue;\r
185 mov QWORD PTR [ptr1], 0\r
186 inc pos\r
187 inc cp_x\r
188 mov DWORD PTR [d - 4], 0\r
189 cmp d, limit_VAR\r
190 jae fin\r
191 cmp hash, hash_lim\r
192 je fin\r
193\r
194; MY_ALIGN_64\r
195main_loop:\r
196 ; UInt32 delta = *hash++;\r
197 mov diff_x, [hash] ; delta\r
198 add hash, 4\r
199 ; mov cycPos_VAR, cp_x\r
200 \r
201 inc cur\r
202 add d, 4\r
203 mov m, pos\r
204 sub m, diff_x; ; matchPos\r
205 \r
206 ; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
207 lea ptr1, [son + 8 * cp_r]\r
208 ; mov cycSize, cycSize_VAR\r
209 cmp pos, cycSize\r
210 jb directMode ; if (pos < cycSize_VAR)\r
211 \r
212 ; CYC MODE\r
213\r
214 cmp diff_x, cycSize\r
215 jae fill_empty ; if (delta >= cycSize_VAR)\r
216 \r
217 xor t0_x, t0_x\r
218 mov cycPos_VAR, cp_x\r
219 sub cp_x, diff_x\r
220 ; jae prepare_for_tree_loop\r
221 ; add cp_x, cycSize\r
222 cmovb t0_x, cycSize\r
223 add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)\r
224 jmp prepare_for_tree_loop\r
225 \r
226 \r
227directMode:\r
228 cmp diff_x, pos\r
229 je fill_empty ; if (delta == pos)\r
230 jae fin_error ; if (delta >= pos)\r
231 \r
232 mov cycPos_VAR, cp_x\r
233 mov cp_x, m\r
234 \r
235prepare_for_tree_loop:\r
236 mov len0, lenLimit\r
237 mov hash_VAR, hash\r
238 ; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;\r
239 lea ptr0, [ptr1 + 4]\r
240 ; UInt32 *_distances = ++d;\r
241 mov distances, d\r
242\r
243 neg len0\r
244 mov len1, len0\r
245\r
246 mov t0_x, cutValue_VAR\r
247 mov maxLen, maxLen_VAR\r
248 mov cutValueCur_VAR, t0_x\r
249\r
250MY_ALIGN_32\r
251tree_loop:\r
252 neg diff\r
253 mov len, len0\r
254 cmp len1, len0\r
255 cmovb len, len1 ; len = (len1 < len0 ? len1 : len0);\r
256 add diff, cur\r
257\r
258 mov t0_x, [son + cp_r * 8] ; prefetch\r
259 movzx t0_x, BYTE PTR [diff + 1 * len]\r
260 lea cp_r, [son + cp_r * 8]\r
261 cmp [cur + 1 * len], t0_L\r
262 je matched_1\r
263 \r
264 jb left_0\r
265\r
266 mov [ptr1], m\r
267 mov m, [cp_r + 4]\r
268 lea ptr1, [cp_r + 4]\r
269 sub diff, cur ; FIX32\r
270 jmp next_node\r
271\r
272MY_ALIGN_32\r
273left_0:\r
274 mov [ptr0], m\r
275 mov m, [cp_r]\r
276 mov ptr0, cp_r\r
277 sub diff, cur ; FIX32\r
278 ; jmp next_node\r
279\r
280; ------------ NEXT NODE ------------\r
281; MY_ALIGN_32\r
282next_node:\r
283 mov cycSize, cycSize_VAR\r
284 dec cutValueCur_VAR\r
285 je finish_tree\r
286 \r
287 add diff_x, pos ; prev_match = pos + diff\r
288 cmp m, diff_x\r
289 jae fin_error ; if (new_match >= prev_match)\r
290 \r
291 mov diff_x, pos\r
292 sub diff_x, m ; delta = pos - new_match\r
293 cmp pos, cycSize\r
294 jae cyc_mode_2 ; if (pos >= cycSize)\r
295\r
296 mov cp_x, m\r
297 test m, m\r
298 jne tree_loop ; if (m != 0)\r
299 \r
300finish_tree:\r
301 ; ptr0 = *ptr1 = kEmptyHashValue;\r
302 mov DWORD PTR [ptr0], 0\r
303 mov DWORD PTR [ptr1], 0\r
304\r
305 inc pos\r
306 \r
307 ; _distances[-1] = (UInt32)(d - _distances);\r
308 mov t0, distances\r
309 mov t1, d\r
310 sub t1, t0\r
311 shr t1_x, 2\r
312 mov [t0 - 4], t1_x\r
313\r
314 cmp d, limit_VAR\r
315 jae fin ; if (d >= limit)\r
316 \r
317 mov cp_x, cycPos_VAR\r
318 mov hash, hash_VAR\r
319 mov hash_lim, size_VAR\r
320 inc cp_x\r
321 cmp hash, hash_lim\r
322 jne main_loop ; if (hash != size)\r
323 jmp fin\r
324 \r
325\r
326MY_ALIGN_32\r
327cyc_mode_2:\r
328 cmp diff_x, cycSize\r
329 jae finish_tree ; if (delta >= cycSize)\r
330\r
331 mov cp_x, cycPos_VAR\r
332 xor t0_x, t0_x\r
333 sub cp_x, diff_x ; cp_x = cycPos - delta\r
334 cmovb t0_x, cycSize\r
335 add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)\r
336 jmp tree_loop\r
337\r
338 \r
339MY_ALIGN_32\r
340matched_1:\r
341\r
342 inc len\r
343 ; cmp len_x, lenLimit_x\r
344 je short lenLimit_reach\r
345 movzx t0_x, BYTE PTR [diff + 1 * len]\r
346 cmp [cur + 1 * len], t0_L\r
347 jne mismatch\r
348\r
349 \r
350MY_ALIGN_32\r
351match_loop:\r
352 ; while (++len != lenLimit) (len[diff] != len[0]) ;\r
353\r
354 inc len\r
355 ; cmp len_x, lenLimit_x\r
356 je short lenLimit_reach\r
357 movzx t0_x, BYTE PTR [diff + 1 * len]\r
358 cmp BYTE PTR [cur + 1 * len], t0_L\r
359 je match_loop\r
360\r
361mismatch:\r
362 jb left_2\r
363\r
364 mov [ptr1], m\r
365 mov m, [cp_r + 4]\r
366 lea ptr1, [cp_r + 4]\r
367 mov len1, len\r
368\r
369 jmp max_update\r
370 \r
371MY_ALIGN_32\r
372left_2:\r
373 mov [ptr0], m\r
374 mov m, [cp_r]\r
375 mov ptr0, cp_r\r
376 mov len0, len\r
377\r
378max_update:\r
379 sub diff, cur ; restore diff\r
380\r
381 cmp maxLen, len\r
382 jae next_node\r
383 \r
384 mov maxLen, len\r
385 add len, lenLimit\r
386 mov [d], len_x\r
387 mov t0_x, diff_x\r
388 not t0_x\r
389 mov [d + 4], t0_x\r
390 add d, 8\r
391 \r
392 jmp next_node\r
393\r
394\r
395 \r
396MY_ALIGN_32\r
397lenLimit_reach:\r
398\r
399 mov delta_r, cur\r
400 sub delta_r, diff\r
401 lea delta1_r, [delta_r - 1]\r
402\r
403 mov t0_x, [cp_r]\r
404 mov [ptr1], t0_x\r
405 mov t0_x, [cp_r + 4]\r
406 mov [ptr0], t0_x\r
407\r
408 mov [d], lenLimit_x\r
409 mov [d + 4], delta1_x\r
410 add d, 8\r
411\r
412 ; _distances[-1] = (UInt32)(d - _distances);\r
413 mov t0, distances\r
414 mov t1, d\r
415 sub t1, t0\r
416 shr t1_x, 2\r
417 mov [t0 - 4], t1_x\r
418\r
419 mov hash, hash_VAR\r
420 mov hash_lim, size_VAR\r
421\r
422 inc pos\r
423 mov cp_x, cycPos_VAR\r
424 inc cp_x\r
425\r
426 mov d_lim, limit_VAR\r
427 mov cycSize, cycSize_VAR\r
428 ; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
429 ; break;\r
430 cmp hash, hash_lim\r
431 je fin\r
432 cmp d, d_lim\r
433 jae fin\r
434 cmp delta_x, [hash]\r
435 jne main_loop\r
436 movzx t0_x, BYTE PTR [diff]\r
437 cmp [cur], t0_L\r
438 jne main_loop\r
439\r
440 ; jmp main_loop ; bypass for debug\r
441 \r
442 mov cycPos_VAR, cp_x\r
443 shl len, 3 ; cycSize * 8\r
444 sub diff, cur ; restore diff\r
445 xor t0_x, t0_x\r
446 cmp cp_x, delta_x ; cmp (cycPos_VAR, delta)\r
447 lea cp_r, [son + 8 * cp_r] ; dest\r
448 lea src, [cp_r + 8 * diff]\r
449 cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0)\r
450 add src, t0\r
451 add len, son ; len = son + cycSize * 8\r
452\r
453 \r
454MY_ALIGN_32\r
455long_loop:\r
456 add hash, 4\r
457 \r
458 ; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];\r
459 \r
460 mov t0, [src]\r
461 add src, 8\r
462 mov [cp_r], t0\r
463 add cp_r, 8\r
464 cmp src, len\r
465 cmove src, son ; if end of (son) buffer is reached, we wrap to begin\r
466\r
467 mov DWORD PTR [d], 2\r
468 mov [d + 4], lenLimit_x\r
469 mov [d + 8], delta1_x\r
470 add d, 12\r
471\r
472 inc cur\r
473\r
474 cmp hash, hash_lim\r
475 je long_footer\r
476 cmp delta_x, [hash]\r
477 jne long_footer\r
478 movzx t0_x, BYTE PTR [diff + 1 * cur]\r
479 cmp [cur], t0_L\r
480 jne long_footer\r
481 cmp d, d_lim\r
482 jb long_loop\r
483\r
484long_footer:\r
485 sub cp_r, son\r
486 shr cp_r, 3\r
487 add pos, cp_x\r
488 sub pos, cycPos_VAR\r
489 mov cycSize, cycSize_VAR\r
490 \r
491 cmp d, d_lim\r
492 jae fin\r
493 cmp hash, hash_lim\r
494 jne main_loop\r
495 jmp fin\r
496\r
497\r
498\r
499fin_error:\r
500 xor d, d\r
501 \r
502fin:\r
503 mov RSP, Old_RSP\r
504 mov t0, [r4 + posRes_OFFS]\r
505 mov [t0], pos\r
506 mov r0, d\r
507\r
508MY_POP_PRESERVED_ABI_REGS\r
509MY_ENDP\r
510\r
511_TEXT$LZFINDOPT ENDS\r
512\r
513end\r