update libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / lzma-22.01 / src / LzFindOpt.c
CommitLineData
9e052883 1/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms\r
22021-07-13 : Igor Pavlov : Public domain */\r
3\r
4#include "Precomp.h"\r
5\r
6#include "CpuArch.h"\r
7#include "LzFind.h"\r
8\r
9// #include "LzFindMt.h"\r
10\r
11// #define LOG_ITERS\r
12\r
13// #define LOG_THREAD\r
14\r
15#ifdef LOG_THREAD\r
16#include <stdio.h>\r
17#define PRF(x) x\r
18#else\r
19// #define PRF(x)\r
20#endif\r
21\r
22#ifdef LOG_ITERS\r
23#include <stdio.h>\r
24UInt64 g_NumIters_Tree;\r
25UInt64 g_NumIters_Loop;\r
26UInt64 g_NumIters_Bytes;\r
27#define LOG_ITER(x) x\r
28#else\r
29#define LOG_ITER(x)\r
30#endif\r
31\r
32// ---------- BT THREAD ----------\r
33\r
34#define USE_SON_PREFETCH\r
35#define USE_LONG_MATCH_OPT\r
36\r
37#define kEmptyHashValue 0\r
38\r
39// #define CYC_TO_POS_OFFSET 0\r
40\r
41// #define CYC_TO_POS_OFFSET 1 // for debug\r
42\r
43/*\r
44MY_NO_INLINE\r
45UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
46 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)\r
47{\r
48 do\r
49 {\r
50 UInt32 delta;\r
51 if (hash == size)\r
52 break;\r
53 delta = *hash++;\r
54\r
55 if (delta == 0 || delta > (UInt32)pos)\r
56 return NULL;\r
57\r
58 lenLimit++;\r
59\r
60 if (delta == (UInt32)pos)\r
61 {\r
62 CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;\r
63 *d++ = 0;\r
64 ptr1[0] = kEmptyHashValue;\r
65 ptr1[1] = kEmptyHashValue;\r
66 }\r
67else\r
68{\r
69 UInt32 *_distances = ++d;\r
70\r
71 CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;\r
72 CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
73\r
74 const Byte *len0 = cur, *len1 = cur;\r
75 UInt32 cutValue = _cutValue;\r
76 const Byte *maxLen = cur + _maxLen;\r
77\r
78 for (LOG_ITER(g_NumIters_Tree++);;)\r
79 {\r
80 LOG_ITER(g_NumIters_Loop++);\r
81 {\r
82 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
83 CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);\r
84 const Byte *len = (len0 < len1 ? len0 : len1);\r
85\r
86 #ifdef USE_SON_PREFETCH\r
87 const UInt32 pair0 = *pair;\r
88 #endif\r
89\r
90 if (len[diff] == len[0])\r
91 {\r
92 if (++len != lenLimit && len[diff] == len[0])\r
93 while (++len != lenLimit)\r
94 {\r
95 LOG_ITER(g_NumIters_Bytes++);\r
96 if (len[diff] != len[0])\r
97 break;\r
98 }\r
99 if (maxLen < len)\r
100 {\r
101 maxLen = len;\r
102 *d++ = (UInt32)(len - cur);\r
103 *d++ = delta - 1;\r
104 \r
105 if (len == lenLimit)\r
106 {\r
107 const UInt32 pair1 = pair[1];\r
108 *ptr1 =\r
109 #ifdef USE_SON_PREFETCH\r
110 pair0;\r
111 #else\r
112 pair[0];\r
113 #endif\r
114 *ptr0 = pair1;\r
115\r
116 _distances[-1] = (UInt32)(d - _distances);\r
117\r
118 #ifdef USE_LONG_MATCH_OPT\r
119\r
120 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
121 break;\r
122\r
123 {\r
124 for (;;)\r
125 {\r
126 hash++;\r
127 pos++;\r
128 cur++;\r
129 lenLimit++;\r
130 {\r
131 CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
132 #if 0\r
133 *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];\r
134 #else\r
135 const UInt32 p0 = ptr[0 + (diff * 2)];\r
136 const UInt32 p1 = ptr[1 + (diff * 2)];\r
137 ptr[0] = p0;\r
138 ptr[1] = p1;\r
139 // ptr[0] = ptr[0 + (diff * 2)];\r
140 // ptr[1] = ptr[1 + (diff * 2)];\r
141 #endif\r
142 }\r
143 // PrintSon(son + 2, pos - 1);\r
144 // printf("\npos = %x delta = %x\n", pos, delta);\r
145 len++;\r
146 *d++ = 2;\r
147 *d++ = (UInt32)(len - cur);\r
148 *d++ = delta - 1;\r
149 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
150 break;\r
151 }\r
152 }\r
153 #endif\r
154\r
155 break;\r
156 }\r
157 }\r
158 }\r
159\r
160 {\r
161 const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);\r
162 if (len[diff] < len[0])\r
163 {\r
164 delta = pair[1];\r
165 if (delta >= curMatch)\r
166 return NULL;\r
167 *ptr1 = curMatch;\r
168 ptr1 = pair + 1;\r
169 len1 = len;\r
170 }\r
171 else\r
172 {\r
173 delta = *pair;\r
174 if (delta >= curMatch)\r
175 return NULL;\r
176 *ptr0 = curMatch;\r
177 ptr0 = pair;\r
178 len0 = len;\r
179 }\r
180\r
181 delta = (UInt32)pos - delta;\r
182 \r
183 if (--cutValue == 0 || delta >= pos)\r
184 {\r
185 *ptr0 = *ptr1 = kEmptyHashValue;\r
186 _distances[-1] = (UInt32)(d - _distances);\r
187 break;\r
188 }\r
189 }\r
190 }\r
191 } // for (tree iterations)\r
192}\r
193 pos++;\r
194 cur++;\r
195 }\r
196 while (d < limit);\r
197 *posRes = (UInt32)pos;\r
198 return d;\r
199}\r
200*/\r
201\r
202/* define cbs if you use 2 functions.\r
203 GetMatchesSpecN_1() : (pos < _cyclicBufferSize)\r
204 GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)\r
205\r
206 do not define cbs if you use 1 function:\r
207 GetMatchesSpecN_2()\r
208*/\r
209\r
210// #define cbs _cyclicBufferSize\r
211\r
212/*\r
213 we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32\r
214 to eliminate "movsx" BUG in old MSVC x64 compiler.\r
215*/\r
216\r
217UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
218 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,\r
219 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,\r
220 UInt32 *posRes);\r
221\r
222MY_NO_INLINE\r
223UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
224 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,\r
225 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,\r
226 UInt32 *posRes)\r
227{\r
228 do // while (hash != size)\r
229 {\r
230 UInt32 delta;\r
231 \r
232 #ifndef cbs\r
233 UInt32 cbs;\r
234 #endif\r
235\r
236 if (hash == size)\r
237 break;\r
238\r
239 delta = *hash++;\r
240\r
241 if (delta == 0)\r
242 return NULL;\r
243\r
244 lenLimit++;\r
245\r
246 #ifndef cbs\r
247 cbs = _cyclicBufferSize;\r
248 if ((UInt32)pos < cbs)\r
249 {\r
250 if (delta > (UInt32)pos)\r
251 return NULL;\r
252 cbs = (UInt32)pos;\r
253 }\r
254 #endif\r
255\r
256 if (delta >= cbs)\r
257 {\r
258 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
259 *d++ = 0;\r
260 ptr1[0] = kEmptyHashValue;\r
261 ptr1[1] = kEmptyHashValue;\r
262 }\r
263else\r
264{\r
265 UInt32 *_distances = ++d;\r
266\r
267 CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;\r
268 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
269\r
270 UInt32 cutValue = _cutValue;\r
271 const Byte *len0 = cur, *len1 = cur;\r
272 const Byte *maxLen = cur + _maxLen;\r
273\r
274 // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else\r
275 for (LOG_ITER(g_NumIters_Tree++);;)\r
276 {\r
277 LOG_ITER(g_NumIters_Loop++);\r
278 {\r
279 // SPEC code\r
280 CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta\r
281 + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)\r
282 ) << 1);\r
283\r
284 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
285 const Byte *len = (len0 < len1 ? len0 : len1);\r
286\r
287 #ifdef USE_SON_PREFETCH\r
288 const UInt32 pair0 = *pair;\r
289 #endif\r
290\r
291 if (len[diff] == len[0])\r
292 {\r
293 if (++len != lenLimit && len[diff] == len[0])\r
294 while (++len != lenLimit)\r
295 {\r
296 LOG_ITER(g_NumIters_Bytes++);\r
297 if (len[diff] != len[0])\r
298 break;\r
299 }\r
300 if (maxLen < len)\r
301 {\r
302 maxLen = len;\r
303 *d++ = (UInt32)(len - cur);\r
304 *d++ = delta - 1;\r
305 \r
306 if (len == lenLimit)\r
307 {\r
308 const UInt32 pair1 = pair[1];\r
309 *ptr1 =\r
310 #ifdef USE_SON_PREFETCH\r
311 pair0;\r
312 #else\r
313 pair[0];\r
314 #endif\r
315 *ptr0 = pair1;\r
316\r
317 _distances[-1] = (UInt32)(d - _distances);\r
318\r
319 #ifdef USE_LONG_MATCH_OPT\r
320\r
321 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
322 break;\r
323\r
324 {\r
325 for (;;)\r
326 {\r
327 *d++ = 2;\r
328 *d++ = (UInt32)(lenLimit - cur);\r
329 *d++ = delta - 1;\r
330 cur++;\r
331 lenLimit++;\r
332 // SPEC\r
333 _cyclicBufferPos++;\r
334 {\r
335 // SPEC code\r
336 CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);\r
337 const CLzRef *src = dest + ((diff\r
338 + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);\r
339 // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
340 #if 0\r
341 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);\r
342 #else\r
343 const UInt32 p0 = src[0];\r
344 const UInt32 p1 = src[1];\r
345 dest[0] = p0;\r
346 dest[1] = p1;\r
347 #endif\r
348 }\r
349 pos++;\r
350 hash++;\r
351 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
352 break;\r
353 } // for() end for long matches\r
354 }\r
355 #endif\r
356\r
357 break; // break from TREE iterations\r
358 }\r
359 }\r
360 }\r
361 {\r
362 const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);\r
363 if (len[diff] < len[0])\r
364 {\r
365 delta = pair[1];\r
366 *ptr1 = curMatch;\r
367 ptr1 = pair + 1;\r
368 len1 = len;\r
369 if (delta >= curMatch)\r
370 return NULL;\r
371 }\r
372 else\r
373 {\r
374 delta = *pair;\r
375 *ptr0 = curMatch;\r
376 ptr0 = pair;\r
377 len0 = len;\r
378 if (delta >= curMatch)\r
379 return NULL;\r
380 }\r
381 delta = (UInt32)pos - delta;\r
382 \r
383 if (--cutValue == 0 || delta >= cbs)\r
384 {\r
385 *ptr0 = *ptr1 = kEmptyHashValue;\r
386 _distances[-1] = (UInt32)(d - _distances);\r
387 break;\r
388 }\r
389 }\r
390 }\r
391 } // for (tree iterations)\r
392}\r
393 pos++;\r
394 _cyclicBufferPos++;\r
395 cur++;\r
396 }\r
397 while (d < limit);\r
398 *posRes = (UInt32)pos;\r
399 return d;\r
400}\r
401\r
402\r
403\r
404/*\r
405typedef UInt32 uint32plus; // size_t\r
406\r
407UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
408 UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,\r
409 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,\r
410 UInt32 *posRes)\r
411{\r
412 do // while (hash != size)\r
413 {\r
414 UInt32 delta;\r
415\r
416 #ifndef cbs\r
417 UInt32 cbs;\r
418 #endif\r
419\r
420 if (hash == size)\r
421 break;\r
422\r
423 delta = *hash++;\r
424\r
425 if (delta == 0)\r
426 return NULL;\r
427\r
428 #ifndef cbs\r
429 cbs = _cyclicBufferSize;\r
430 if ((UInt32)pos < cbs)\r
431 {\r
432 if (delta > (UInt32)pos)\r
433 return NULL;\r
434 cbs = (UInt32)pos;\r
435 }\r
436 #endif\r
437 \r
438 if (delta >= cbs)\r
439 {\r
440 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
441 *d++ = 0;\r
442 ptr1[0] = kEmptyHashValue;\r
443 ptr1[1] = kEmptyHashValue;\r
444 }\r
445else\r
446{\r
447 CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;\r
448 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
449 UInt32 *_distances = ++d;\r
450 uint32plus len0 = 0, len1 = 0;\r
451 UInt32 cutValue = _cutValue;\r
452 uint32plus maxLen = _maxLen;\r
453 // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;\r
454\r
455 for (LOG_ITER(g_NumIters_Tree++);;)\r
456 {\r
457 LOG_ITER(g_NumIters_Loop++);\r
458 {\r
459 // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
460 CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta\r
461 + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)\r
462 ) << 1);\r
463 const Byte *pb = cur - delta;\r
464 uint32plus len = (len0 < len1 ? len0 : len1);\r
465\r
466 #ifdef USE_SON_PREFETCH\r
467 const UInt32 pair0 = *pair;\r
468 #endif\r
469\r
470 if (pb[len] == cur[len])\r
471 {\r
472 if (++len != lenLimit && pb[len] == cur[len])\r
473 while (++len != lenLimit)\r
474 if (pb[len] != cur[len])\r
475 break;\r
476 if (maxLen < len)\r
477 {\r
478 maxLen = len;\r
479 *d++ = (UInt32)len;\r
480 *d++ = delta - 1;\r
481 if (len == lenLimit)\r
482 {\r
483 {\r
484 const UInt32 pair1 = pair[1];\r
485 *ptr0 = pair1;\r
486 *ptr1 =\r
487 #ifdef USE_SON_PREFETCH\r
488 pair0;\r
489 #else\r
490 pair[0];\r
491 #endif\r
492 }\r
493\r
494 _distances[-1] = (UInt32)(d - _distances);\r
495\r
496 #ifdef USE_LONG_MATCH_OPT\r
497\r
498 if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)\r
499 break;\r
500\r
501 {\r
502 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
503 for (;;)\r
504 {\r
505 *d++ = 2;\r
506 *d++ = (UInt32)lenLimit;\r
507 *d++ = delta - 1;\r
508 _cyclicBufferPos++;\r
509 {\r
510 CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);\r
511 const CLzRef *src = dest + ((diff +\r
512 (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);\r
513 #if 0\r
514 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);\r
515 #else\r
516 const UInt32 p0 = src[0];\r
517 const UInt32 p1 = src[1];\r
518 dest[0] = p0;\r
519 dest[1] = p1;\r
520 #endif\r
521 }\r
522 hash++;\r
523 pos++;\r
524 cur++;\r
525 pb++;\r
526 if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)\r
527 break;\r
528 }\r
529 }\r
530 #endif\r
531\r
532 break;\r
533 }\r
534 }\r
535 }\r
536 {\r
537 const UInt32 curMatch = (UInt32)pos - delta;\r
538 if (pb[len] < cur[len])\r
539 {\r
540 delta = pair[1];\r
541 *ptr1 = curMatch;\r
542 ptr1 = pair + 1;\r
543 len1 = len;\r
544 }\r
545 else\r
546 {\r
547 delta = *pair;\r
548 *ptr0 = curMatch;\r
549 ptr0 = pair;\r
550 len0 = len;\r
551 }\r
552\r
553 {\r
554 if (delta >= curMatch)\r
555 return NULL;\r
556 delta = (UInt32)pos - delta;\r
557 if (delta >= cbs\r
558 // delta >= _cyclicBufferSize || delta >= pos\r
559 || --cutValue == 0)\r
560 {\r
561 *ptr0 = *ptr1 = kEmptyHashValue;\r
562 _distances[-1] = (UInt32)(d - _distances);\r
563 break;\r
564 }\r
565 }\r
566 }\r
567 }\r
568 } // for (tree iterations)\r
569}\r
570 pos++;\r
571 _cyclicBufferPos++;\r
572 cur++;\r
573 }\r
574 while (d < limit);\r
575 *posRes = (UInt32)pos;\r
576 return d;\r
577}\r
578*/\r