1 /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
\r
2 2021-07-13 : Igor Pavlov : Public domain */
\r
9 // #include "LzFindMt.h"
\r
11 // #define LOG_ITERS
\r
13 // #define LOG_THREAD
\r
24 UInt64 g_NumIters_Tree;
\r
25 UInt64 g_NumIters_Loop;
\r
26 UInt64 g_NumIters_Bytes;
\r
27 #define LOG_ITER(x) x
\r
32 // ---------- BT THREAD ----------
\r
34 #define USE_SON_PREFETCH
\r
35 #define USE_LONG_MATCH_OPT
\r
37 #define kEmptyHashValue 0
\r
39 // #define CYC_TO_POS_OFFSET 0
\r
41 // #define CYC_TO_POS_OFFSET 1 // for debug
\r
45 UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
\r
46 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
\r
55 if (delta == 0 || delta > (UInt32)pos)
\r
60 if (delta == (UInt32)pos)
\r
62 CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
\r
64 ptr1[0] = kEmptyHashValue;
\r
65 ptr1[1] = kEmptyHashValue;
\r
69 UInt32 *_distances = ++d;
\r
71 CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
\r
72 CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
\r
74 const Byte *len0 = cur, *len1 = cur;
\r
75 UInt32 cutValue = _cutValue;
\r
76 const Byte *maxLen = cur + _maxLen;
\r
78 for (LOG_ITER(g_NumIters_Tree++);;)
\r
80 LOG_ITER(g_NumIters_Loop++);
\r
82 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
\r
83 CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
\r
84 const Byte *len = (len0 < len1 ? len0 : len1);
\r
86 #ifdef USE_SON_PREFETCH
\r
87 const UInt32 pair0 = *pair;
\r
90 if (len[diff] == len[0])
\r
92 if (++len != lenLimit && len[diff] == len[0])
\r
93 while (++len != lenLimit)
\r
95 LOG_ITER(g_NumIters_Bytes++);
\r
96 if (len[diff] != len[0])
\r
102 *d++ = (UInt32)(len - cur);
\r
105 if (len == lenLimit)
\r
107 const UInt32 pair1 = pair[1];
\r
109 #ifdef USE_SON_PREFETCH
\r
116 _distances[-1] = (UInt32)(d - _distances);
\r
118 #ifdef USE_LONG_MATCH_OPT
\r
120 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
\r
131 CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
\r
133 *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
\r
135 const UInt32 p0 = ptr[0 + (diff * 2)];
\r
136 const UInt32 p1 = ptr[1 + (diff * 2)];
\r
139 // ptr[0] = ptr[0 + (diff * 2)];
\r
140 // ptr[1] = ptr[1 + (diff * 2)];
\r
143 // PrintSon(son + 2, pos - 1);
\r
144 // printf("\npos = %x delta = %x\n", pos, delta);
\r
147 *d++ = (UInt32)(len - cur);
\r
149 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
\r
161 const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
\r
162 if (len[diff] < len[0])
\r
165 if (delta >= curMatch)
\r
174 if (delta >= curMatch)
\r
181 delta = (UInt32)pos - delta;
\r
183 if (--cutValue == 0 || delta >= pos)
\r
185 *ptr0 = *ptr1 = kEmptyHashValue;
\r
186 _distances[-1] = (UInt32)(d - _distances);
\r
191 } // for (tree iterations)
\r
197 *posRes = (UInt32)pos;
\r
202 /* define cbs if you use 2 functions.
\r
203 GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
\r
204 GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
\r
206 do not define cbs if you use 1 function:
\r
207 GetMatchesSpecN_2()
\r
210 // #define cbs _cyclicBufferSize
\r
213 we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
\r
214 to eliminate "movsx" BUG in old MSVC x64 compiler.
\r
217 UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
\r
218 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
\r
219 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
\r
223 UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
\r
224 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
\r
225 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
\r
228 do // while (hash != size)
\r
247 cbs = _cyclicBufferSize;
\r
248 if ((UInt32)pos < cbs)
\r
250 if (delta > (UInt32)pos)
\r
258 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
\r
260 ptr1[0] = kEmptyHashValue;
\r
261 ptr1[1] = kEmptyHashValue;
\r
265 UInt32 *_distances = ++d;
\r
267 CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
\r
268 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
\r
270 UInt32 cutValue = _cutValue;
\r
271 const Byte *len0 = cur, *len1 = cur;
\r
272 const Byte *maxLen = cur + _maxLen;
\r
274 // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
\r
275 for (LOG_ITER(g_NumIters_Tree++);;)
\r
277 LOG_ITER(g_NumIters_Loop++);
\r
280 CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
\r
281 + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
\r
284 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
\r
285 const Byte *len = (len0 < len1 ? len0 : len1);
\r
287 #ifdef USE_SON_PREFETCH
\r
288 const UInt32 pair0 = *pair;
\r
291 if (len[diff] == len[0])
\r
293 if (++len != lenLimit && len[diff] == len[0])
\r
294 while (++len != lenLimit)
\r
296 LOG_ITER(g_NumIters_Bytes++);
\r
297 if (len[diff] != len[0])
\r
303 *d++ = (UInt32)(len - cur);
\r
306 if (len == lenLimit)
\r
308 const UInt32 pair1 = pair[1];
\r
310 #ifdef USE_SON_PREFETCH
\r
317 _distances[-1] = (UInt32)(d - _distances);
\r
319 #ifdef USE_LONG_MATCH_OPT
\r
321 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
\r
328 *d++ = (UInt32)(lenLimit - cur);
\r
333 _cyclicBufferPos++;
\r
336 CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
\r
337 const CLzRef *src = dest + ((diff
\r
338 + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
\r
339 // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
\r
341 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
\r
343 const UInt32 p0 = src[0];
\r
344 const UInt32 p1 = src[1];
\r
351 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
\r
353 } // for() end for long matches
\r
357 break; // break from TREE iterations
\r
362 const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
\r
363 if (len[diff] < len[0])
\r
369 if (delta >= curMatch)
\r
378 if (delta >= curMatch)
\r
381 delta = (UInt32)pos - delta;
\r
383 if (--cutValue == 0 || delta >= cbs)
\r
385 *ptr0 = *ptr1 = kEmptyHashValue;
\r
386 _distances[-1] = (UInt32)(d - _distances);
\r
391 } // for (tree iterations)
\r
394 _cyclicBufferPos++;
\r
398 *posRes = (UInt32)pos;
\r
405 typedef UInt32 uint32plus; // size_t
\r
407 UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
\r
408 UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
\r
409 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
\r
412 do // while (hash != size)
\r
429 cbs = _cyclicBufferSize;
\r
430 if ((UInt32)pos < cbs)
\r
432 if (delta > (UInt32)pos)
\r
440 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
\r
442 ptr1[0] = kEmptyHashValue;
\r
443 ptr1[1] = kEmptyHashValue;
\r
447 CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
\r
448 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
\r
449 UInt32 *_distances = ++d;
\r
450 uint32plus len0 = 0, len1 = 0;
\r
451 UInt32 cutValue = _cutValue;
\r
452 uint32plus maxLen = _maxLen;
\r
453 // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
\r
455 for (LOG_ITER(g_NumIters_Tree++);;)
\r
457 LOG_ITER(g_NumIters_Loop++);
\r
459 // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
\r
460 CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
\r
461 + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
\r
463 const Byte *pb = cur - delta;
\r
464 uint32plus len = (len0 < len1 ? len0 : len1);
\r
466 #ifdef USE_SON_PREFETCH
\r
467 const UInt32 pair0 = *pair;
\r
470 if (pb[len] == cur[len])
\r
472 if (++len != lenLimit && pb[len] == cur[len])
\r
473 while (++len != lenLimit)
\r
474 if (pb[len] != cur[len])
\r
479 *d++ = (UInt32)len;
\r
481 if (len == lenLimit)
\r
484 const UInt32 pair1 = pair[1];
\r
487 #ifdef USE_SON_PREFETCH
\r
494 _distances[-1] = (UInt32)(d - _distances);
\r
496 #ifdef USE_LONG_MATCH_OPT
\r
498 if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
\r
502 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
\r
506 *d++ = (UInt32)lenLimit;
\r
508 _cyclicBufferPos++;
\r
510 CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
\r
511 const CLzRef *src = dest + ((diff +
\r
512 (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
\r
514 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
\r
516 const UInt32 p0 = src[0];
\r
517 const UInt32 p1 = src[1];
\r
526 if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
\r
537 const UInt32 curMatch = (UInt32)pos - delta;
\r
538 if (pb[len] < cur[len])
\r
554 if (delta >= curMatch)
\r
556 delta = (UInt32)pos - delta;
\r
558 // delta >= _cyclicBufferSize || delta >= pos
\r
559 || --cutValue == 0)
\r
561 *ptr0 = *ptr1 = kEmptyHashValue;
\r
562 _distances[-1] = (UInt32)(d - _distances);
\r
568 } // for (tree iterations)
\r
571 _cyclicBufferPos++;
\r
575 *posRes = (UInt32)pos;
\r