/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2021-12-21 : Igor Pavlov : Public domain */

#include "Precomp.h"

// #include <stdio.h>

#include "CpuArch.h"

#include "LzHash.h"
#include "LzFindMt.h"

// #define LOG_ITERS

// #define LOG_THREAD

#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif

#ifdef LOG_ITERS
#include <stdio.h>
extern UInt64 g_NumIters_Tree;
extern UInt64 g_NumIters_Loop;
extern UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif

#define kMtHashBlockSize ((UInt32)1 << 17)
#define kMtHashNumBlocks (1 << 1)

#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)

#define kMtBtBlockSize ((UInt32)1 << 16)
#define kMtBtNumBlocks (1 << 4)

#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
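
/* Illustrative note (an editorial addition, not upstream code): the hash and
   BT threads write into small rings of blocks, and the GET_*_BLOCK_OFFSET()
   macros map an ever-increasing block index onto a ring slot. A minimal
   sketch of the mapping, assuming only the macros above:

     // kMtHashNumBlocks == 2, so hash block indices 0,1,2,3,...
     // map to offsets 0, kMtHashBlockSize, 0, kMtHashBlockSize, ...
     UInt32 hashOff = GET_HASH_BLOCK_OFFSET(blockIndex);
     // kMtBtNumBlocks == 16, so BT block index i maps to slot (i & 15)
     size_t btOff = GET_BT_BLOCK_OFFSET(blockIndex);
*/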

/*
  HASH functions:
  We use raw 8/16 bits from a[1] and a[2],
  xored with crc(a[0]) and crc(a[3]).
  We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
  our crc() function provides one-to-one correspondence for low 8-bit values:
    (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/
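
/* Illustrative sketch (an editorial addition, not upstream code): the 4-byte
   hash composition described above, matching the shape of GetHeads4 below.
   kLzHash_CrcShift_1 comes from LzHash.h; the function name is hypothetical:

     static UInt32 Hash4_Sketch(const Byte *a, UInt32 hashMask, const UInt32 *crc)
     {
       // crc[a[0]] keeps a one-to-one image of a[0] in its low 8 bits,
       // so only a[0] and a[3] need re-checking when a candidate is found
       return (crc[a[0]]
           ^ (UInt32)GetUi16(a + 1)
           ^ (crc[a[3]] << kLzHash_CrcShift_1)) & hashMask;
     }
*/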

#define MF(mt) ((mt)->MatchFinder)
#define MF_CRC (p->crc)

// #define MF(mt) (&(mt)->MatchFinder)
// #define MF_CRC (p->MatchFinder.crc)

#define MT_HASH2_CALC \
  h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);

#define MT_HASH3_CALC { \
  UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

/*
#define MT_HASH3_CALC__NO_2 { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

#define __MT_HASH4_CALC { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  temp ^= ((UInt32)cur[2] << 8); \
  h3 = temp & (kHash3Size - 1); \
  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
  // (kHash4Size - 1);
*/


MY_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
  p->affinity = 0;
  p->wasCreated = False;
  p->csWasInitialized = False;
  p->csWasEntered = False;
  Thread_Construct(&p->thread);
  Event_Construct(&p->canStart);
  Event_Construct(&p->wasStopped);
  Semaphore_Construct(&p->freeSemaphore);
  Semaphore_Construct(&p->filledSemaphore);
}


#define DEBUG_BUFFER_LOCK // define it to debug lock state

#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
#define BUFFER_MUST_BE_LOCKED(p)    if (!(p)->csWasEntered) exit(1);
#define BUFFER_MUST_BE_UNLOCKED(p)  if ( (p)->csWasEntered) exit(1);
#else
#define BUFFER_MUST_BE_LOCKED(p)
#define BUFFER_MUST_BE_UNLOCKED(p)
#endif

#define LOCK_BUFFER(p) { \
    BUFFER_MUST_BE_UNLOCKED(p); \
    CriticalSection_Enter(&(p)->cs); \
    (p)->csWasEntered = True; }

#define UNLOCK_BUFFER(p) { \
    BUFFER_MUST_BE_LOCKED(p); \
    CriticalSection_Leave(&(p)->cs); \
    (p)->csWasEntered = False; }


MY_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
  UInt32 numBlocks = 0;
  if (p->needStart)
  {
    BUFFER_MUST_BE_UNLOCKED(p)
    p->numProcessedBlocks = 1;
    p->needStart = False;
    p->stopWriting = False;
    p->exit = False;
    Event_Reset(&p->wasStopped);
    Event_Set(&p->canStart);
  }
  else
  {
    UNLOCK_BUFFER(p)
    // we free current block
    numBlocks = p->numProcessedBlocks++;
    Semaphore_Release1(&p->freeSemaphore);
  }

  // buffer is UNLOCKED here
  Semaphore_Wait(&p->filledSemaphore);
  LOCK_BUFFER(p);
  return numBlocks;
}
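
/* Illustrative note (an editorial addition, not upstream code): the
   freeSemaphore/filledSemaphore pair implements a bounded producer/consumer
   ring. The writer thread and the LZ-side reader pair up as sketched here
   (FillBlock/UseBlock are hypothetical stand-ins for the real work):

     // writer side (HashThreadFunc / BtThreadFunc):
     Semaphore_Wait(&p->freeSemaphore);       // take a free slot
     FillBlock(blockIndex++);                 // write one block
     Semaphore_Release1(&p->filledSemaphore);

     // reader side (MtSync_GetNextBlock above, non-first call):
     Semaphore_Release1(&p->freeSemaphore);   // recycle the previous block
     Semaphore_Wait(&p->filledSemaphore);     // wait for the next filled one
     UseBlock(numBlocks);
*/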


/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */

MY_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
  if (!Thread_WasCreated(&p->thread) || p->needStart)
    return;

  PRF(printf("\nMtSync_StopWriting %p\n", p));

  if (p->csWasEntered)
  {
    /* we don't use buffer in this thread after StopWriting().
       So we UNLOCK buffer.
       And we restore default UNLOCKED state for stopped thread */
    UNLOCK_BUFFER(p)
  }

  /* We send (p->stopWriting) message and release freeSemaphore
     to free current block.
     So the thread will see (p->stopWriting) at some
     iteration after Wait(freeSemaphore).
     The thread doesn't need to fill all avail free blocks,
     so we can get fast thread stop.
  */

  p->stopWriting = True;
  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!

  PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
  Event_Wait(&p->wasStopped);
  PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finished\n", p));

  /* 21.03 : we don't restore semaphore counters here.
     We will recreate and reinit semaphores in next start */

  p->needStart = True;
}


MY_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
  PRF(printf("\nMtSync_Destruct %p\n", p));

  if (Thread_WasCreated(&p->thread))
  {
    /* we want thread to be in Stopped state before sending EXIT command.
       note: stop(btSync) will stop (hashSync) also */
    MtSync_StopWriting(p);
    /* thread in Stopped state here : (p->needStart == true) */
    p->exit = True;
    // if (p->needStart) // it's (true)
    Event_Set(&p->canStart);       // we send EXIT command to thread
    Thread_Wait_Close(&p->thread); // we wait for the thread to finish
  }

  if (p->csWasInitialized)
  {
    CriticalSection_Delete(&p->cs);
    p->csWasInitialized = False;
  }
  p->csWasEntered = False;

  Event_Close(&p->canStart);
  Event_Close(&p->wasStopped);
  Semaphore_Close(&p->freeSemaphore);
  Semaphore_Close(&p->filledSemaphore);

  p->wasCreated = False;
}


// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x) RINOK(x)


// call it before each new file (when new starting is required):
MY_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{
  WRes wres;
  // BUFFER_MUST_BE_UNLOCKED(p)
  if (!p->needStart || p->csWasEntered)
    return SZ_ERROR_FAIL;
  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
  if (wres == 0)
    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  WRes wres;

  if (p->wasCreated)
    return SZ_OK;

  RINOK_THREAD(CriticalSection_Init(&p->cs));
  p->csWasInitialized = True;
  p->csWasEntered = False;

  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));

  p->needStart = True;
  p->exit = True;  /* p->exit is unused before (canStart) Event.
     But in case of some unexpected code failure we will get fast exit from thread */

  // return ERROR_TOO_MANY_POSTS; // for debug
  // return EINVAL; // for debug

  if (p->affinity != 0)
    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
  else
    wres = Thread_Create(&p->thread, startAddress, obj);

  RINOK_THREAD(wres);
  p->wasCreated = True;
  return SZ_OK;
}


MY_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
  if (wres == 0)
    return 0;
  MtSync_Destruct(p);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


// ---------- HASH THREAD ----------

#define kMtMaxValForNormalize 0xFFFFFFFF
// #define kMtMaxValForNormalize ((1 << 21)) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses

#ifdef MY_CPU_LE_UNALIGN
  #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
#else
  #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
#endif
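
/* Illustrative note (an editorial addition, not upstream code): both branches
   of GetUi24hi_from32() compute the same value: bytes p[1..3] combined as a
   little-endian 24-bit integer. For example, with p[0..3] = { 0xAA, 0x11, 0x22, 0x33 }:
     (GetUi32(p) >> 8)  ==  (0x332211AA >> 8)  ==  0x332211
     p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)
                        ==  0x11 ^ 0x2200 ^ 0x330000  ==  0x332211
   (xor acts as or here, because the shifted bytes occupy disjoint bits). */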

#define GetHeads_DECL(name) \
  static void GetHeads ## name(const Byte *p, UInt32 pos, \
      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)

#define GetHeads_LOOP(v) \
    for (; numHeads != 0; numHeads--) { \
      const UInt32 value = (v); \
      p++; \
      *heads++ = pos - hash[value]; \
      hash[value] = pos++; }

#define DEF_GetHeads2(name, v, action) \
  GetHeads_DECL(name) { action \
    GetHeads_LOOP(v) }

#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)

DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
// BT3 is not good for crc collisions for big hashMask values.

/*
GetHeads_DECL(3b)
{
  UNUSED_VAR(hashMask);
  UNUSED_VAR(crc);
  {
  const Byte *pLim = p + numHeads;
  if (numHeads == 0)
    return;
  pLim--;
  while (p < pLim)
  {
    UInt32 v1 = GetUi32(p);
    UInt32 v0 = v1 & 0xFFFFFF;
    UInt32 h0, h1;
    p += 2;
    v1 >>= 8;
    h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
    h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
    heads += 2;
  }
  if (p == pLim)
  {
    UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
    *heads = pos - hash[v0];
    hash[v0] = pos;
  }
  }
}
*/

/*
GetHeads_DECL(4)
{
  unsigned sh = 0;
  UNUSED_VAR(crc)
  while ((hashMask & 0x80000000) == 0)
  {
    hashMask <<= 1;
    sh++;
  }
  GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
}
#define GetHeads4b GetHeads4
*/

#define USE_GetHeads_LOCAL_CRC

#ifdef USE_GetHeads_LOCAL_CRC

GetHeads_DECL(4)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      // crc1[i] = rotlFixed(v, 8) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(4b)
{
  UInt32 crc0[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
      crc0[i] = crc[i] & hashMask;
  }
  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
}

GetHeads_DECL(5)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  UInt32 crc2[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(5b)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
}

#else

DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)

#endif


static void HashThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->hashSync;
  PRF(printf("\nHashThreadFunc\n"));

  for (;;)
  {
    UInt32 blockIndex = 0;
    PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
    Event_Wait(&p->canStart);
    PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
    if (p->exit)
    {
      PRF(printf("\nHashThreadFunc : exit \n"));
      return;
    }

    MatchFinder_Init_HighHash(MF(mt));

    for (;;)
    {
      PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));

      {
        CMatchFinder *mf = MF(mt);
        if (MatchFinder_NeedMove(mf))
        {
          CriticalSection_Enter(&mt->btSync.cs);
          CriticalSection_Enter(&mt->hashSync.cs);
          {
            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
            ptrdiff_t offset;
            MatchFinder_MoveBlock(mf);
            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
            mt->pointerToCurPos -= offset;
            mt->buffer -= offset;
          }
          CriticalSection_Leave(&mt->hashSync.cs);
          CriticalSection_Leave(&mt->btSync.cs);
          continue;
        }

        Semaphore_Wait(&p->freeSemaphore);

        if (p->exit) // exit is unexpected here. But we check it here for some failure case
          return;

        // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
        if (p->stopWriting)
          break;

        MatchFinder_ReadIfRequired(mf);
        {
          UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
          UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
          heads[0] = 2;
          heads[1] = num;

          /* heads[1] contains the number of avail bytes:
             if (avail < mf->numHashBytes) :
             {
               it means that stream was finished
               HASH_THREAD and BT_THREAD must move position for heads[1] (avail) bytes.
               HASH_THREAD doesn't stop,
               HASH_THREAD fills only the header (2 numbers) for all next blocks:
               {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
             }
             else
             {
               HASH_THREAD and BT_THREAD must move position for (heads[0] - 2) bytes;
             }
          */

          if (num >= mf->numHashBytes)
          {
            num = num - mf->numHashBytes + 1;
            if (num > kMtHashBlockSize - 2)
              num = kMtHashBlockSize - 2;

            if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
            {
              const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
              Inline_MatchFinder_ReduceOffsets(mf, subValue);
              MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
            }

            heads[0] = 2 + num;
            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
          }

          mf->pos += num; // wrap over zero is allowed at the end of stream
          mf->buffer += num;
        }
      }

      Semaphore_Release1(&p->filledSemaphore);
    } // for() processing end

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  } // for() thread end
}
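
/* Illustrative note (an editorial addition, not upstream code): each hash
   block written above is a flat UInt32 array with a two-word header, consumed
   by BtGetMatches() below:
     heads[0] = 2 + n  : total number of UInt32 items in the block
     heads[1] = avail  : bytes available at the block's starting position
     heads[2..2+n-1]   : one head distance (pos - hash[value]) per position
   A minimal reader sketch, assuming that layout:

     const UInt32 *h = hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex);
     UInt32 numItems = h[0];        // includes the 2 header words
     UInt32 avail = h[1];
     const UInt32 *dists = h + 2;   // (numItems - 2) head distances
*/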




// ---------- BT THREAD ----------

/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
   here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug

#define MFMT_GM_INLINE

#ifdef MFMT_GM_INLINE

/*
  we use size_t for (pos) instead of UInt32
  to eliminate "movsx" BUG in old MSVC x64 compiler.
*/


UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
    UInt32 *posRes);

#endif


static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
  UInt32 numProcessed = 0;
  UInt32 curPos = 2;

  /* GetMatchesSpec() functions don't create (len = 1)
     in [len, dist] match pairs, if (p->numHashBytes >= 2)
     Also we suppose here that (matchMaxLen >= 2).
     So the following code for (reserve) is not required
     UInt32 reserve = (p->matchMaxLen * 2);
     const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
     if (reserve < kNumHashBytes_Max - 1)
       reserve = kNumHashBytes_Max - 1;
     const UInt32 limit = kMtBtBlockSize - (reserve);
  */

  const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);

  d[1] = p->hashNumAvail;

  if (p->failure_BT)
  {
    // printf("\n == 1 BtGetMatches() p->failure_BT\n");
    d[0] = 0;
    // d[1] = 0;
    return;
  }

  while (curPos < limit)
  {
    if (p->hashBufPos == p->hashBufPosLimit)
    {
      // MatchFinderMt_GetNextBlock_Hash(p);
      UInt32 avail;
      {
        const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
        const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
        const UInt32 *h = p->hashBuf + k;
        avail = h[1];
        p->hashBufPosLimit = k + h[0];
        p->hashNumAvail = avail;
        p->hashBufPos = k + 2;
      }

      {
        /* we must prevent UInt32 overflow for avail total value,
           if avail was increased with new hash block */
        UInt32 availSum = numProcessed + avail;
        if (availSum < numProcessed)
          availSum = (UInt32)(Int32)-1;
        d[1] = availSum;
      }

      if (avail >= p->numHashBytes)
        continue;

      // if (p->hashBufPos != p->hashBufPosLimit) exit(1);

      /* (avail < p->numHashBytes)
         It means that stream was finished.
         And (avail) is the number of remaining bytes,
         we fill (d) for (avail) bytes for LZ_THREAD (receiver).
         but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */

      /* here we suppose that we have enough space:
         (kMtBtBlockSize - curPos >= p->hashNumAvail) */
      p->hashNumAvail = 0;
      d[0] = curPos + avail;
      d += curPos;
      for (; avail != 0; avail--)
        *d++ = 0;
      return;
    }
    {
      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
      UInt32 pos = p->pos;
      UInt32 cyclicBufferPos = p->cyclicBufferPos;
      UInt32 lenLimit = p->matchMaxLen;
      if (lenLimit >= p->hashNumAvail)
        lenLimit = p->hashNumAvail;
      {
        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
        if (size2 < size)
          size = size2;
        size2 = p->cyclicBufferSize - cyclicBufferPos;
        if (size2 < size)
          size = size2;
      }

      if (pos > (UInt32)kMtMaxValForNormalize - size)
      {
        const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
        pos -= subValue;
        p->pos = pos;
        MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
      }

      #ifndef MFMT_GM_INLINE
      while (curPos < limit && size-- != 0)
      {
        UInt32 *startDistances = d + curPos;
        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
            startDistances + 1, p->numHashBytes - 1) - startDistances);
        *startDistances = num - 1;
        curPos += num;
        cyclicBufferPos++;
        pos++;
        p->buffer++;
      }
      #else
      {
        UInt32 posRes = pos;
        const UInt32 *d_end;
        {
          d_end = GetMatchesSpecN_2(
              p->buffer + lenLimit - 1,
              pos, p->buffer, p->son, p->cutValue, d + curPos,
              p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
              d + limit, p->hashBuf + p->hashBufPos + size,
              cyclicBufferPos, p->cyclicBufferSize,
              &posRes);
        }
        {
          if (!d_end)
          {
            // printf("\n == 2 BtGetMatches() p->failure_BT\n");
            // internal data failure
            p->failure_BT = True;
            d[0] = 0;
            // d[1] = 0;
            return;
          }
        }
        curPos = (UInt32)(d_end - d);
        {
          const UInt32 processed = posRes - pos;
          pos = posRes;
          p->hashBufPos += processed;
          cyclicBufferPos += processed;
          p->buffer += processed;
        }
      }
      #endif

      {
        const UInt32 processed = pos - p->pos;
        numProcessed += processed;
        p->hashNumAvail -= processed;
        p->pos = pos;
      }
      if (cyclicBufferPos == p->cyclicBufferSize)
        cyclicBufferPos = 0;
      p->cyclicBufferPos = cyclicBufferPos;
    }
  }

  d[0] = curPos;
}
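
/* Illustrative note (an editorial addition, not upstream code): a BT block
   mirrors the hash-block shape: d[0] = total UInt32 items, d[1] = bytes
   available to the LZ thread, then, per input position, a count word followed
   by that many words of (len, dist) pairs (count 0 means no matches there).
   A minimal walk over one block, assuming that layout:

     const UInt32 *bt = btBuf + GET_BT_BLOCK_OFFSET(blockIndex);
     const UInt32 *lim = bt + bt[0];
     for (bt += 2; bt != lim; )
     {
       UInt32 num = *bt++;        // number of following UInt32 words
       const UInt32 *pairs = bt;  // num / 2 pairs of (len, dist)
       bt += num;
     }
*/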


static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
  CMtSync *sync = &p->hashSync;

  BUFFER_MUST_BE_UNLOCKED(sync)

  if (!sync->needStart)
  {
    LOCK_BUFFER(sync)
  }

  BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));

  /* We suppose that we have called GetNextBlock() from start.
     So buffer is LOCKED */

  UNLOCK_BUFFER(sync)
}


MY_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->btSync;
  for (;;)
  {
    UInt32 blockIndex = 0;
    Event_Wait(&p->canStart);

    for (;;)
    {
      PRF(printf("  BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));
      /* (p->exit == true) is possible after (p->canStart) at first loop iteration
         and is unexpected after more Wait(freeSemaphore) iterations */
      if (p->exit)
        return;

      Semaphore_Wait(&p->freeSemaphore);

      // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
      if (p->stopWriting)
        break;

      BtFillBlock(mt, blockIndex++);

      Semaphore_Release1(&p->filledSemaphore);
    }

    // we stop HASH_THREAD here
    MtSync_StopWriting(&mt->hashSync);

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  }
}


void MatchFinderMt_Construct(CMatchFinderMt *p)
{
  p->hashBuf = NULL;
  MtSync_Construct(&p->hashSync);
  MtSync_Construct(&p->btSync);
}

static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->hashBuf);
  p->hashBuf = NULL;
}

void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  /*
     HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
     So we must be sure that HASH_THREAD will not use CriticalSection(s)
     after deleting CriticalSection here.

     we call ReleaseStream(p)
       that calls StopWriting(btSync)
         that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
     after StopWriting() it's safe to destruct MtSync(s) in any order */

  MatchFinderMt_ReleaseStream(p);

  MtSync_Destruct(&p->btSync);
  MtSync_Destruct(&p->hashSync);

  LOG_ITER(
  printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",
      (UInt32)(g_NumIters_Tree / 1000),
      (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
      (UInt32)(g_NumIters_Loop / 1000),
      (UInt32)(g_NumIters_Bytes / 1000)
      ));

  MatchFinderMt_FreeMem(p, alloc);
}


#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)


static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
  Byte allocaDummy[0x180];
  unsigned i = 0;
  for (i = 0; i < 16; i++)
    allocaDummy[i] = (Byte)0;
  if (allocaDummy[0] == 0)
    BtThreadFunc((CMatchFinderMt *)p);
  return 0;
}


SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
  CMatchFinder *mf = MF(p);
  p->historySize = historySize;
  if (kMtBtBlockSize <= matchMaxLen * 4)
    return SZ_ERROR_PARAM;
  if (!p->hashBuf)
  {
    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
    if (!p->hashBuf)
      return SZ_ERROR_MEM;
    p->btBuf = p->hashBuf + kHashBufferSize;
  }
  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
  keepAddBufferAfter += kMtHashBlockSize;
  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
    return SZ_ERROR_MEM;

  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
  return SZ_OK;
}


SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{
  RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));
  return MtSync_Init(&p->btSync, kMtBtNumBlocks);
}


static void MatchFinderMt_Init(CMatchFinderMt *p)
{
  CMatchFinder *mf = MF(p);

  p->btBufPos =
  p->btBufPosLimit = NULL;
  p->hashBufPos =
  p->hashBufPosLimit = 0;
  p->hashNumAvail = 0; // 21.03

  p->failure_BT = False;

  /* Init without data reading. We don't want to read data in this thread */
  MatchFinder_Init_4(mf);

  MatchFinder_Init_LowHash(mf);

  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
  p->btNumAvailBytes = 0;
  p->failure_LZ_BT = False;
  // p->failure_LZ_LZ = False;

  p->lzPos =
      1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  p->hash = mf->hash;
  p->fixedHashSize = mf->fixedHashSize;
  // p->hash4Mask = mf->hash4Mask;
  p->crc = mf->crc;
  // memcpy(p->crc, mf->crc, sizeof(mf->crc));

  p->son = mf->son;
  p->matchMaxLen = mf->matchMaxLen;
  p->numHashBytes = mf->numHashBytes;

  /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
  // mf->streamPos = mf->pos = 1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  /* we must init (p->pos = mf->pos) for BT, because
     BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
  p->pos = mf->pos; // do not change it

  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
  p->cyclicBufferSize = mf->cyclicBufferSize;
  p->buffer = mf->buffer;
  p->cutValue = mf->cutValue;
  // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}


/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
  // Sleep(1); // for debug
  MtSync_StopWriting(&p->btSync);
  // Sleep(200); // for debug
  /* p->MatchFinder->ReleaseStream(); */
}


MY_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
  if (p->failure_LZ_BT)
    p->btBufPos = p->failureBuf;
  else
  {
    const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
    const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
    {
      const UInt32 numItems = bt[0];
      p->btBufPosLimit = bt + numItems;
      p->btNumAvailBytes = bt[1];
      p->btBufPos = bt + 2;
      if (numItems < 2 || numItems > kMtBtBlockSize)
      {
        p->failureBuf[0] = 0;
        p->btBufPos = p->failureBuf;
        p->btBufPosLimit = p->failureBuf + 1;
        p->failure_LZ_BT = True;
        // p->btNumAvailBytes = 0;
        /* we don't want to decrease AvailBytes, that was loaded before.
           that can be unexpected for the code that has loaded another value before */
      }
    }

    if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
    {
      /* we don't check (lzPos) over exact avail bytes in (btBuf).
         (fixedHashSize) is small, so normalization is fast */
      const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
      p->lzPos -= subValue;
      MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
    }
  }
  return p->btNumAvailBytes;
}



static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
  return p->pointerToCurPos;
}


#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);


static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
  if (p->btBufPos != p->btBufPosLimit)
    return p->btNumAvailBytes;
  return MatchFinderMt_GetNextBlock_Bt(p);
}


// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
#define CHECK_FAILURE_LZ(_match_, _pos_)

static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, c2;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH2_CALC

  c2 = hash[h2];
  hash[h2] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 2;
      *d++ = m - c2 - 1;
    }
  }

  return d;
}

static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        d[0] = 3;
        return d + 2;
      }
      d[0] = 2;
      d += 2;
    }
  }

  if (c3 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c3, m)
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
  }

  return d;
}


#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;

/*
static
UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  UInt32 matchMinPos;
  UInt32 avail = p->btNumAvailBytes - 1;
  p->btBufPos = btLim;

  {
    p->btNumAvailBytes = avail;

    #define BT_HASH_BYTES_MAX 5

    matchMinPos = p->lzPos;

    if (len != 0)
      matchMinPos -= bt[1];
    else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
    {
      INCREASE_LZ_POS
      return d;
    }
    else
    {
      const UInt32 hs = p->historySize;
      if (matchMinPos > hs)
        matchMinPos -= hs;
      else
        matchMinPos = 1;
    }
  }

  for (;;)
  {

    UInt32 h2, h3, c2, c3;
    UInt32 *hash = p->hash;
    const Byte *cur = p->pointerToCurPos;
    UInt32 m = p->lzPos;
    MT_HASH3_CALC

    c2 = hash[h2];
    c3 = (hash + kFix3HashSize)[h3];

    hash[h2] = m;
    (hash + kFix3HashSize)[h3] = m;

    if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        d[0] = 3;
        d += 2;
        break;
      }
      // else
      {
        d[0] = 2;
        d += 2;
      }
    }
    if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
    break;
  }

  if (len != 0)
  {
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (bt != btLim);
  }
  INCREASE_LZ_POS
  return d;
}
*/


static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC
  // MT_HASH4_CALC
  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];
  // c4 = (hash + kFix4HashSize)[h4];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;
  // (hash + kFix4HashSize)[h4] = m;

  #define _USE_H2

  #ifdef _USE_H2
  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
      // return d + 2;

      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
      {
        d[0] = 4;
        return d + 2;
      }
      d[0] = 3;
      d += 2;

      #ifdef _USE_H4
      if (c4 >= matchMinPos)
        if (
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
          )
        {
          *d++ = 4;
          *d++ = m - c4 - 1;
        }
      #endif
      return d;
    }
    d[0] = 2;
    d += 2;
  }
  #endif

  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c3 - 1;
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
    {
      d[0] = 4;
      return d + 2;
    }
    d[0] = 3;
    d += 2;
  }

  #ifdef _USE_H4
  if (c4 >= matchMinPos)
    if (
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
      )
    {
      *d++ = 4;
      *d++ = m - c4 - 1;
    }
  #endif

  return d;
}


static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  p->btBufPos = btLim;
  p->btNumAvailBytes--;
  INCREASE_LZ_POS
  {
    while (bt != btLim)
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
  }
  return d;
}



static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  UInt32 len = *bt++;
  const UInt32 avail = p->btNumAvailBytes - 1;
  p->btNumAvailBytes = avail;
  p->btBufPos = bt + len;
  if (len == 0)
  {
    #define BT_HASH_BYTES_MAX 5
    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
    {
      UInt32 m = p->lzPos;
      if (m > p->historySize)
        m -= p->historySize;
      else
        m = 1;
      d = p->MixMatchesFunc(p, m, d);
    }
  }
  else
  {
    /*
      first match pair from BinTree: (match_len, match_dist),
      (match_len >= numHashBytes).
      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
    */
    d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
    // if (d) // check for failure
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (len -= 2);
  }
  INCREASE_LZ_POS
  return d;
}
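
/* Illustrative note (an editorial addition, not upstream code): GetMatches()
   writes (len, dist - 1) pairs with increasing len into the caller's buffer
   and returns the write position, so the number of pairs is (end - d) / 2.
   A consumer sketch (buffer sizing is the caller's responsibility;
   LZ_MATCHES_BUF_SIZE is a hypothetical name):

     UInt32 matches[LZ_MATCHES_BUF_SIZE];
     const UInt32 *q, *end = MatchFinderMt_GetMatches(p, matches);
     for (q = matches; q != end; q += 2)
     {
       UInt32 len = q[0];
       UInt32 dist = q[1] + 1; // distances are stored as (dist - 1)
       // feed (len, dist) to the LZ encoder
     }
*/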

#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);

static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER2_MT { p->btNumAvailBytes--;
  SKIP_FOOTER_MT
}

static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(2)
      UInt32 h2;
      MT_HASH2_CALC
      hash[h2] = p->lzPos;
  SKIP_FOOTER_MT
}

static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(3)
      UInt32 h2, h3;
      MT_HASH3_CALC
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}

/*
// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.

static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(4)
      UInt32 h2, h3; // h4
      MT_HASH3_CALC
      // MT_HASH4_CALC
      // (hash + kFix4HashSize)[h4] =
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}
*/

void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;

  switch (MF(p)->numHashBytes)
  {
    case 2:
      p->GetHeadsFunc = GetHeads2;
      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
      break;
    case 3:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
      break;
    case 4:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;

      // it's fast inline version of GetMatches()
      // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;

      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
      break;
    default:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
      vTable->Skip =
          (Mf_Skip_Func)MatchFinderMt3_Skip;
          // (Mf_Skip_Func)MatchFinderMt4_Skip;
      break;
  }
}