9e052883 |
1 | /* LzFindMt.c -- multithreaded Match finder for LZ algorithms\r |
2 | 2021-12-21 : Igor Pavlov : Public domain */\r |
3 | \r |
4 | #include "Precomp.h"\r |
5 | \r |
6 | // #include <stdio.h>\r |
7 | \r |
8 | #include "CpuArch.h"\r |
9 | \r |
10 | #include "LzHash.h"\r |
11 | #include "LzFindMt.h"\r |
12 | \r |
13 | // #define LOG_ITERS\r |
14 | \r |
15 | // #define LOG_THREAD\r |
16 | \r |
17 | #ifdef LOG_THREAD\r |
18 | #include <stdio.h>\r |
19 | #define PRF(x) x\r |
20 | #else\r |
21 | #define PRF(x)\r |
22 | #endif\r |
23 | \r |
24 | #ifdef LOG_ITERS\r |
25 | #include <stdio.h>\r |
26 | extern UInt64 g_NumIters_Tree;\r |
27 | extern UInt64 g_NumIters_Loop;\r |
28 | extern UInt64 g_NumIters_Bytes;\r |
29 | #define LOG_ITER(x) x\r |
30 | #else\r |
31 | #define LOG_ITER(x)\r |
32 | #endif\r |
33 | \r |
34 | #define kMtHashBlockSize ((UInt32)1 << 17)\r |
35 | #define kMtHashNumBlocks (1 << 1)\r |
36 | \r |
37 | #define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)\r |
38 | \r |
39 | #define kMtBtBlockSize ((UInt32)1 << 16)\r |
40 | #define kMtBtNumBlocks (1 << 4)\r |
41 | \r |
42 | #define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)\r |
43 | \r |
44 | /*\r |
45 | HASH functions:\r |
46 | We use raw 8/16 bits from a[1] and a[2],\r |
47 | xored with crc(a[0]) and crc(a[3]).\r |
48 | We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.\r |
49 | our crc() function provides one-to-one correspondence for low 8-bit values:\r |
50 | (crc[0...0xFF] & 0xFF) <-> [0...0xFF]\r |
51 | */\r |
52 | \r |
53 | #define MF(mt) ((mt)->MatchFinder)\r |
54 | #define MF_CRC (p->crc)\r |
55 | \r |
56 | // #define MF(mt) (&(mt)->MatchFinder)\r |
57 | // #define MF_CRC (p->MatchFinder.crc)\r |
58 | \r |
59 | #define MT_HASH2_CALC \\r |
60 | h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);\r |
61 | \r |
62 | #define MT_HASH3_CALC { \\r |
63 | UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \\r |
64 | h2 = temp & (kHash2Size - 1); \\r |
65 | h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }\r |
66 | \r |
67 | /*\r |
68 | #define MT_HASH3_CALC__NO_2 { \\r |
69 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \\r |
70 | h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }\r |
71 | \r |
72 | #define __MT_HASH4_CALC { \\r |
73 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \\r |
74 | h2 = temp & (kHash2Size - 1); \\r |
75 | temp ^= ((UInt32)cur[2] << 8); \\r |
76 | h3 = temp & (kHash3Size - 1); \\r |
77 | h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }\r |
78 | // (kHash4Size - 1);\r |
79 | */\r |
80 | \r |
81 | \r |
82 | MY_NO_INLINE\r |
83 | static void MtSync_Construct(CMtSync *p)\r |
84 | {\r |
85 | p->affinity = 0;\r |
86 | p->wasCreated = False;\r |
87 | p->csWasInitialized = False;\r |
88 | p->csWasEntered = False;\r |
89 | Thread_Construct(&p->thread);\r |
90 | Event_Construct(&p->canStart);\r |
91 | Event_Construct(&p->wasStopped);\r |
92 | Semaphore_Construct(&p->freeSemaphore);\r |
93 | Semaphore_Construct(&p->filledSemaphore);\r |
94 | }\r |
95 | \r |
96 | \r |
#define DEBUG_BUFFER_LOCK // define it to debug lock state

#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
// hard-abort if the tracked lock state disagrees with the expectation;
// (p)->csWasEntered mirrors whether this thread currently holds (p)->cs
#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);
#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
#else
#define BUFFER_MUST_BE_LOCKED(p)
#define BUFFER_MUST_BE_UNLOCKED(p)
#endif

// enter (p)->cs and record the locked state in csWasEntered
#define LOCK_BUFFER(p) { \
  BUFFER_MUST_BE_UNLOCKED(p); \
  CriticalSection_Enter(&(p)->cs); \
  (p)->csWasEntered = True; }

// leave (p)->cs and record the unlocked state in csWasEntered
#define UNLOCK_BUFFER(p) { \
  BUFFER_MUST_BE_LOCKED(p); \
  CriticalSection_Leave(&(p)->cs); \
  (p)->csWasEntered = False; }
117 | \r |
118 | \r |
/* Advances the reader to the next filled block of the ring buffer.
   On the first call after a (re)start it launches the worker loop;
   on later calls it releases the block just consumed back to the writer.
   In both cases it then blocks until the writer has filled another block,
   and returns with the buffer LOCKED.
   Returns the index of the block that is now current (0 on the start call). */
MY_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
  UInt32 numBlocks = 0;
  if (p->needStart)
  {
    // (re)start path: reset the stop/exit flags and wake the worker thread
    BUFFER_MUST_BE_UNLOCKED(p)
    p->numProcessedBlocks = 1;
    p->needStart = False;
    p->stopWriting = False;
    p->exit = False;
    Event_Reset(&p->wasStopped);
    Event_Set(&p->canStart);
  }
  else
  {
    UNLOCK_BUFFER(p)
    // we free current block
    numBlocks = p->numProcessedBlocks++;
    Semaphore_Release1(&p->freeSemaphore);
  }

  // buffer is UNLOCKED here
  Semaphore_Wait(&p->filledSemaphore);
  LOCK_BUFFER(p);
  return numBlocks;
}
146 | \r |
147 | \r |
148 | /* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */\r |
149 | \r |
MY_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
  // nothing to stop if the thread was never created or is already stopped
  if (!Thread_WasCreated(&p->thread) || p->needStart)
    return;

  PRF(printf("\nMtSync_StopWriting %p\n", p));

  if (p->csWasEntered)
  {
    /* we don't use buffer in this thread after StopWriting().
       So we UNLOCK buffer.
       And we restore default UNLOCKED state for stopped thread */
    UNLOCK_BUFFER(p)
  }

  /* We send (p->stopWriting) message and release freeSemaphore
     to free current block.
     So the thread will see (p->stopWriting) at some
     iteration after Wait(freeSemaphore).
     The thread doesn't need to fill all avail free blocks,
     so we can get fast thread stop.
  */

  p->stopWriting = True;
  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!

  PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
  Event_Wait(&p->wasStopped);
  PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finished\n", p));

  /* 21.03 : we don't restore semaphore counters here.
     We will recreate and reinit semaphores in next start */

  p->needStart = True;
}
186 | \r |
187 | \r |
/* Stops the worker thread (if running), joins it, and closes every
   OS synchronization object. Safe to call on a never-created object. */
MY_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
  PRF(printf("\nMtSync_Destruct %p\n", p));

  if (Thread_WasCreated(&p->thread))
  {
    /* we want thread to be in Stopped state before sending EXIT command.
       note: stop(btSync) will stop (htSync) also */
    MtSync_StopWriting(p);
    /* thread in Stopped state here : (p->needStart == true) */
    p->exit = True;
    // if (p->needStart) // it's (true)
    Event_Set(&p->canStart); // we send EXIT command to thread
    Thread_Wait_Close(&p->thread); // we wait thread finishing
  }

  if (p->csWasInitialized)
  {
    CriticalSection_Delete(&p->cs);
    p->csWasInitialized = False;
  }
  p->csWasEntered = False;

  Event_Close(&p->canStart);
  Event_Close(&p->wasStopped);
  Semaphore_Close(&p->freeSemaphore);
  Semaphore_Close(&p->filledSemaphore);

  p->wasCreated = False;
}
219 | \r |
220 | \r |
221 | // #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }\r |
222 | // we want to get real system error codes here instead of SZ_ERROR_THREAD\r |
223 | #define RINOK_THREAD(x) RINOK(x)\r |
224 | \r |
225 | \r |
// call it before each new file (when new starting is required):
/* Re-initializes the block semaphores for a new run:
   freeSemaphore starts full (numBlocks), filledSemaphore starts empty.
   Fails if the object is not in the stopped state or the buffer is locked. */
MY_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{
  WRes wres;
  // BUFFER_MUST_BE_UNLOCKED(p)
  if (!p->needStart || p->csWasEntered)
    return SZ_ERROR_FAIL;
  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
  if (wres == 0)
    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}
239 | \r |
240 | \r |
/* Creates the synchronization objects (critical section, auto-reset events)
   and starts the worker thread, optionally pinned to p->affinity.
   Returns 0 on success, or a raw system error code (WRes).
   Idempotent: returns immediately if already created. */
static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  WRes wres;

  if (p->wasCreated)
    return SZ_OK;

  RINOK_THREAD(CriticalSection_Init(&p->cs));
  p->csWasInitialized = True;
  p->csWasEntered = False;

  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));

  p->needStart = True;
  p->exit = True;  /* p->exit is unused before (canStart) Event.
       But in case of some unexpected code failure we will get fast exit from thread */

  // return ERROR_TOO_MANY_POSTS; // for debug
  // return EINVAL; // for debug

  if (p->affinity != 0)
    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
  else
    wres = Thread_Create(&p->thread, startAddress, obj);

  RINOK_THREAD(wres);
  p->wasCreated = True;
  return SZ_OK;
}
271 | \r |
272 | \r |
273 | MY_NO_INLINE\r |
274 | static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)\r |
275 | {\r |
276 | const WRes wres = MtSync_Create_WRes(p, startAddress, obj);\r |
277 | if (wres == 0)\r |
278 | return 0;\r |
279 | MtSync_Destruct(p);\r |
280 | return MY_SRes_HRESULT_FROM_WRes(wres);\r |
281 | }\r |
282 | \r |
283 | \r |
284 | // ---------- HASH THREAD ----------\r |
285 | \r |
286 | #define kMtMaxValForNormalize 0xFFFFFFFF\r |
287 | // #define kMtMaxValForNormalize ((1 << 21)) // for debug\r |
288 | // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses\r |
289 | \r |
290 | #ifdef MY_CPU_LE_UNALIGN\r |
291 | #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)\r |
292 | #else\r |
293 | #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))\r |
294 | #endif\r |
295 | \r |
296 | #define GetHeads_DECL(name) \\r |
297 | static void GetHeads ## name(const Byte *p, UInt32 pos, \\r |
298 | UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)\r |
299 | \r |
300 | #define GetHeads_LOOP(v) \\r |
301 | for (; numHeads != 0; numHeads--) { \\r |
302 | const UInt32 value = (v); \\r |
303 | p++; \\r |
304 | *heads++ = pos - hash[value]; \\r |
305 | hash[value] = pos++; }\r |
306 | \r |
307 | #define DEF_GetHeads2(name, v, action) \\r |
308 | GetHeads_DECL(name) { action \\r |
309 | GetHeads_LOOP(v) }\r |
310 | \r |
311 | #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)\r |
312 | \r |
/* Expanded by the DEF_GetHeads* macros into:
     static void GetHeads2 / GetHeads3 / GetHeads3b
   2  : raw 16-bit value as hash (mask and crc unused)
   3  : crc-mixed 3-byte hash, masked by hashMask
   3b : raw 24-bit value as hash (mask and crc unused) */
DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
316 | // BT3 is not good for crc collisions for big hashMask values.\r |
317 | \r |
318 | /*\r |
319 | GetHeads_DECL(3b)\r |
320 | {\r |
321 | UNUSED_VAR(hashMask);\r |
322 | UNUSED_VAR(crc);\r |
323 | {\r |
324 | const Byte *pLim = p + numHeads;\r |
325 | if (numHeads == 0)\r |
326 | return;\r |
327 | pLim--;\r |
328 | while (p < pLim)\r |
329 | {\r |
330 | UInt32 v1 = GetUi32(p);\r |
331 | UInt32 v0 = v1 & 0xFFFFFF;\r |
332 | UInt32 h0, h1;\r |
333 | p += 2;\r |
334 | v1 >>= 8;\r |
335 | h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;\r |
336 | h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;\r |
337 | heads += 2;\r |
338 | }\r |
339 | if (p == pLim)\r |
340 | {\r |
341 | UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);\r |
342 | *heads = pos - hash[v0];\r |
343 | hash[v0] = pos;\r |
344 | }\r |
345 | }\r |
346 | }\r |
347 | */\r |
348 | \r |
349 | /*\r |
350 | GetHeads_DECL(4)\r |
351 | {\r |
352 | unsigned sh = 0;\r |
353 | UNUSED_VAR(crc)\r |
354 | while ((hashMask & 0x80000000) == 0)\r |
355 | {\r |
356 | hashMask <<= 1;\r |
357 | sh++;\r |
358 | }\r |
359 | GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)\r |
360 | }\r |
361 | #define GetHeads4b GetHeads4\r |
362 | */\r |
363 | \r |
364 | #define USE_GetHeads_LOCAL_CRC\r |
365 | \r |
366 | #ifdef USE_GetHeads_LOCAL_CRC\r |
367 | \r |
/* GetHeads4: fills (heads) with head deltas for a 4-byte hash.
   Builds local crc tables with hashMask pre-applied so the per-position
   loop body avoids the extra "& hashMask" operation. */
GetHeads_DECL(4)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      // crc1[i] = rotlFixed(v, 8) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
}
384 | \r |
/* GetHeads4b: 4-byte hash variant using raw bytes 1..3 (no crc mixing
   for them); only byte 0 goes through the pre-masked crc table. */
GetHeads_DECL(4b)
{
  UInt32 crc0[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
      crc0[i] = crc[i] & hashMask;
  }
  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
}
395 | \r |
/* GetHeads5: 5-byte hash; bytes 0, 3 and 4 are crc-mixed via three
   pre-masked local tables, bytes 1..2 are used raw. */
GetHeads_DECL(5)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  UInt32 crc2[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
}
413 | \r |
/* GetHeads5b: 5-byte hash variant; bytes 0 and 4 are crc-mixed,
   bytes 1..3 are used raw (via GetUi24hi_from32). */
GetHeads_DECL(5b)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
}
429 | \r |
430 | #else\r |
431 | \r |
432 | DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)\r |
433 | DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)\r |
434 | DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)\r |
435 | DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)\r |
436 | \r |
437 | #endif\r |
438 | \r |
439 | \r |
/* HASH thread main loop.
   Outer loop: one iteration per start/stop cycle (waits on canStart,
   exits on p->exit). Inner loop: reads input, computes hash heads into
   the next free block of mt->hashBuf, and publishes it via filledSemaphore.
   Window moves are done here under BOTH critical sections so the BT and
   LZ sides see a consistent buffer pointer. */
static void HashThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->hashSync;
  PRF(printf("\nHashThreadFunc\n"));

  for (;;)
  {
    UInt32 blockIndex = 0;
    PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
    Event_Wait(&p->canStart);
    PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
    if (p->exit)
    {
      PRF(printf("\nHashThreadFunc : exit \n"));
      return;
    }

    MatchFinder_Init_HighHash(MF(mt));

    for (;;)
    {
      PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));

      {
        CMatchFinder *mf = MF(mt);
        if (MatchFinder_NeedMove(mf))
        {
          // move the window under both locks; fix up this object's
          // cached pointers by the same offset the window moved
          CriticalSection_Enter(&mt->btSync.cs);
          CriticalSection_Enter(&mt->hashSync.cs);
          {
            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
            ptrdiff_t offset;
            MatchFinder_MoveBlock(mf);
            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
            mt->pointerToCurPos -= offset;
            mt->buffer -= offset;
          }
          CriticalSection_Leave(&mt->hashSync.cs);
          CriticalSection_Leave(&mt->btSync.cs);
          continue;
        }

        Semaphore_Wait(&p->freeSemaphore);

        if (p->exit) // exit is unexpected here. But we check it here for some failure case
          return;

        // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
        if (p->stopWriting)
          break;

        MatchFinder_ReadIfRequired(mf);
        {
          UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
          UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
          heads[0] = 2;
          heads[1] = num;

          /* heads[1] contains the number of avail bytes:
             if (avail < mf->numHashBytes) :
             {
               it means that stream was finished
               HASH_THREAD and BT_THREAD must move position for heads[1] (avail) bytes.
               HASH_THREAD doesn't stop,
               HASH_THREAD fills only the header (2 numbers) for all next blocks:
               {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
             }
             else
             {
               HASH_THREAD and BT_THREAD must move position for (heads[0] - 2) bytes;
             }
          */

          if (num >= mf->numHashBytes)
          {
            num = num - mf->numHashBytes + 1;
            if (num > kMtHashBlockSize - 2)
              num = kMtHashBlockSize - 2;

            // normalize before (pos) would overflow UInt32
            if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
            {
              const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
              Inline_MatchFinder_ReduceOffsets(mf, subValue);
              MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
            }

            heads[0] = 2 + num;
            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
          }

          mf->pos += num; // wrap over zero is allowed at the end of stream
          mf->buffer += num;
        }
      }

      Semaphore_Release1(&p->filledSemaphore);
    } // for() processing end

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  } // for() thread end
}
542 | \r |
543 | \r |
544 | \r |
545 | \r |
546 | // ---------- BT THREAD ----------\r |
547 | \r |
548 | /* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.\r |
549 | here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */\r |
550 | #define CYC_TO_POS_OFFSET 0\r |
551 | // #define CYC_TO_POS_OFFSET 1 // for debug\r |
552 | \r |
553 | #define MFMT_GM_INLINE\r |
554 | \r |
555 | #ifdef MFMT_GM_INLINE\r |
556 | \r |
557 | /*\r |
558 | we use size_t for (pos) instead of UInt32\r |
559 | to eliminate "movsx" BUG in old MSVC x64 compiler.\r |
560 | */\r |
561 | \r |
562 | \r |
/* Batch binary-tree match search over a run of precomputed hash heads
   (implemented elsewhere in the match-finder sources).
   Returns pointer just past the last written match data in (d),
   or NULL on internal data failure; *posRes receives the new position
   (see the caller BtGetMatches below). */
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
    UInt32 *posRes);
567 | \r |
568 | #endif\r |
569 | \r |
570 | \r |
/* Fills one BT output block (d):
     d[0] = number of UInt32 items written (including the 2-item header)
     d[1] = number of input bytes still available to the LZ thread
     d[2..] = per-position match records.
   Consumes hash blocks from the HASH thread (via MtSync_GetNextBlock)
   and runs the binary-tree search for each position.
   On end of stream it writes (avail) empty records and returns.
   On failure_BT it emits an empty block (d[0] = 0). */
static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
  UInt32 numProcessed = 0;
  UInt32 curPos = 2; // write cursor in (d); slots 0..1 are the header

  /* GetMatchesSpec() functions don't create (len = 1)
     in [len, dist] match pairs, if (p->numHashBytes >= 2)
     Also we suppose here that (matchMaxLen >= 2).
     So the following code for (reserve) is not required
     UInt32 reserve = (p->matchMaxLen * 2);
     const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
     if (reserve < kNumHashBytes_Max - 1)
       reserve = kNumHashBytes_Max - 1;
     const UInt32 limit = kMtBtBlockSize - (reserve);
  */

  const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);

  d[1] = p->hashNumAvail;

  if (p->failure_BT)
  {
    // printf("\n == 1 BtGetMatches() p->failure_BT\n");
    d[0] = 0;
    // d[1] = 0;
    return;
  }

  while (curPos < limit)
  {
    if (p->hashBufPos == p->hashBufPosLimit)
    {
      // current hash block exhausted: pull the next one from HASH thread
      // MatchFinderMt_GetNextBlock_Hash(p);
      UInt32 avail;
      {
        const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
        const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
        const UInt32 *h = p->hashBuf + k;
        avail = h[1];
        p->hashBufPosLimit = k + h[0];
        p->hashNumAvail = avail;
        p->hashBufPos = k + 2;
      }

      {
        /* we must prevent UInt32 overflow for avail total value,
           if avail was increased with new hash block */
        UInt32 availSum = numProcessed + avail;
        if (availSum < numProcessed)
          availSum = (UInt32)(Int32)-1; // saturate at 0xFFFFFFFF
        d[1] = availSum;
      }

      if (avail >= p->numHashBytes)
        continue;

      // if (p->hashBufPos != p->hashBufPosLimit) exit(1);

      /* (avail < p->numHashBytes)
         It means that stream was finished.
         And (avail) - is a number of remaining bytes,
         we fill (d) for (avail) bytes for LZ_THREAD (receiver).
         but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */

      /* here we suppose that we have space enough:
         (kMtBtBlockSize - curPos >= p->hashNumAvail) */
      p->hashNumAvail = 0;
      d[0] = curPos + avail;
      d += curPos;
      for (; avail != 0; avail--)
        *d++ = 0;
      return;
    }
    {
      // how many positions we can process in this pass: limited by the
      // hash block, by keeping lenLimit bytes lookahead, and by the
      // distance to the cyclic-buffer wrap point
      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
      UInt32 pos = p->pos;
      UInt32 cyclicBufferPos = p->cyclicBufferPos;
      UInt32 lenLimit = p->matchMaxLen;
      if (lenLimit >= p->hashNumAvail)
        lenLimit = p->hashNumAvail;
      {
        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
        if (size2 < size)
          size = size2;
        size2 = p->cyclicBufferSize - cyclicBufferPos;
        if (size2 < size)
          size = size2;
      }

      // normalize (son) before (pos) would overflow UInt32
      if (pos > (UInt32)kMtMaxValForNormalize - size)
      {
        const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
        pos -= subValue;
        p->pos = pos;
        MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
      }

      #ifndef MFMT_GM_INLINE
      while (curPos < limit && size-- != 0)
      {
        UInt32 *startDistances = d + curPos;
        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
            startDistances + 1, p->numHashBytes - 1) - startDistances);
        *startDistances = num - 1;
        curPos += num;
        cyclicBufferPos++;
        pos++;
        p->buffer++;
      }
      #else
      {
        UInt32 posRes = pos;
        const UInt32 *d_end;
        {
          d_end = GetMatchesSpecN_2(
              p->buffer + lenLimit - 1,
              pos, p->buffer, p->son, p->cutValue, d + curPos,
              p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
              d + limit, p->hashBuf + p->hashBufPos + size,
              cyclicBufferPos, p->cyclicBufferSize,
              &posRes);
        }
        {
          if (!d_end)
          {
            // printf("\n == 2 BtGetMatches() p->failure_BT\n");
            // internal data failure
            p->failure_BT = True;
            d[0] = 0;
            // d[1] = 0;
            return;
          }
        }
        curPos = (UInt32)(d_end - d);
        {
          const UInt32 processed = posRes - pos;
          pos = posRes;
          p->hashBufPos += processed;
          cyclicBufferPos += processed;
          p->buffer += processed;
        }
      }
      #endif

      {
        const UInt32 processed = pos - p->pos;
        numProcessed += processed;
        p->hashNumAvail -= processed;
        p->pos = pos;
      }
      if (cyclicBufferPos == p->cyclicBufferSize)
        cyclicBufferPos = 0;
      p->cyclicBufferPos = cyclicBufferPos;
    }
  }

  d[0] = curPos;
}
730 | \r |
731 | \r |
/* Fills BT output block (globalBlockIndex) in p->btBuf.
   Takes the hashSync buffer lock if the hash thread is already running;
   BtGetMatches() may acquire it itself via MtSync_GetNextBlock() on the
   start path, so on exit the buffer is always LOCKED and we unlock it. */
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
  CMtSync *sync = &p->hashSync;

  BUFFER_MUST_BE_UNLOCKED(sync)

  if (!sync->needStart)
  {
    LOCK_BUFFER(sync)
  }

  BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));

  /* We suppose that we have called GetNextBlock() from start.
     So buffer is LOCKED */

  UNLOCK_BUFFER(sync)
}
750 | \r |
751 | \r |
/* BT thread main loop: same start/stop protocol as HashThreadFunc,
   but producing BT match blocks. When it is told to stop, it also
   stops the HASH thread before signaling wasStopped. */
MY_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->btSync;
  for (;;)
  {
    UInt32 blockIndex = 0;
    Event_Wait(&p->canStart);

    for (;;)
    {
      PRF(printf("  BT thread block = %d  pos = %d\n", (unsigned)blockIndex, mt->pos));
      /* (p->exit == true) is possible after (p->canStart) at first loop iteration
         and is unexpected after more Wait(freeSemaphore) iterations */
      if (p->exit)
        return;

      Semaphore_Wait(&p->freeSemaphore);

      // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
      if (p->stopWriting)
        break;

      BtFillBlock(mt, blockIndex++);

      Semaphore_Release1(&p->filledSemaphore);
    }

    // we stop HASH_THREAD here
    MtSync_StopWriting(&mt->hashSync);

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  }
}
787 | \r |
788 | \r |
789 | void MatchFinderMt_Construct(CMatchFinderMt *p)\r |
790 | {\r |
791 | p->hashBuf = NULL;\r |
792 | MtSync_Construct(&p->hashSync);\r |
793 | MtSync_Construct(&p->btSync);\r |
794 | }\r |
795 | \r |
796 | static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)\r |
797 | {\r |
798 | ISzAlloc_Free(alloc, p->hashBuf);\r |
799 | p->hashBuf = NULL;\r |
800 | }\r |
801 | \r |
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  /*
  HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
  So we must be sure that HASH_THREAD will not use CriticalSection(s)
  after deleting CriticalSection here.

  we call ReleaseStream(p)
    that calls StopWriting(btSync)
      that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
  after StopWriting() it's safe to destruct MtSync(s) in any order */

  MatchFinderMt_ReleaseStream(p);

  MtSync_Destruct(&p->btSync);
  MtSync_Destruct(&p->hashSync);

  // optional iteration-count statistics (enabled by LOG_ITERS)
  LOG_ITER(
  printf("\nTree %9d * %7d iter = %9d = sum  :  bytes = %9d\n",
      (UInt32)(g_NumIters_Tree / 1000),
      (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
      (UInt32)(g_NumIters_Loop / 1000),
      (UInt32)(g_NumIters_Bytes / 1000)
      ));

  MatchFinderMt_FreeMem(p, alloc);
}
829 | \r |
830 | \r |
831 | #define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)\r |
832 | #define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)\r |
833 | \r |
834 | \r |
835 | static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }\r |
/* Thread entry thunk for the BT thread.
   NOTE(review): allocaDummy reserves and touches 0x180 bytes of stack
   before the real work starts — presumably to adjust this thread's
   stack layout; the write-then-read-back check keeps the compiler from
   optimizing the buffer away. Confirm intent against upstream history. */
static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
  Byte allocaDummy[0x180];
  unsigned i = 0;
  for (i = 0; i < 16; i++)
    allocaDummy[i] = (Byte)0;
  if (allocaDummy[0] == 0)
    BtThreadFunc((CMatchFinderMt *)p);
  return 0;
}
846 | \r |
847 | \r |
/* Allocates the shared hash/bt block ring buffers, creates the underlying
   single-threaded match finder with enlarged keep-buffer margins, and
   starts both worker threads. Returns SZ_OK, SZ_ERROR_PARAM (matchMaxLen
   too large for the BT block size), SZ_ERROR_MEM, or a thread error. */
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
  CMatchFinder *mf = MF(p);
  p->historySize = historySize;
  // BtGetMatches() reserves (matchMaxLen * 2) items per block; reject
  // parameters that would leave no room for match data
  if (kMtBtBlockSize <= matchMaxLen * 4)
    return SZ_ERROR_PARAM;
  if (!p->hashBuf)
  {
    // single allocation: hash blocks followed by bt blocks
    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
    if (!p->hashBuf)
      return SZ_ERROR_MEM;
    p->btBuf = p->hashBuf + kHashBufferSize;
  }
  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
  keepAddBufferAfter += kMtHashBlockSize;
  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
    return SZ_ERROR_MEM;

  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
  return SZ_OK;
}
871 | \r |
872 | \r |
873 | SRes MatchFinderMt_InitMt(CMatchFinderMt *p)\r |
874 | {\r |
875 | RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));\r |
876 | return MtSync_Init(&p->btSync, kMtBtNumBlocks);\r |
877 | }\r |
878 | \r |
879 | \r |
/* Resets all per-stream state of the MT match finder and caches the
   fields it mirrors from the underlying single-threaded match finder.
   Does not read input data (that happens on the worker threads). */
static void MatchFinderMt_Init(CMatchFinderMt *p)
{
  CMatchFinder *mf = MF(p);

  // no bt/hash block is current yet
  p->btBufPos =
  p->btBufPosLimit = NULL;
  p->hashBufPos =
  p->hashBufPosLimit = 0;
  p->hashNumAvail = 0; // 21.03

  p->failure_BT = False;

  /* Init without data reading. We don't want to read data in this thread */
  MatchFinder_Init_4(mf);

  MatchFinder_Init_LowHash(mf);

  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
  p->btNumAvailBytes = 0;
  p->failure_LZ_BT = False;
  // p->failure_LZ_LZ = False;

  p->lzPos =
      1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  // mirror the match-finder fields used by the LZ-side accessors
  p->hash = mf->hash;
  p->fixedHashSize = mf->fixedHashSize;
  // p->hash4Mask = mf->hash4Mask;
  p->crc = mf->crc;
  // memcpy(p->crc, mf->crc, sizeof(mf->crc));

  p->son = mf->son;
  p->matchMaxLen = mf->matchMaxLen;
  p->numHashBytes = mf->numHashBytes;

  /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
  // mf->streamPos = mf->pos = 1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  /* we must init (p->pos = mf->pos) for BT, because
     BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
  p->pos = mf->pos; // do not change it

  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
  p->cyclicBufferSize = mf->cyclicBufferSize;
  p->buffer = mf->buffer;
  p->cutValue = mf->cutValue;
  // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}
932 | \r |
933 | \r |
/* ReleaseStream is required to finish multithreading */
/* Stops the BT thread; BtThreadFunc() in turn stops the HASH thread
   before signaling wasStopped (see MtSync_Destruct's comment). */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
  // Sleep(1); // for debug
  MtSync_StopWriting(&p->btSync);
  // Sleep(200); // for debug
  /* p->MatchFinder->ReleaseStream(); */
}
942 | \r |
943 | \r |
/* LZ-side: fetches the next BT block and sets the read cursors
   (btBufPos / btBufPosLimit / btNumAvailBytes).
   On a malformed block header it switches permanently to a one-item
   failure buffer so the LZ thread drains without touching the ring.
   Returns the number of available bytes for the LZ thread. */
MY_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
  if (p->failure_LZ_BT)
    p->btBufPos = p->failureBuf;
  else
  {
    const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
    const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
    {
      const UInt32 numItems = bt[0];
      p->btBufPosLimit = bt + numItems;
      p->btNumAvailBytes = bt[1];
      p->btBufPos = bt + 2;
      if (numItems < 2 || numItems > kMtBtBlockSize)
      {
        // corrupted block header: switch to the failure buffer
        p->failureBuf[0] = 0;
        p->btBufPos = p->failureBuf;
        p->btBufPosLimit = p->failureBuf + 1;
        p->failure_LZ_BT = True;
        // p->btNumAvailBytes = 0;
        /* we don't want to decrease AvailBytes, that was load before.
           that can be unexpected for the code that has loaded another value before */
      }
    }

    if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
    {
      /* we don't check (lzPos) over exact avail bytes in (btBuf).
         (fixedHashSize) is small, so normalization is fast */
      const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
      p->lzPos -= subValue;
      MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
    }
  }
  return p->btNumAvailBytes;
}
981 | \r |
982 | \r |
983 | \r |
984 | static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)\r |
985 | {\r |
986 | return p->pointerToCurPos;\r |
987 | }\r |
988 | \r |
989 | \r |
990 | #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);\r |
991 | \r |
992 | \r |
993 | static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)\r |
994 | {\r |
995 | if (p->btBufPos != p->btBufPosLimit)\r |
996 | return p->btNumAvailBytes;\r |
997 | return MatchFinderMt_GetNextBlock_Bt(p);\r |
998 | }\r |
999 | \r |
1000 | \r |
1001 | // #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }\r |
1002 | #define CHECK_FAILURE_LZ(_match_, _pos_)\r |
1003 | \r |
/* Appends a 2-byte hash match (if its position is >= matchMinPos) to the
   (len, dist-1) pair list (d), updates the 2-byte hash head to the current
   position, and returns the new end of the list. */
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, c2;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH2_CALC

  c2 = hash[h2];
  hash[h2] = m;  // hash head now points to the current position

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    // only byte 0 is compared; byte 1 is implied by the hash construction
    // (see the HASH functions note at the top of the file)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 2;           // match length
      *d++ = m - c2 - 1;  // match distance - 1
    }
  }

  return d;
}
1027 | \r |
/* Appends 2-byte and 3-byte hash matches (positions >= matchMinPos) to the
   (len, dist-1) pair list (d), updates both hash heads to the current
   position, and returns the new end of the list. */
static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    // byte 1 is implied by the hash; only bytes 0 and 2 need comparing
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;  // match distance - 1
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        // length-3 match found via the 2-byte hash; the c3 check is skipped
        d[0] = 3;
        return d + 2;
      }
      d[0] = 2;
      d += 2;
    }
  }

  if (c3 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c3, m)
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
  }

  return d;
}
1070 | \r |
1071 | \r |
1072 | #define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;\r |
1073 | \r |
1074 | /*\r |
1075 | static\r |
1076 | UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)\r |
1077 | {\r |
1078 | const UInt32 *bt = p->btBufPos;\r |
1079 | const UInt32 len = *bt++;\r |
1080 | const UInt32 *btLim = bt + len;\r |
1081 | UInt32 matchMinPos;\r |
1082 | UInt32 avail = p->btNumAvailBytes - 1;\r |
1083 | p->btBufPos = btLim;\r |
1084 | \r |
1085 | {\r |
1086 | p->btNumAvailBytes = avail;\r |
1087 | \r |
1088 | #define BT_HASH_BYTES_MAX 5\r |
1089 | \r |
1090 | matchMinPos = p->lzPos;\r |
1091 | \r |
1092 | if (len != 0)\r |
1093 | matchMinPos -= bt[1];\r |
1094 | else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)\r |
1095 | {\r |
1096 | INCREASE_LZ_POS\r |
1097 | return d;\r |
1098 | }\r |
1099 | else\r |
1100 | {\r |
1101 | const UInt32 hs = p->historySize;\r |
1102 | if (matchMinPos > hs)\r |
1103 | matchMinPos -= hs;\r |
1104 | else\r |
1105 | matchMinPos = 1;\r |
1106 | }\r |
1107 | }\r |
1108 | \r |
1109 | for (;;)\r |
1110 | {\r |
1111 | \r |
1112 | UInt32 h2, h3, c2, c3;\r |
1113 | UInt32 *hash = p->hash;\r |
1114 | const Byte *cur = p->pointerToCurPos;\r |
1115 | UInt32 m = p->lzPos;\r |
1116 | MT_HASH3_CALC\r |
1117 | \r |
1118 | c2 = hash[h2];\r |
1119 | c3 = (hash + kFix3HashSize)[h3];\r |
1120 | \r |
1121 | hash[h2] = m;\r |
1122 | (hash + kFix3HashSize)[h3] = m;\r |
1123 | \r |
1124 | if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])\r |
1125 | {\r |
1126 | d[1] = m - c2 - 1;\r |
1127 | if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])\r |
1128 | {\r |
1129 | d[0] = 3;\r |
1130 | d += 2;\r |
1131 | break;\r |
1132 | }\r |
1133 | // else\r |
1134 | {\r |
1135 | d[0] = 2;\r |
1136 | d += 2;\r |
1137 | }\r |
1138 | }\r |
1139 | if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])\r |
1140 | {\r |
1141 | *d++ = 3;\r |
1142 | *d++ = m - c3 - 1;\r |
1143 | }\r |
1144 | break;\r |
1145 | }\r |
1146 | \r |
1147 | if (len != 0)\r |
1148 | {\r |
1149 | do\r |
1150 | {\r |
1151 | const UInt32 v0 = bt[0];\r |
1152 | const UInt32 v1 = bt[1];\r |
1153 | bt += 2;\r |
1154 | d[0] = v0;\r |
1155 | d[1] = v1;\r |
1156 | d += 2;\r |
1157 | }\r |
1158 | while (bt != btLim);\r |
1159 | }\r |
1160 | INCREASE_LZ_POS\r |
1161 | return d;\r |
1162 | }\r |
1163 | */\r |
1164 | \r |
1165 | \r |
/* Appends 2-byte and 3-byte hash matches (positions >= matchMinPos) to the
   (len, dist-1) pair list (d), extending each candidate to length 4 when
   bytes 2 and 3 also match, and updates the hash heads to the current
   position. The separate 4-byte hash path (_USE_H4) is disabled.
   Returns the new end of the list. */
static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC
  // MT_HASH4_CALC
  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];
  // c4 = (hash + kFix4HashSize)[h4];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;
  // (hash + kFix4HashSize)[h4] = m;

  #define _USE_H2

  #ifdef _USE_H2
  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;  // match distance - 1
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
      // return d + 2;

      // try to extend the 2-byte-hash candidate to length 4
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
      {
        d[0] = 4;
        return d + 2;
      }
      d[0] = 3;
      d += 2;

      #ifdef _USE_H4
      if (c4 >= matchMinPos)
        if (
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
          )
        {
          *d++ = 4;
          *d++ = m - c4 - 1;
        }
      #endif
      return d;
    }
    d[0] = 2;
    d += 2;
  }
  #endif

  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c3 - 1;
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
    {
      d[0] = 4;
      return d + 2;
    }
    d[0] = 3;
    d += 2;
  }

  #ifdef _USE_H4
  if (c4 >= matchMinPos)
    if (
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
      )
    {
      *d++ = 4;
      *d++ = m - c4 - 1;
    }
  #endif

  return d;
}
1245 | \r |
1246 | \r |
1247 | static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)\r |
1248 | {\r |
1249 | const UInt32 *bt = p->btBufPos;\r |
1250 | const UInt32 len = *bt++;\r |
1251 | const UInt32 *btLim = bt + len;\r |
1252 | p->btBufPos = btLim;\r |
1253 | p->btNumAvailBytes--;\r |
1254 | INCREASE_LZ_POS\r |
1255 | {\r |
1256 | while (bt != btLim)\r |
1257 | {\r |
1258 | const UInt32 v0 = bt[0];\r |
1259 | const UInt32 v1 = bt[1];\r |
1260 | bt += 2;\r |
1261 | d[0] = v0;\r |
1262 | d[1] = v1;\r |
1263 | d += 2;\r |
1264 | }\r |
1265 | }\r |
1266 | return d;\r |
1267 | }\r |
1268 | \r |
1269 | \r |
1270 | \r |
/* Produces the full (len, dist-1) match list for the current position:
   the short hash matches from MixMatchesFunc() first, then the BinTree
   matches read from the worker's bt buffer. Returns the end of the list. */
static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  UInt32 len = *bt++;  // number of UInt32 items in this BT record (2 per match)
  const UInt32 avail = p->btNumAvailBytes - 1;
  p->btNumAvailBytes = avail;
  p->btBufPos = bt + len;
  if (len == 0)
  {
    // BT found no match; only hash matches are possible,
    // and only if enough bytes remain for the longest hash
    #define BT_HASH_BYTES_MAX 5
    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
    {
      // limit hash matches to the history window
      UInt32 m = p->lzPos;
      if (m > p->historySize)
        m -= p->historySize;
      else
        m = 1;
      d = p->MixMatchesFunc(p, m, d);
    }
  }
  else
  {
    /*
      first match pair from BinTree: (match_len, match_dist),
      (match_len >= numHashBytes).
      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
    */
    d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
    // if (d) // check for failure
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (len -= 2);  // len is always even here (pairs of UInt32)
  }
  INCREASE_LZ_POS
  return d;
}
1314 | \r |
1315 | #define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED\r |
1316 | #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;\r |
1317 | #define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);\r |
1318 | \r |
/* Skip for numHashBytes == 2: no hash heads to maintain, just consume
   (num) positions from the bt buffer. */
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER2_MT { p->btNumAvailBytes--;
  SKIP_FOOTER_MT
}
1324 | \r |
/* Skip that keeps the 2-byte hash head current for each skipped position
   (used for the numHashBytes == 3 configuration). */
static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(2)
      UInt32 h2;
      MT_HASH2_CALC
      hash[h2] = p->lzPos;
  SKIP_FOOTER_MT
}
1333 | \r |
/* Skip that keeps the 2-byte and 3-byte hash heads current for each
   skipped position (used for numHashBytes >= 4; see the note above
   MatchFinderMt4_Skip about the last bytes of the stream). */
static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(3)
      UInt32 h2, h3;
      MT_HASH3_CALC
      (hash + kFix3HashSize)[h3] =
      hash[ h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}
1344 | \r |
1345 | /*\r |
1346 | // MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().\r |
1347 | // The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.\r |
1348 | \r |
1349 | static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)\r |
1350 | {\r |
1351 | SKIP_HEADER_MT(4)\r |
1352 | UInt32 h2, h3; // h4\r |
1353 | MT_HASH3_CALC\r |
1354 | // MT_HASH4_CALC\r |
1355 | // (hash + kFix4HashSize)[h4] =\r |
1356 | (hash + kFix3HashSize)[h3] =\r |
1357 | hash[ h2] =\r |
1358 | p->lzPos;\r |
1359 | SKIP_FOOTER_MT\r |
1360 | }\r |
1361 | */\r |
1362 | \r |
/* Fills the generic match-finder vtable with the multithreaded
   implementations, and selects the head-building, hash-mixing and skip
   routines according to the configured number of hash bytes.
   NOTE: assignment order matters — case 2 overrides the GetMatches
   pointer set before the switch. */
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;

  switch (MF(p)->numHashBytes)
  {
    case 2:
      p->GetHeadsFunc = GetHeads2;
      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;  // unused: Mt2_GetMatches never calls it
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
      break;
    case 3:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
      break;
    case 4:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;

      // it's fast inline version of GetMatches()
      // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;

      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
      break;
    default:  // numHashBytes == 5
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
      vTable->Skip =
          (Mf_Skip_Func)MatchFinderMt3_Skip;
          // (Mf_Skip_Func)MatchFinderMt4_Skip;
      break;
  }
}