git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.6 / lib / dictBuilder / cover.h
CommitLineData
648db22b 1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11#ifndef ZDICT_STATIC_LINKING_ONLY
12# define ZDICT_STATIC_LINKING_ONLY
13#endif
14
f535537f 15#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
16#include "../common/mem.h" /* U32, BYTE */
648db22b 17#include "../zdict.h"
18
19/**
20 * COVER_best_t is used for two purposes:
21 * 1. Synchronizing threads.
22 * 2. Saving the best parameters and dictionary.
23 *
24 * All of the methods except COVER_best_init() are thread safe if zstd is
25 * compiled with multithreaded support.
26 */
27typedef struct COVER_best_s {
28 ZSTD_pthread_mutex_t mutex;
29 ZSTD_pthread_cond_t cond;
30 size_t liveJobs;
31 void *dict;
32 size_t dictSize;
33 ZDICT_cover_params_t parameters;
34 size_t compressedSize;
35} COVER_best_t;
36
37/**
38 * A segment is a range in the source as well as the score of the segment.
39 */
40typedef struct {
41 U32 begin;
42 U32 end;
43 U32 score;
44} COVER_segment_t;
45
46/**
47 *Number of epochs and size of each epoch.
48 */
49typedef struct {
50 U32 num;
51 U32 size;
52} COVER_epoch_info_t;
53
54/**
55 * Struct used for the dictionary selection function.
56 */
57typedef struct COVER_dictSelection {
58 BYTE* dictContent;
59 size_t dictSize;
60 size_t totalCompressedSize;
61} COVER_dictSelection_t;
62
63/**
64 * Computes the number of epochs and the size of each epoch.
65 * We will make sure that each epoch gets at least 10 * k bytes.
66 *
67 * The COVER algorithms divide the data up into epochs of equal size and
68 * select one segment from each epoch.
69 *
70 * @param maxDictSize The maximum allowed dictionary size.
71 * @param nbDmers The number of dmers we are training on.
72 * @param k The parameter k (segment size).
73 * @param passes The target number of passes over the dmer corpus.
74 * More passes means a better dictionary.
75 */
76COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
77 U32 k, U32 passes);
78
/**
 * Emits a warning for the user when the training corpus is too small.
 */
void COVER_warnOnSmallCorpus(size_t maxDictSize,
                             size_t nbDmers,
                             int displayLevel);
83
84/**
85 * Checks total compressed size of a dictionary
86 */
87size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
88 const size_t *samplesSizes, const BYTE *samples,
89 size_t *offsets,
90 size_t nbTrainSamples, size_t nbSamples,
91 BYTE *const dict, size_t dictBufferCapacity);
92
/**
 * Computes the sum of the sample sizes.
 *
 * @param samplesSizes The sample sizes to add up.
 * @param nbSamples    The number of samples.
 * @return The sum of the sample sizes.
 */
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples);
97
98/**
99 * Initialize the `COVER_best_t`.
100 */
101void COVER_best_init(COVER_best_t *best);
102
103/**
104 * Wait until liveJobs == 0.
105 */
106void COVER_best_wait(COVER_best_t *best);
107
108/**
109 * Call COVER_best_wait() and then destroy the COVER_best_t.
110 */
111void COVER_best_destroy(COVER_best_t *best);
112
113/**
114 * Called when a thread is about to be launched.
115 * Increments liveJobs.
116 */
117void COVER_best_start(COVER_best_t *best);
118
119/**
120 * Called when a thread finishes executing, both on error or success.
121 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
122 * If this dictionary is the best so far save it and its parameters.
123 */
124void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
125 COVER_dictSelection_t selection);
126/**
127 * Error function for COVER_selectDict function. Checks if the return
128 * value is an error.
129 */
130unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
131
132 /**
133 * Error function for COVER_selectDict function. Returns a struct where
134 * return.totalCompressedSize is a ZSTD error.
135 */
136COVER_dictSelection_t COVER_dictSelectionError(size_t error);
137
138/**
139 * Always call after selectDict is called to free up used memory from
140 * newly created dictionary.
141 */
142void COVER_dictSelectionFree(COVER_dictSelection_t selection);
143
144/**
145 * Called to finalize the dictionary and select one based on whether or not
146 * the shrink-dict flag was enabled. If enabled the dictionary used is the
147 * smallest dictionary within a specified regression of the compressed size
148 * from the largest dictionary.
149 */
150 COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
151 size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
152 size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);