2 * Copyright (c) Meta Platforms, Inc. and affiliates.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
15 #define MIN(a, b) ((a) < (b) ? (a) : (b))
17 static const size_t kMatchBytes = 128;
19 #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
20 static BYTE SEQ_randByte(unsigned* src)
22 static const U32 prime1 = 2654435761U;
23 static const U32 prime2 = 2246822519U;
27 rand32 = SEQ_rotl32(rand32, 13);
29 return (BYTE)(rand32 >> 5);
32 SEQ_stream SEQ_initStream(unsigned seed)
36 XXH64_reset(&stream.xxh, 0);
41 /* Generates a single guard byte, then match length + 1 of a different byte,
42 * then another guard byte.
44 static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
52 BYTE* const ostart = (BYTE*)out->dst;
53 BYTE* const oend = ostart + out->size;
54 BYTE* op = ostart + out->pos;
56 switch ((ml_state)stream->state) {
58 /* Generate a single byte and pick a different byte for the match */
60 stream->bytesLeft = 1;
63 *op = SEQ_randByte(&stream->seed) & 0xFF;
65 stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
66 } while (*op == stream->saved);
68 /* State transition */
69 stream->state = ml_match_bytes;
70 stream->bytesLeft = value + 1;
72 case ml_match_bytes: {
73 /* Copy matchLength + 1 bytes to the output buffer */
74 size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
76 memset(op, stream->saved, setLength);
78 stream->bytesLeft -= setLength;
80 if (stream->bytesLeft > 0)
82 /* State transition */
83 stream->state = ml_last_byte;
87 /* Generate a single byte and pick a different byte for the match */
89 stream->bytesLeft = 1;
93 *op = SEQ_randByte(&stream->seed) & 0xFF;
94 } while (*op == stream->saved);
96 /* State transition */
100 stream->bytesLeft = 0;
103 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
104 out->pos = op - ostart;
105 return stream->bytesLeft;
108 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
109 * Generates literal length - kMatchBytes random bytes < 128.
110 * Generates another kMatchBytes using the saved seed to generate a match.
111 * This way the match is easy to find for the compressors.
113 static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
121 BYTE* const ostart = (BYTE*)out->dst;
122 BYTE* const oend = ostart + out->size;
123 BYTE* op = ostart + out->pos;
125 switch ((ll_state)stream->state) {
127 stream->state = ll_run_bytes;
128 stream->saved = stream->seed;
129 stream->bytesLeft = MIN(kMatchBytes, value);
132 while (stream->bytesLeft > 0 && op < oend) {
133 *op++ = SEQ_randByte(&stream->seed) | 0x80;
136 if (stream->bytesLeft > 0)
138 /* State transition */
139 stream->state = ll_literals;
140 stream->bytesLeft = value - MIN(kMatchBytes, value);
143 while (stream->bytesLeft > 0 && op < oend) {
144 *op++ = SEQ_randByte(&stream->seed) & 0x7F;
147 if (stream->bytesLeft > 0)
149 /* State transition */
150 stream->state = ll_run_match;
151 stream->bytesLeft = MIN(kMatchBytes, value);
154 while (stream->bytesLeft > 0 && op < oend) {
155 *op++ = SEQ_randByte(&stream->saved) | 0x80;
158 if (stream->bytesLeft > 0)
164 stream->bytesLeft = 0;
167 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
168 out->pos = op - ostart;
169 return stream->bytesLeft;
172 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
173 * Generates offset - kMatchBytes of zeros to get a large offset without
174 * polluting the hash tables.
175 * Generates another kMatchBytes using the saved seed to generate a with the
178 static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
186 BYTE* const ostart = (BYTE*)out->dst;
187 BYTE* const oend = ostart + out->size;
188 BYTE* op = ostart + out->pos;
190 switch ((of_state)stream->state) {
192 stream->state = of_run_bytes;
193 stream->saved = stream->seed;
194 stream->bytesLeft = MIN(value, kMatchBytes);
197 while (stream->bytesLeft > 0 && op < oend) {
198 *op++ = SEQ_randByte(&stream->seed) | 0x80;
201 if (stream->bytesLeft > 0)
203 /* State transition */
204 stream->state = of_offset;
205 stream->bytesLeft = value - MIN(value, kMatchBytes);
209 /* Copy matchLength + 1 bytes to the output buffer */
210 size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
212 memset(op, 0, setLength);
214 stream->bytesLeft -= setLength;
216 if (stream->bytesLeft > 0)
218 /* State transition */
219 stream->state = of_run_match;
220 stream->bytesLeft = MIN(value, kMatchBytes);
224 while (stream->bytesLeft > 0 && op < oend) {
225 *op++ = SEQ_randByte(&stream->saved) | 0x80;
228 if (stream->bytesLeft > 0)
234 stream->bytesLeft = 0;
237 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
238 out->pos = op - ostart;
239 return stream->bytesLeft;
242 /* Returns the number of bytes left to generate.
243 * Must pass the same type/value until it returns 0.
245 size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
248 case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
249 case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
250 case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
251 case SEQ_gen_max: /* fall-through */
256 /* Returns the xxhash of the data produced so far */
257 XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
259 return XXH64_digest(&stream->xxh);