648db22b |
1 | #!/usr/bin/env python3 |
2 | """Test zstd interoperability between versions""" |
3 | |
4 | # ################################################################ |
5 | # Copyright (c) Meta Platforms, Inc. and affiliates. |
6 | # All rights reserved. |
7 | # |
8 | # This source code is licensed under both the BSD-style license (found in the |
9 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found |
10 | # in the COPYING file in the root directory of this source tree). |
11 | # You may select, at your option, one of the above-listed licenses. |
12 | # ################################################################ |
13 | |
14 | import filecmp |
15 | import glob |
16 | import hashlib |
17 | import os |
18 | import shutil |
19 | import sys |
20 | import subprocess |
21 | from subprocess import Popen, PIPE |
22 | |
# Upstream repository that is cloned to get access to every release tag.
repo_url = "https://github.com/facebook/zstd.git"
git_cmd = "git"

# Build tool and the arguments placed in front of every requested target.
make_cmd = "make"
make_args = ["-j", "CFLAGS=-O0"]

# Working directory of the test, relative to the root of the source tree.
tmp_dir_name = "tests/versionsTest"

# File from the source tree used as the sample, and the name of its copy.
test_dat_src = "README.md"
test_dat = "test_dat"

# Pseudo-tag naming the binary built from the current working tree.
head = "vdevel"

# Directory (inside the working directory) holding the dictionary training
# samples, and the source-tree globs that are copied into it.
dict_source = "dict_source"
dict_globs = [
    # C sources
    "programs/*.c",
    "lib/common/*.c",
    "lib/compress/*.c",
    "lib/decompress/*.c",
    "lib/dictBuilder/*.c",
    "lib/legacy/*.c",
    # headers
    "programs/*.h",
    "lib/common/*.h",
    "lib/compress/*.h",
    "lib/dictBuilder/*.h",
    "lib/legacy/*.h",
]
45 | |
46 | |
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command` to completion and return its exit status.

    Args:
        command: argument list, or a single command string when
            `param_shell` is true.
        print_output: when true, print the captured stdout, then the
            captured stderr, regardless of the exit status.
        print_error: when true and `print_output` is false, print the
            captured stderr if the command exits with a non-zero status.
        param_shell: forwarded to `Popen` as `shell=`.

    Returns:
        The exit status of the child process.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    stdout_bytes, stderr_bytes = popen.communicate()
    # The child's output is not guaranteed to be valid UTF-8; substitute
    # undecodable bytes instead of aborting with UnicodeDecodeError.
    stdout_text = stdout_bytes.decode("utf-8", errors="replace")
    stderr_text = stderr_bytes.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        # communicate() has returned, so returncode is always set here.
        print(stderr_text)
    return popen.returncode
59 | |
60 | |
def proc(cmd_args, pipe=True, dummy=False):
    """Launch `cmd_args` and wait for the process to finish.

    Returns None without running anything when `dummy` is true. Otherwise
    returns the `(stdout, stderr)` pair produced by `Popen.communicate()`:
    captured bytes when `pipe` is true, `(None, None)` when the child
    inherits the parent's streams. The exit status is not inspected.
    """
    if dummy:
        return None
    stream_kwargs = {'stdout': PIPE, 'stderr': PIPE} if pipe else {}
    return Popen(cmd_args, **stream_kwargs).communicate()
69 | |
70 | |
def make(targets, pipe=True):
    """Run make with the module-wide arguments followed by `targets`.

    The argument list is printed before it is run. Returns whatever
    `proc` returns for that command.
    """
    argv = [make_cmd] + make_args + targets
    print('compilation command : {}'.format(argv))
    return proc(argv, pipe)
76 | |
77 | |
def git(args, pipe=True):
    """Run git with the argument list `args`; returns what `proc` returns."""
    git_argv = [git_cmd] + args
    return proc(git_argv, pipe)
80 | |
81 | |
def get_git_tags():
    """List the tags of the repository in the current working directory.

    Only tags matching the pattern `v[0-9].[0-9].[0-9]` (one digit per
    component) are requested from git. Returns them as a list of strings.
    """
    listing, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return listing.decode('utf-8').split()
86 | |
87 | |
def dict_ok(tag, dict_name, sample):
    """Check that `./zstd.<tag>` accepts the dictionary `dict_name`.

    Runs `./zstd.<tag> -D dict_name` with `sample` on stdin and all output
    discarded.

    Returns:
        True when the dictionary file exists and the command exits with
        status 0. False when the dictionary is missing, the sample or the
        binary cannot be opened/executed, or the command fails.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as sample_file:
            subprocess.check_call(cmd, stdin=sample_file,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Only expected failures are reported as "not ok"; KeyboardInterrupt
        # and SystemExit are deliberately allowed to propagate.
        return False
    return True
98 | |
99 | |
def create_dict(tag, dict_source_path, fallback_tag=None):
    """Create `dict.<tag>` in the current directory unless it already exists.

    The dictionary is trained on every `*.c` and `*.h` file found directly
    in `dict_source_path`, using `./dictBuilder.<tag>` for v0.5.0 and
    `./zstd.<tag> --train` for every other tag.

    Args:
        tag: version tag naming both the binary and the dictionary.
        dict_source_path: directory holding the training samples.
        fallback_tag: when training fails (or the resulting dictionary is
            rejected by `dict_ok`), copy `dict.<fallback_tag>` instead.

    Raises:
        RuntimeError: training failed and no `fallback_tag` was given.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    # Sort so the dictionary builder is deterministic.
    files = sorted(glob.glob(dict_source_path + "/*.c") +
                   glob.glob(dict_source_path + "/*.h"))
    # Pass an argument list rather than a shell string so that sample paths
    # containing spaces or shell metacharacters are handed over verbatim.
    if tag == 'v0.5.0':
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]
    try:
        result = execute(cmd, print_output=False)
    except OSError as err:
        # Without a shell, a missing or non-executable binary raises instead
        # of yielding a non-zero status; keep treating it as a failed build.
        print(err)
        result = 127

    if result == 0 and files and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
121 | |
122 | |
def zstd(tag, args, input_file, output_file):
    """
    Run `./zstd.<tag>` with `args`, feeding `input_file` on stdin and writing
    its stdout to `output_file` (used for both compression and decompression).
    Original note: this helper is needed because 0.5.0 is broken when stdout
    is not a TTY.
    The command line and the captured stderr are printed.
    Throws subprocess.CalledProcessError if the command returns non-zero.
    """
    with open(input_file, "rb") as i, open(output_file, "wb") as o:
        cmd = ['./zstd.' + tag] + args
        print("Running: '{}', input={}, output={}" .format(
            ' '.join(cmd), input_file, output_file
        ))
        result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
    # Decode leniently: a strict ASCII decode would raise on any non-ASCII
    # byte and hide the real exit status reported just below.
    print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
    result.check_returncode()
138 | |
139 | |
def dict_compress_sample(tag, sample):
    """Compress `sample` with `dict.<tag>` at levels 1, 3, 5, 9, 15 and 18.

    Each run goes through `zstd(tag, ...)` and writes
    `<sample>_<LL>_64_<tag>_dictio.zst`, where LL is the zero-padded level.
    Only the level-1 run additionally passes `-v -v -v`.
    """
    dict_name = 'dict.' + tag
    for level in (1, 3, 5, 9, 15, 18):
        args = ['-D', dict_name, '-' + str(level)]
        if level == 1:
            args += ['-v', '-v', '-v']
        output = '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag)
        zstd(tag, args, sample, output)
    print(tag + " : dict compression completed")
152 | |
153 | |
def compress_sample(tag, sample):
    """Compress `sample` without a dictionary at levels 1, 3, 5, 9, 15 and 18.

    Each run goes through `zstd(tag, ...)` and writes
    `<sample>_<LL>_64_<tag>_nodict.zst`, where LL is the zero-padded level.
    """
    for level in (1, 3, 5, 9, 15, 18):
        output = '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag)
        zstd(tag, ['-' + str(level)], sample, output)
    print(tag + " : compression completed")
164 | |
165 | |
# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the contents of `filepath`."""
    digest = hashlib.sha1()
    with open(filepath, 'rb') as stream:
        # Feed the hash piecewise; the digest equals hashing the whole file.
        for chunk in iter(lambda: stream.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
170 | |
171 | |
def remove_duplicates():
    """Delete every `*.zst` file in the current directory that duplicates another.

    Files are visited in sorted name order. For each group of files with
    identical contents, the first name is kept and the later ones are
    removed; a line is printed for each removal.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            # already removed as a duplicate of an earlier file
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a byte-for-byte comparison. The default
            # shallow mode treats files with equal size and mtime as equal,
            # which could delete a distinct compressed file.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))
184 | |
185 | |
def decompress_zst(tag):
    """Decompress every `*_nodict.zst` file in the current directory with `./zstd.<tag>`.

    Each result is written to `<file>_d64_<tag>.dec` and compared with the
    reference file named by `test_dat`.

    Raises:
        RuntimeError: a decompressed file differs from the reference.
        subprocess.CalledProcessError: propagated from `zstd` when the
            binary exits with a non-zero status.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # Compare contents, not just the os.stat() signature, so that a wrong
        # decode of the right size cannot be accepted.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')
197 | |
198 | |
def decompress_dict(tag):
    """Decompress every `*_dictio.zst` file in the current directory with `./zstd.<tag>`.

    The dictionary to use is derived from the file name: `dict.<head>` when
    the name contains `head`, otherwise `dict.` followed by the part of the
    name starting at its last 'v'. Each result is written to
    `<file>_d64_<tag>.dec` and compared with the reference file `test_dat`.

    Raises:
        RuntimeError: a decompressed file differs from the reference.
        subprocess.CalledProcessError: propagated from `zstd` when the
            binary exits with a non-zero status.
    """
    suffix = '_dictio.zst'
    for file_zst in sorted(glob.glob('*' + suffix)):
        stem = file_zst[:-len(suffix)]
        if head in stem:  # find vdevel
            dict_tag = head
        else:
            dict_tag = stem[stem.rfind('v'):]
        # v0.6.0 is not tested against files made with older dictionaries.
        # NOTE(review): presumably a known incompatibility -- confirm.
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # Compare contents, not just the os.stat() signature, so that a wrong
        # decode of the right size cannot be accepted.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')
218 | |
219 | |
if __name__ == '__main__':
    # Every path below derives from the parent of the directory the script
    # is started from.
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    # The sample that every version compresses and must decode back.
    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        # Release binaries are reused when already built; head is always rebuilt.
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print('-----------------------------------------------')
                print('compiling ' + tag)
                print('-----------------------------------------------')
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                # Check the tag's files out into r_dir without touching the clone's work tree.
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)  # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print('-----------------------------------------------')
                print('compiling head')
                print('-----------------------------------------------')
                make(['zstd'], False)
            # Both branches leave the freshly built binary in the current directory.
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            for rel_path in glob.glob(dict_glob, root_dir=base_dir):
                src_file = os.path.join(base_dir, rel_path)
                print("copying " + src_file + " to " + dict_source_path)
                shutil.copy(src_file, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    # The head dictionary is created first because it is the fallback for
    # every tag whose own dictionary cannot be built.
    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        if tag >= 'v0.5.0':
            create_dict(tag, dict_source_path, head)
            dict_compress_sample(tag, test_dat)
            remove_duplicates()
            decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    # The loop variable is deliberately not named `zstd`, which would rebind
    # the module-level zstd() helper.
    for zst_path in sorted(glob.glob('*.zst')):
        print(zst_path + ' : ' + repr(os.path.getsize(zst_path)) + ', ' + sha1_of_file(zst_path))