| 1 | #!/usr/bin/env python3 |
| 2 | """Test zstd interoperability between versions""" |
| 3 | |
| 4 | # ################################################################ |
| 5 | # Copyright (c) Meta Platforms, Inc. and affiliates. |
| 6 | # All rights reserved. |
| 7 | # |
| 8 | # This source code is licensed under both the BSD-style license (found in the |
| 9 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 10 | # in the COPYING file in the root directory of this source tree). |
| 11 | # You may select, at your option, one of the above-listed licenses. |
| 12 | # ################################################################ |
| 13 | |
| 14 | import filecmp |
| 15 | import glob |
| 16 | import hashlib |
| 17 | import os |
| 18 | import shutil |
| 19 | import sys |
| 20 | import subprocess |
| 21 | from subprocess import Popen, PIPE |
| 22 | |
# Upstream repository that is cloned to obtain every released tag.
repo_url = "https://github.com/facebook/zstd.git"
# Scratch directory, relative to the repository root, holding all test artifacts.
tmp_dir_name = "tests/versionsTest"
# Build tool and the arguments passed on every invocation.
make_cmd = "make"
make_args = ["-j", "CFLAGS=-O0"]
git_cmd = "git"
# Repository file that is copied to `test_dat` and used as the sample to compress.
test_dat_src = "README.md"
test_dat = "test_dat"
# Pseudo-tag naming the build made from the current working tree.
head = "vdevel"
# Sub-directory of the scratch directory that receives the dictionary training files.
dict_source = "dict_source"
# Globs (relative to the repository root) selecting the dictionary training files.
dict_globs = [
    "programs/*.c",
    "lib/common/*.c",
    "lib/compress/*.c",
    "lib/decompress/*.c",
    "lib/dictBuilder/*.c",
    "lib/legacy/*.c",
    "programs/*.h",
    "lib/common/*.h",
    "lib/compress/*.h",
    "lib/dictBuilder/*.h",
    "lib/legacy/*.h",
]
| 45 | |
| 46 | |
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command`, wait for it to finish and return its exit status.

    Args:
        command: Argument list, or a single command string when
            `param_shell` is true.
        print_output: Print the captured stdout and stderr unconditionally.
        print_error: When the command fails and `print_output` is false,
            print the captured stderr.
        param_shell: Forwarded to `Popen` as its `shell` argument.

    Returns:
        The return code of the finished process.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    raw_stdout, raw_stderr = popen.communicate()
    # The captured text is only ever displayed, so undecodable bytes are
    # replaced instead of raising and hiding the command's exit status.
    stdout_text = raw_stdout.decode("utf-8", errors="replace")
    stderr_text = raw_stderr.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        # communicate() has returned, so returncode is always set here.
        print(stderr_text)
    return popen.returncode
| 59 | |
| 60 | |
def proc(cmd_args, pipe=True, dummy=False):
    """Run `cmd_args` as a child process and wait for it to finish.

    Args:
        cmd_args: Argument list handed to `Popen`.
        pipe: Capture stdout and stderr when true; otherwise the child
            inherits the parent's streams.
        dummy: When true, nothing is run.

    Returns:
        None when `dummy` is true, otherwise the `(stdout, stderr)` pair
        returned by `Popen.communicate()`. The exit status is not inspected.
    """
    if dummy:
        return None
    stream_kwargs = {"stdout": PIPE, "stderr": PIPE} if pipe else {}
    return Popen(cmd_args, **stream_kwargs).communicate()
| 69 | |
| 70 | |
def make(targets, pipe=True):
    """Invoke the configured make command for `targets`.

    The full command line (make command, the module-level make arguments and
    then `targets`) is printed before being run through `proc`.

    Args:
        targets: List of make targets.
        pipe: Forwarded to `proc`.

    Returns:
        Whatever `proc` returns for the command.
    """
    command_line = [make_cmd] + make_args + targets
    print('compilation command : ' + '{}'.format(command_line))
    return proc(command_line, pipe)
| 76 | |
| 77 | |
def git(args, pipe=True):
    """Run the configured git command with `args` through `proc`.

    Args:
        args: List of git arguments (sub-command first).
        pipe: Forwarded to `proc`.

    Returns:
        Whatever `proc` returns for the command.
    """
    git_command = [git_cmd] + args
    return proc(git_command, pipe)
| 80 | |
| 81 | |
def get_git_tags():
    """Return the tags of the current repository that look like `vX.Y.Z`.

    Only tags matching the git pattern `v[0-9].[0-9].[0-9]` (one digit per
    component) are listed.

    Returns:
        List of tag names, split on whitespace from git's output.
    """
    listing, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return listing.decode('utf-8').split()
| 86 | |
| 87 | |
def dict_ok(tag, dict_name, sample):
    """Check that `./zstd.<tag>` can compress `sample` using `dict_name`.

    Args:
        tag: Version tag selecting the `./zstd.<tag>` binary in the cwd.
        dict_name: Path of the dictionary to validate.
        sample: Path of a file fed to the binary on stdin.

    Returns:
        True when the dictionary file exists and the command exits with
        status 0; False when the dictionary is missing, the sample or the
        binary cannot be opened/launched, or the command fails.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as sample_file:
            subprocess.check_call(cmd, stdin=sample_file, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Only "could not run" / "ran and failed" mean a bad dictionary;
        # KeyboardInterrupt and SystemExit are deliberately left to propagate.
        return False
    return True
| 98 | |
| 99 | |
def create_dict(tag, dict_source_path, fallback_tag=None):
    """Train `dict.<tag>` in the cwd from the sources in `dict_source_path`.

    Nothing is done when `dict.<tag>` already exists. Otherwise the
    dictionary is trained with the binary of that tag and validated with
    `dict_ok`; on failure `dict.<fallback_tag>` is copied in its place.

    Args:
        tag: Version tag selecting the binary and naming the dictionary.
        dict_source_path: Directory whose `*.c` and `*.h` files are the
            training samples.
        fallback_tag: Tag whose existing dictionary is copied when training
            or validation fails; None disables the fallback.

    Raises:
        RuntimeError: Training or validation failed and no fallback was given.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    # Sorted so the dictionary builder always sees the samples in the same order.
    files = sorted(glob.glob(dict_source_path + "/*.c") + glob.glob(dict_source_path + "/*.h"))
    if tag == 'v0.5.0':
        # v0.5.0 is trained with its separate dictBuilder binary.
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]
    try:
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact.
        result = execute(cmd, print_output=False)
    except OSError as err:
        # A binary that cannot be launched is a training failure, so the
        # fallback below still applies (the shell used to report this as a
        # non-zero status).
        print(err)
        result = 1

    if result == 0 and files and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
| 121 | |
| 122 | |
def zstd(tag, args, input_file, output_file):
    """
    Run `./zstd.<tag>` with `args`, reading input_file on stdin and writing
    stdout to output_file (used for both compression and decompression).
    Need this helper because 0.5.0 is broken when stdout is not a TTY.

    Args:
        tag: Version tag selecting the `./zstd.<tag>` binary in the cwd.
        args: List of command-line arguments for the binary.
        input_file: Path of the file connected to the command's stdin.
        output_file: Path of the file that receives the command's stdout
            (created or truncated).

    Raises:
        subprocess.CalledProcessError: The command returned non-zero.
    """
    cmd = ['./zstd.' + tag] + args
    with open(input_file, "rb") as i, open(output_file, "wb") as o:
        print("Running: '{}', input={}, output={}" .format(
            ' '.join(cmd), input_file, output_file
        ))
        result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
    # Diagnostics are only displayed: replace undecodable bytes so that a
    # decoding error can never pre-empt the return-code check below.
    print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
    result.check_returncode()
| 138 | |
| 139 | |
def dict_compress_sample(tag, sample):
    """Compress `sample` with `./zstd.<tag>` and `dict.<tag>` at several levels.

    One `<sample>_<LL>_64_<tag>_dictio.zst` file is produced per level
    (1, 3, 5, 9, 15, 18); only the level-1 run is given the verbose flags.

    Args:
        tag: Version tag selecting the binary and the dictionary.
        sample: Path of the file to compress.
    """
    dict_name = 'dict.' + tag
    for level in (1, 3, 5, 9, 15, 18):
        args = ['-D', dict_name, '-{}'.format(level)]
        if level == 1:
            args += ['-v', '-v', '-v']
        zstd(tag, args, sample, '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag))
    print(tag + " : dict compression completed")
| 152 | |
| 153 | |
def compress_sample(tag, sample):
    """Compress `sample` with `./zstd.<tag>` at several levels, without a dictionary.

    One `<sample>_<LL>_64_<tag>_nodict.zst` file is produced per level
    (1, 3, 5, 9, 15, 18).

    Args:
        tag: Version tag selecting the binary.
        sample: Path of the file to compress.
    """
    for level in (1, 3, 5, 9, 15, 18):
        zstd(tag, ['-{}'.format(level)], sample, '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag))
    print(tag + " : compression completed")
| 164 | |
| 165 | |
# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the content of `filepath`.

    The whole file is read into memory in one go.
    """
    digest = hashlib.sha1()
    with open(filepath, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()
| 170 | |
| 171 | |
def remove_duplicates():
    """Delete `*.zst` files in the cwd whose content duplicates an earlier one.

    Files are visited in sorted name order; for each group of byte-identical
    files the first name is kept and the others are removed, printing one
    line per removed file.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            # Already removed as a duplicate of an earlier file.
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a content comparison; the default only
            # compares size and mtime when those match, which could delete a
            # distinct file written within the same timestamp tick.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))
| 184 | |
| 185 | |
def decompress_zst(tag):
    """Decompress every `*_nodict.zst` in the cwd with `./zstd.<tag>` and verify it.

    Each archive is decompressed to `<archive>_d64_<tag>.dec` and the result
    is compared byte-for-byte with the reference sample file.

    Args:
        tag: Version tag selecting the decompressing binary.

    Raises:
        RuntimeError: A decompressed file differs from the reference sample.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # shallow=False: always compare content, never just size and mtime.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')
| 197 | |
| 198 | |
def decompress_dict(tag):
    """Decompress every `*_dictio.zst` in the cwd with `./zstd.<tag>` and verify it.

    The dictionary for each archive is `dict.<T>`, where `<T>` is the tag
    embedded in the archive's file name. Each archive is decompressed to
    `<archive>_d64_<tag>.dec` and compared byte-for-byte with the reference
    sample file.

    Args:
        tag: Version tag selecting the decompressing binary.

    Raises:
        RuntimeError: A decompressed file differs from the reference sample.
    """
    suffix = '_dictio.zst'
    for file_zst in sorted(glob.glob('*' + suffix)):
        stem = file_zst[:-len(suffix)]
        if head in stem:
            dict_tag = head
        else:
            # The producing tag is the trailing "v..." part of the name.
            dict_tag = stem[stem.rfind('v'):]
        # NOTE(review): presumably v0.6.0 cannot read archives made with
        # older dictionaries, hence the skip -- confirm.
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # shallow=False: always compare content, never just size and mtime.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')
| 218 | |
| 219 | |
if __name__ == '__main__':
    # The script must be started from a direct sub-directory of the
    # repository root: every path below is derived from "<cwd>/..".
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    # Plain string comparison; it also keeps the 'vdevel' pseudo-tag.
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        # Released binaries are reused between runs; head is always rebuilt.
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print('-----------------------------------------------')
                print('compiling ' + tag)
                print('-----------------------------------------------')
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                # Export the tag's tree into r_dir without touching the clone's own work tree.
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)  # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print('-----------------------------------------------')
                print('compiling head')
                print('-----------------------------------------------')
                make(['zstd'], False)
            # The cwd is the 'programs' directory that was just built.
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            files = glob.glob(dict_glob, root_dir=base_dir)
            for file in files:
                file = os.path.join(base_dir, file)
                print("copying " + file + " to " + dict_source_path)
                shutil.copy(file, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    # The head dictionary is created first because it is the fallback for every other tag.
    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        # Every tag in `tags` is >= 'v0.5.0' (filtered above), so the
        # dictionary round-trip runs for all of them.
        create_dict(tag, dict_source_path, head)
        dict_compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    # Named `zst_file` so the loop does not rebind the module-level zstd() helper.
    for zst_file in sorted(glob.glob('*.zst')):
        print(zst_file + ' : ' + repr(os.path.getsize(zst_file)) + ', ' + sha1_of_file(zst_file))