| 1 | #! /usr/bin/env python3 |
| 2 | # THIS BENCHMARK IS BEING REPLACED BY automated-benchmarking.py |
| 3 | |
| 4 | # ################################################################ |
| 5 | # Copyright (c) Meta Platforms, Inc. and affiliates. |
| 6 | # All rights reserved. |
| 7 | # |
| 8 | # This source code is licensed under both the BSD-style license (found in the |
| 9 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 10 | # in the COPYING file in the root directory of this source tree). |
| 11 | # You may select, at your option, one of the above-listed licenses. |
| 12 | # ########################################################################## |
| 13 | |
| 14 | # Limitations: |
| 15 | # - doesn't support filenames with spaces |
| 16 | # - dir1/zstd and dir2/zstd will be merged in a single results file |
| 17 | |
import argparse
import hashlib
import os            # getloadavg
import platform      # system
import shlex         # quote
import string
import subprocess
import time          # strftime
import traceback
| 26 | |
| 27 | script_version = 'v1.1.2 (2017-03-26)' |
| 28 | default_repo_url = 'https://github.com/facebook/zstd.git' |
| 29 | working_dir_name = 'speedTest' |
| 30 | working_path = os.getcwd() + '/' + working_dir_name # /path/to/zstd/tests/speedTest |
| 31 | clone_path = working_path + '/' + 'zstd' # /path/to/zstd/tests/speedTest/zstd |
| 32 | email_header = 'ZSTD_speedTest' |
| 33 | pid = str(os.getpid()) |
| 34 | verbose = False |
| 35 | clang_version = "unknown" |
| 36 | gcc_version = "unknown" |
| 37 | args = None |
| 38 | |
| 39 | |
def hashfile(hasher, fname, blocksize=65536):
    """Feed the contents of file *fname* into *hasher* and return the hex digest.

    The file is opened in binary mode and read in pieces of at most
    *blocksize* bytes; each piece is passed to ``hasher.update()``.
    """
    with open(fname, "rb") as stream:
        while True:
            piece = stream.read(blocksize)
            if piece == b"":  # end of file
                break
            hasher.update(piece)
    return hasher.hexdigest()
| 45 | |
| 46 | |
def log(text):
    """Print *text* to stdout, prefixed with the current local date and time."""
    timestamp = time.strftime("%Y/%m/%d %H:%M:%S")
    print(timestamp + ' - ' + text)
| 49 | |
| 50 | |
def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
    """Run *command* in a subprocess and return its output as a list of lines.

    The command runs in the directory stored in ``execute.cwd`` (``None``
    means the current working directory) and is bounded by ``args.timeout``
    seconds when the module-level ``args`` has been parsed; before that no
    timeout is applied.

    Parameters:
        command: the command to run (a shell string when *param_shell* is true).
        print_command: log the command line before running it.
        print_output: print captured stdout and stderr after the command ends.
        print_error: on failure, print stderr (unless *print_output* already did).
        param_shell: passed to ``subprocess.Popen`` as ``shell``.

    Returns:
        The lines of stdout followed by the lines of stderr.

    Raises:
        RuntimeError: the command exited with a non-zero status; the message
            is the captured stdout followed by stderr.
        subprocess.TimeoutExpired: the command did not finish in time.
    """
    if print_command:
        log("> " + command)
    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             shell=param_shell, cwd=execute.cwd)
    # `args` is still None until the command line has been parsed.
    timeout = getattr(args, 'timeout', None)
    try:
        stdout_data, stderr_data = popen.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # communicate() does not stop the child when the timeout expires, so
        # kill and reap it here instead of leaving it running in the background.
        # wait() is used rather than a second communicate() so that processes
        # spawned by the shell that still hold the pipes cannot block us.
        popen.kill()
        popen.wait()
        raise
    # Tool output is not guaranteed to be valid UTF-8; never fail on decoding.
    stderr_lines = stderr_data.decode("utf-8", errors="replace")
    stdout_lines = stdout_data.decode("utf-8", errors="replace")
    if print_output:
        if stdout_lines:
            print(stdout_lines)
        if stderr_lines:
            print(stderr_lines)
    if popen.returncode != 0:
        if stderr_lines and not print_output and print_error:
            print(stderr_lines)
        raise RuntimeError(stdout_lines + stderr_lines)
    return (stdout_lines + stderr_lines).splitlines()


# Working directory used by every execute() call; None means "inherit".
execute.cwd = None
| 69 | |
| 70 | |
def does_command_exist(command):
    """Return True when *command* can be run through execute() without raising.

    The command line is logged only in verbose mode; its output and errors
    are not printed.
    """
    try:
        execute(command, verbose, False, False)
    except Exception:
        available = False
    else:
        available = True
    return available
| 77 | |
| 78 | |
def send_email(emails, topic, text, have_mutt, have_mail):
    """Send *text* as an e-mail with subject *topic* to *emails*.

    The body is first written to ``tmpEmailContent`` inside the working
    directory and then fed to ``mutt`` (preferred) or ``mail`` on stdin.
    When neither program is available only a log message is emitted.

    Parameters:
        emails: whitespace-separated recipient addresses.
        topic: subject line.
        text: message body (a string or an iterable of strings).
        have_mutt: whether the ``mutt`` command is available.
        have_mail: whether the ``mail`` command is available.

    Raises:
        RuntimeError: the mail program exited with a non-zero status.
    """
    logFileName = working_path + '/' + 'tmpEmailContent'
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)
    if have_mutt:
        mailer = 'mutt'
    elif have_mail:
        mailer = 'mail'
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
        return
    # The subject may contain branch names taken from the remote repository,
    # so every argument is quoted before being handed to the shell.
    recipients = ' '.join(shlex.quote(address) for address in emails.split())
    execute('%s -s %s %s < %s' % (mailer, shlex.quote(topic), recipients, shlex.quote(logFileName)), verbose)
| 90 | |
| 91 | |
def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
                                logFileName, have_mutt, have_mail):
    """Send a regression warning e-mail, attaching the results files when mutt is used.

    The body (*text*) is written to *logFileName* and fed to the mail program
    on stdin. With ``mutt`` the files named in *results_files* are attached;
    with ``mail`` only the body is sent. When neither program is available
    only a log message is emitted.

    Parameters:
        branch, commit, last_commit: identify the tested revision in the subject.
        args: parsed command-line arguments (``emails``, ``lowerLimit`` and
            ``ratioLimit`` are read).
        text: message body (a string or an iterable of strings).
        results_files: whitespace-separated paths of files to attach.
        logFileName: path of the file that receives the message body.
        have_mutt: whether the ``mutt`` command is available.
        have_mail: whether the ``mail`` command is available.

    Raises:
        RuntimeError: the mail program exited with a non-zero status.
    """
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)
    email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \
                  % (email_header, pid, branch, commit, last_commit,
                     args.lowerLimit, args.ratioLimit)
    # The branch name comes from the remote repository: quote everything that
    # is passed through the shell so it cannot be interpreted as shell syntax.
    subject = shlex.quote(email_topic)
    recipients = ' '.join(shlex.quote(address) for address in args.emails.split())
    body_file = shlex.quote(logFileName)
    if have_mutt:
        attachments = ' '.join(shlex.quote(path) for path in results_files.split())
        execute('mutt -s %s %s -a %s < %s' % (subject, recipients, attachments, body_file))
    elif have_mail:
        execute('mail -s %s %s < %s' % (subject, recipients, body_file))
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
| 107 | |
| 108 | |
def git_get_branches():
    """Fetch (pruning stale refs) and return the list of remote branch names.

    Lines of ``git branch -rl`` that mention HEAD, coverity_scan or gh-pages
    are left out; the remaining lines are returned with whitespace stripped.
    """
    execute('git fetch -p', verbose)
    ignored_markers = ("HEAD", "coverity_scan", "gh-pages")
    return [ref.strip()
            for ref in execute('git branch -rl', verbose)
            if not any(marker in ref for marker in ignored_markers)]
| 117 | |
| 118 | |
def git_get_changes(branch, commit, last_commit):
    """Return a printable summary of the commits between *last_commit* and *commit*.

    When *last_commit* is None the 10 most recent commits reachable from
    *commit* are listed instead.
    """
    log_format = '--format="%h: (%an) %s, %ar"'
    if last_commit is not None:
        command = 'git --no-pager log %s %s..%s' % (log_format, last_commit, commit)
    else:
        command = 'git log -n 10 %s %s' % (log_format, commit)
    history = execute(command)
    heading = 'Changes in %s since %s:\n' % (branch, last_commit)
    return heading + '\n'.join(history)
| 126 | |
| 127 | |
def get_last_results(resultsFileName):
    """Parse a results file and return the data of its last recorded run.

    The file is a sequence of runs. Each run starts with a header line of at
    most four words (branch, commit, compiler version, md5) and is followed
    by result lines of eight or nine words, of which word 1 is the compressed
    size, word 3 the compression speed and word 5 the decompression speed.
    Lines of any other shape (including blank lines) are ignored.

    Returns:
        ``(commit, csize, cspeed, dspeed)`` for the last header found in the
        file, where the last three are lists with one entry per result line.
        ``(None, None, None, None)`` when the file does not exist.
    """
    if not os.path.isfile(resultsFileName):
        return None, None, None, None
    commit = None
    csize = []
    cspeed = []
    dspeed = []
    with open(resultsFileName, 'r') as f:
        for line in f:
            words = line.split()
            # A header needs at least branch + commit; blank or one-word lines
            # used to match the "<= 4" test and crash on words[1].
            if 2 <= len(words) <= 4:  # branch + commit + compilerVer + md5
                commit = words[1]
                # A new header starts a new run: forget the previous one.
                csize = []
                cspeed = []
                dspeed = []
            elif len(words) in (8, 9):  # results: "filename" or "XX files"
                csize.append(int(words[1]))
                cspeed.append(float(words[3]))
                dspeed.append(float(words[5]))
    return commit, csize, cspeed, dspeed
| 148 | |
| 149 | |
def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName,
                          testFilePath, fileName, last_csize, last_cspeed, last_dspeed):
    """Benchmark one executable on one input, record the result and compare it with the previous run.

    The function waits until the system load average drops to
    ``args.maxLoadAvg`` or below, runs ``programs/<executableName>`` in
    benchmark mode for levels 1 to ``args.lastCLevel`` (pinned to CPU 0 with
    ``taskset`` on Linux), appends a header line and the raw output to
    *resultsFileName*, and compares the new figures with the ``last_*`` lists.

    Parameters:
        branch, commit: identify the revision under test (written to the header).
        last_commit: previous revision, only reported in the warning text.
        args: parsed command-line arguments (``maxLoadAvg``, ``lastCLevel``,
            ``dictionary``, ``lowerLimit``, ``ratioLimit``, ``message``).
        executableName: name of the binary inside ``programs/``.
        md5sum, compilerVersion: recorded in the header and the warning text.
        resultsFileName: results file to append to.
        testFilePath, fileName: benchmark input and its display name.
        last_csize, last_cspeed, last_dspeed: figures of the previous run, or
            None when there is nothing to compare with.

    Returns:
        A warning text when a compression speed, decompression speed or size
        ratio fell below its limit, otherwise an empty string.

    Raises:
        RuntimeError: the benchmark failed or printed an unexpected number
            of lines.
    """
    sleepTime = 30
    while os.getloadavg()[0] > args.maxLoadAvg:
        log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds"
            % (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
        time.sleep(sleepTime)
    start_load = str(os.getloadavg())

    command = []
    if platform.system() == 'Linux':
        command.append("taskset --cpu-list 0")
    command.append('programs/%s -rqi5b1e%s' % (executableName, args.lastCLevel))
    # Paths are quoted so that the shell does not interpret characters in them.
    if args.dictionary:
        command.append('-D ' + shlex.quote(args.dictionary))
    command.append(shlex.quote(testFilePath))
    result = execute(' '.join(command), print_output=True)
    end_load = str(os.getloadavg())

    linesExpected = args.lastCLevel + 1
    if len(result) != linesExpected:
        raise RuntimeError("ERROR: number of result lines=%d is different than expected %d\n%s"
                           % (len(result), linesExpected, '\n'.join(result)))
    with open(resultsFileName, "a") as myfile:
        myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum))
        myfile.write('\n'.join(result) + '\n')
    if last_cspeed is None:
        log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName))
        return ""

    # Read back the run that was just appended so that the new figures go
    # through the same parser as the previous ones.
    commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    warnings = []
    runs = zip(cspeed, last_cspeed, dspeed, last_dspeed, csize, last_csize)
    for level, (cur_c, old_c, cur_d, old_d, cur_size, old_size) in enumerate(runs, start=1):
        c_diff = cur_c / old_c
        d_diff = cur_d / old_d
        ratio_diff = float(old_size) / cur_size
        print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s"
              % (branch, commit, level, cur_c, old_c, c_diff, cur_d, old_d, d_diff, ratio_diff, fileName))
        if c_diff < args.lowerLimit:
            warnings.append("WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n"
                            % (executableName, level, cur_c, old_c, c_diff, fileName))
        if d_diff < args.lowerLimit:
            warnings.append("WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n"
                            % (executableName, level, cur_d, old_d, d_diff, fileName))
        if ratio_diff < args.ratioLimit:
            warnings.append("WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n"
                            % (executableName, level, cur_size, old_size, ratio_diff, fileName))
    if not warnings:
        return ""
    summary = "\nmaxLoadAvg=%s load average at start=%s end=%s\n%s last_commit=%s md5=%s\n" \
              % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)
    return args.message + summary + ''.join(warnings)
| 191 | |
| 192 | |
def update_config_file(branch, commit):
    """Store *commit* as the latest head seen for *branch* and return the previous one.

    The value lives in ``commit_<branch>.txt`` inside the working directory
    (slashes in the branch name are replaced by underscores). Returns None
    when no such file existed yet.
    """
    state_file = working_path + "/commit_" + branch.replace("/", "_") + ".txt"
    previous = None
    if os.path.isfile(state_file):
        with open(state_file, 'r') as source:
            previous = source.read()
    with open(state_file, 'w') as target:
        target.write(commit)
    return previous
| 202 | |
| 203 | |
def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName):
    """Benchmark against the last recorded run and repeat once if a regression is reported.

    The figures of the last run stored in *resultsFileName* are read once and
    used as the baseline for both attempts, so the second attempt is not
    compared with the (possibly noisy) first one.

    Returns:
        The warning text of the final attempt, or an empty string when no
        regression was found or ``args.dry_run`` is set (nothing is run then).
    """
    last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    # Must be defined even when nothing is benchmarked: in dry-run mode the
    # original code reached `return text` with the name unbound.
    text = ""
    if not args.dry_run:
        text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion,
                                     resultsFileName, filePath, fileName, csize, cspeed, dspeed)
        if text:
            log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
            text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion,
                                         resultsFileName, filePath, fileName, csize, cspeed, dspeed)
    return text
| 212 | |
| 213 | |
def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
    """Build the zstd variants for *commit* and benchmark each of them on every test file.

    Unless ``args.dry_run`` is set, three executables are built in the clone:
    ``zstd_clang`` (clang), ``zstd`` and ``zstd32`` (default compiler). Each
    one is benchmarked through double_check() on every path in
    *testFilePaths*; all warning texts are collected and, if there are any,
    sent in a single e-mail together with the names of the affected results files.
    """
    local_branch = branch.split('/')[1]
    version = local_branch.rpartition('-')[2] + '_' + commit
    if not args.dry_run:
        execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version +
                'mv programs/zstd programs/zstd_clang && ' +
                'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version)
    md5_zstd = hashfile(hashlib.md5(), clone_path + '/programs/zstd')
    md5_zstd32 = hashfile(hashlib.md5(), clone_path + '/programs/zstd32')
    md5_zstd_clang = hashfile(hashlib.md5(), clone_path + '/programs/zstd_clang')
    print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang))
    print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version))

    branch_tag = branch.replace("/", "_")
    logFileName = working_path + "/log_" + branch_tag + ".txt"
    dictName = args.dictionary.rpartition('/')[2] if args.dictionary else None

    text_to_send = []
    results_files = ""
    for filePath in testFilePaths:
        fileName = filePath.rpartition('/')[2]
        file_tag = fileName.replace(".", "_")
        # Only the plain zstd results are keyed by the dictionary name.
        if dictName:
            zstd_prefix = dictName.replace(".", "_") + "_"
        else:
            zstd_prefix = "results_"
        variants = (
            (zstd_prefix, 'zstd', md5_zstd, 'gcc_version=' + gcc_version),
            ("results32_", 'zstd32', md5_zstd32, 'gcc_version=' + gcc_version),
            ("resultsClang_", 'zstd_clang', md5_zstd_clang, 'clang_version=' + clang_version),
        )
        for prefix, executableName, md5sum, compilerVersion in variants:
            resultsFileName = working_path + "/" + prefix + branch_tag + "_" + file_tag + ".txt"
            text = double_check(branch, commit, args, executableName, md5sum, compilerVersion,
                                resultsFileName, filePath, fileName)
            if text:
                text_to_send.append(text)
                results_files += resultsFileName + " "
    if text_to_send:
        send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files,
                                    logFileName, have_mutt, have_mail)
| 257 | |
| 258 | |
# Script entry point: parse the options, prepare the working directory and the
# clone, then poll the repository forever and benchmark every branch whose
# head changed since the last pass.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('testFileNames', help='file or directory names list for speed benchmark')
    parser.add_argument('emails', help='list of e-mail addresses to send warnings')
    parser.add_argument('--dictionary', '-D', help='path to the dictionary')
    parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="")
    parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url)
    parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98)
    parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999)
    parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
    parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
    parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300)
    parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800)
    parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
    parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False)
    args = parser.parse_args()
    verbose = args.verbose

    # check if test files are accessible
    testFileNames = args.testFileNames.split()
    testFilePaths = []
    for fileName in testFileNames:
        fileName = os.path.expanduser(fileName)
        if os.path.isfile(fileName) or os.path.isdir(fileName):
            testFilePaths.append(os.path.abspath(fileName))
        else:
            log("ERROR: File/directory not found: " + fileName)
            exit(1)

    # check if dictionary is accessible
    if args.dictionary:
        args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary))
        if not os.path.isfile(args.dictionary):
            log("ERROR: Dictionary not found: " + args.dictionary)
            exit(1)

    # check availability of e-mail senders
    have_mutt = does_command_exist("mutt -h")
    have_mail = does_command_exist("mail -V")
    if not have_mutt and not have_mail:
        log("ERROR: e-mail senders 'mail' or 'mutt' not found")
        exit(1)

    clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0]
    gcc_version = execute("gcc -dumpversion", verbose)[0]

    if verbose:
        print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
        print("working_path=%s" % working_path)
        print("clone_path=%s" % clone_path)
        print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
        print("message=%s" % args.message)
        print("emails=%s" % args.emails)
        print("dictionary=%s" % args.dictionary)
        print("maxLoadAvg=%s" % args.maxLoadAvg)
        print("lowerLimit=%s" % args.lowerLimit)
        print("ratioLimit=%s" % args.ratioLimit)
        print("lastCLevel=%s" % args.lastCLevel)
        print("sleepTime=%s" % args.sleepTime)
        print("timeout=%s" % args.timeout)
        print("dry_run=%s" % args.dry_run)
        print("verbose=%s" % args.verbose)
        print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))

    # clone ZSTD repo if needed
    if not os.path.isdir(working_path):
        os.mkdir(working_path)
    if not os.path.isdir(clone_path):
        execute.cwd = working_path
        # The URL is user input: quote it before it goes through the shell.
        execute('git clone ' + shlex.quote(args.repoURL))
    if not os.path.isdir(clone_path):
        log("ERROR: ZSTD clone not found: " + clone_path)
        exit(1)
    # From here on every command runs inside the clone.
    execute.cwd = clone_path

    # check if speedTest.pid already exists
    pidfile = "./speedTest.pid"
    if os.path.isfile(pidfile):
        log("ERROR: %s already exists, exiting" % pidfile)
        exit(1)

    send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
    with open(pidfile, 'w') as the_file:
        the_file.write(pid)

    branch = ""
    commit = ""
    first_time = True
    while True:
        try:
            if first_time:
                first_time = False
            else:
                time.sleep(args.sleepTime)
            loadavg = os.getloadavg()[0]
            if (loadavg <= args.maxLoadAvg):
                branches = git_get_branches()
                for branch in branches:
                    # Branch names are controlled by whoever can push to the
                    # remote, so never pass them to the shell unquoted.
                    quoted_branch = shlex.quote(branch)
                    commit = execute('git show -s --format=%h ' + quoted_branch, verbose)[0]
                    last_commit = update_config_file(branch, commit)
                    if commit == last_commit:
                        log("skipping branch %s: head %s already processed" % (branch, commit))
                    else:
                        log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit))
                        execute('git checkout -- . && git checkout ' + quoted_branch)
                        print(git_get_changes(branch, commit, last_commit))
                        test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail)
            else:
                log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
            if verbose:
                log("sleep for %s seconds" % args.sleepTime)
        except Exception:
            stack = traceback.format_exc()
            print(stack)
            email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit)
            # A failure to deliver the report must not end the monitoring loop
            # (that would also leave a stale pidfile behind).
            try:
                send_email(args.emails, email_topic, stack, have_mutt, have_mail)
            except Exception:
                log("ERROR: failed to send the error report e-mail")
                print(traceback.format_exc())
        except KeyboardInterrupt:
            os.unlink(pidfile)
            send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
            exit(0)