deps/libchdr/deps/zstd-1.5.6/tests/fuzz/fuzz.py

   1 #!/usr/bin/env python
   2
   3 # ################################################################
   4 # Copyright (c) Meta Platforms, Inc. and affiliates.
   5 # All rights reserved.
   6 #
   7 # This source code is licensed under both the BSD-style license (found in the
   8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
   9 # in the COPYING file in the root directory of this source tree).
  10 # You may select, at your option, one of the above-listed licenses.
  11 # ##########################################################################
  12
  13 import argparse
  14 import contextlib
  15 import os
  16 import re
  17 import shlex
  18 import shutil
  19 import subprocess
  20 import sys
  21 import tempfile
  22
  23
  24 def abs_join(a, *p):
  25     return os.path.abspath(os.path.join(a, *p))
  26
  27
  28 class InputType(object):
  29     RAW_DATA = 1
  30     COMPRESSED_DATA = 2
  31     DICTIONARY_DATA = 3
  32
  33
  34 class FrameType(object):
  35     ZSTD = 1
  36     BLOCK = 2
  37
  38
  39 class TargetInfo(object):
  40     def __init__(self, input_type, frame_type=FrameType.ZSTD):
  41         self.input_type = input_type
  42         self.frame_type = frame_type
  43
  44
  45 # Constants
  46 FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
  47 CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
  48 TARGET_INFO = {
  49     'simple_round_trip': TargetInfo(InputType.RAW_DATA),
  50     'stream_round_trip': TargetInfo(InputType.RAW_DATA),
  51     'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
  52     'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
  53     'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
  54     'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
  55     'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
  56     'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
  57     'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
  58     'simple_compress': TargetInfo(InputType.RAW_DATA),
  59     'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
  60     'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
  61     'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
  62     'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
  63     'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
  64     'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
  65     'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
  66     'huf_round_trip': TargetInfo(InputType.RAW_DATA),
  67     'huf_decompress': TargetInfo(InputType.RAW_DATA),
  68     'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
  69     'generate_sequences': TargetInfo(InputType.RAW_DATA),
  70 }
  71 TARGETS = list(TARGET_INFO.keys())
  72 ALL_TARGETS = TARGETS + ['all']
  73 FUZZ_RNG_SEED_SIZE = 4
  74
  75 # Standard environment variables
  76 CC = os.environ.get('CC', 'cc')
  77 CXX = os.environ.get('CXX', 'c++')
  78 CPPFLAGS = os.environ.get('CPPFLAGS', '')
  79 CFLAGS = os.environ.get('CFLAGS', '-O3')
  80 CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
  81 LDFLAGS = os.environ.get('LDFLAGS', '')
  82 MFLAGS = os.environ.get('MFLAGS', '-j')
  83 THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')
  84
  85 # Fuzzing environment variables
  86 LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
  87 AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
  88 DECODECORPUS = os.environ.get('DECODECORPUS',
  89                               abs_join(FUZZ_DIR, '..', 'decodecorpus'))
  90 ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))
  91
  92 # Sanitizer environment variables
  93 MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
  94 MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
  95 MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
  96 MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
  97
  98
  99 def create(r):
 100     d = os.path.abspath(r)
 101     if not os.path.isdir(d):
 102         os.makedirs(d)
 103     return d
 104
 105
 106 def check(r):
 107     d = os.path.abspath(r)
 108     if not os.path.isdir(d):
 109         return None
 110     return d
 111
 112
 113 @contextlib.contextmanager
 114 def tmpdir():
 115     dirpath = tempfile.mkdtemp()
 116     try:
 117         yield dirpath
 118     finally:
 119         shutil.rmtree(dirpath, ignore_errors=True)
 120
 121
 122 def parse_targets(in_targets):
 123     targets = set()
 124     for target in in_targets:
 125         if not target:
 126             continue
 127         if target == 'all':
 128             targets = targets.union(TARGETS)
 129         elif target in TARGETS:
 130             targets.add(target)
 131         else:
 132             raise RuntimeError('{} is not a valid target'.format(target))
 133     return list(targets)
 134
 135
 136 def targets_parser(args, description):
 137     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 138     parser.add_argument(
 139         'TARGET',
 140         nargs='*',
 141         type=str,
 142         help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
 143     args, extra = parser.parse_known_args(args)
 144     args.extra = extra
 145
 146     args.TARGET = parse_targets(args.TARGET)
 147
 148     return args
 149
 150
 151 def parse_env_flags(args, flags):
 152     """
 153     Look for flags set by environment variables.
 154     """
 155     san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
 156     nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))
 157
 158     def set_sanitizer(sanitizer, default, san, nosan):
 159         if sanitizer in san and sanitizer in nosan:
 160             raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
 161                                format(s=sanitizer))
 162         if sanitizer in san:
 163             return True
 164         if sanitizer in nosan:
 165             return False
 166         return default
 167
 168     san = set(san_flags.split(','))
 169     nosan = set(nosan_flags.split(','))
 170
 171     args.asan = set_sanitizer('address', args.asan, san, nosan)
 172     args.msan = set_sanitizer('memory', args.msan, san, nosan)
 173     args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)
 174
 175     args.sanitize = args.asan or args.msan or args.ubsan
 176
 177     return args
 178
 179
 180 def compiler_version(cc, cxx):
 181     """
 182     Determines the compiler and version.
 183     Only works for clang and gcc.
 184     """
 185     cc_version_bytes = subprocess.check_output([cc, "--version"])
 186     cxx_version_bytes = subprocess.check_output([cxx, "--version"])
 187     compiler = None
 188     version = None
 189     print("{} --version:\n{}".format(cc, cc_version_bytes.decode('ascii')))
 190     if b'clang' in cc_version_bytes:
 191         assert(b'clang' in cxx_version_bytes)
 192         compiler = 'clang'
 193     elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
 194         assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
 195         compiler = 'gcc'
 196     if compiler is not None:
 197         version_regex = b'([0-9]+)\.([0-9]+)\.([0-9]+)'
 198         version_match = re.search(version_regex, cc_version_bytes)
 199         version = tuple(int(version_match.group(i)) for i in range(1, 4))
 200     return compiler, version
 201
 202
 203 def overflow_ubsan_flags(cc, cxx):
 204     compiler, version = compiler_version(cc, cxx)
 205     if compiler == 'gcc' and version < (8, 0, 0):
 206         return ['-fno-sanitize=signed-integer-overflow']
 207     if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)):
 208         return ['-fno-sanitize=pointer-overflow']
 209     return []
 210
 211
 212 def build_parser(args):
 213     description = """
 214     Cleans the repository and builds a fuzz target (or all).
 215     Many flags default to environment variables (default says $X='y').
 216     Options that aren't enabling features default to the correct values for
 217     zstd.
 218     Enable sanitizers with --enable-*san.
 219     For regression testing just build.
 220     For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
 221     For AFL set CC and CXX to AFL's compilers and set
 222     LIB_FUZZING_ENGINE='libregression.a'.
 223     """
 224     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 225     parser.add_argument(
 226         '--lib-fuzzing-engine',
 227         dest='lib_fuzzing_engine',
 228         type=str,
 229         default=LIB_FUZZING_ENGINE,
 230         help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
 231               "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE)))
 232
 233     fuzz_group = parser.add_mutually_exclusive_group()
 234     fuzz_group.add_argument(
 235         '--enable-coverage',
 236         dest='coverage',
 237         action='store_true',
 238         help='Enable coverage instrumentation (-fsanitize-coverage)')
 239     fuzz_group.add_argument(
 240         '--enable-fuzzer',
 241         dest='fuzzer',
 242         action='store_true',
 243         help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
 244               'LIB_FUZZING_ENGINE is ignored')
 245     )
 246
 247     parser.add_argument(
 248         '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN')
 249     parser.add_argument(
 250         '--enable-ubsan',
 251         dest='ubsan',
 252         action='store_true',
 253         help='Enable UBSAN')
 254     parser.add_argument(
 255         '--disable-ubsan-pointer-overflow',
 256         dest='ubsan_pointer_overflow',
 257         action='store_false',
 258         help='Disable UBSAN pointer overflow check (known failure)')
 259     parser.add_argument(
 260         '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
 261     parser.add_argument(
 262         '--enable-msan-track-origins', dest='msan_track_origins',
 263         action='store_true', help='Enable MSAN origin tracking')
 264     parser.add_argument(
 265         '--msan-extra-cppflags',
 266         dest='msan_extra_cppflags',
 267         type=str,
 268         default=MSAN_EXTRA_CPPFLAGS,
 269         help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
 270         format(MSAN_EXTRA_CPPFLAGS))
 271     parser.add_argument(
 272         '--msan-extra-cflags',
 273         dest='msan_extra_cflags',
 274         type=str,
 275         default=MSAN_EXTRA_CFLAGS,
 276         help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
 277             MSAN_EXTRA_CFLAGS))
 278     parser.add_argument(
 279         '--msan-extra-cxxflags',
 280         dest='msan_extra_cxxflags',
 281         type=str,
 282         default=MSAN_EXTRA_CXXFLAGS,
 283         help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
 284         format(MSAN_EXTRA_CXXFLAGS))
 285     parser.add_argument(
 286         '--msan-extra-ldflags',
 287         dest='msan_extra_ldflags',
 288         type=str,
 289         default=MSAN_EXTRA_LDFLAGS,
 290         help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
 291         format(MSAN_EXTRA_LDFLAGS))
 292     parser.add_argument(
 293         '--enable-sanitize-recover',
 294         dest='sanitize_recover',
 295         action='store_true',
 296         help='Non-fatal sanitizer errors where possible')
 297     parser.add_argument(
 298         '--debug',
 299         dest='debug',
 300         type=int,
 301         default=1,
 302         help='Set DEBUGLEVEL (default: 1)')
 303     parser.add_argument(
 304         '--force-memory-access',
 305         dest='memory_access',
 306         type=int,
 307         default=0,
 308         help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
 309     parser.add_argument(
 310         '--fuzz-rng-seed-size',
 311         dest='fuzz_rng_seed_size',
 312         type=int,
 313         default=4,
 314         help='Set FUZZ_RNG_SEED_SIZE (default: 4)')
 315     parser.add_argument(
 316         '--disable-fuzzing-mode',
 317         dest='fuzzing_mode',
 318         action='store_false',
 319         help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
 320     parser.add_argument(
 321         '--enable-stateful-fuzzing',
 322         dest='stateful_fuzzing',
 323         action='store_true',
 324         help='Reuse contexts between runs (makes reproduction impossible)')
 325     parser.add_argument(
 326         '--custom-seq-prod',
 327         dest='third_party_seq_prod_obj',
 328         type=str,
 329         default=THIRD_PARTY_SEQ_PROD_OBJ,
 330         help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
 331     parser.add_argument(
 332         '--cc',
 333         dest='cc',
 334         type=str,
 335         default=CC,
 336         help="CC (default: $CC='{}')".format(CC))
 337     parser.add_argument(
 338         '--cxx',
 339         dest='cxx',
 340         type=str,
 341         default=CXX,
 342         help="CXX (default: $CXX='{}')".format(CXX))
 343     parser.add_argument(
 344         '--cppflags',
 345         dest='cppflags',
 346         type=str,
 347         default=CPPFLAGS,
 348         help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
 349     parser.add_argument(
 350         '--cflags',
 351         dest='cflags',
 352         type=str,
 353         default=CFLAGS,
 354         help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
 355     parser.add_argument(
 356         '--cxxflags',
 357         dest='cxxflags',
 358         type=str,
 359         default=CXXFLAGS,
 360         help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
 361     parser.add_argument(
 362         '--ldflags',
 363         dest='ldflags',
 364         type=str,
 365         default=LDFLAGS,
 366         help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
 367     parser.add_argument(
 368         '--mflags',
 369         dest='mflags',
 370         type=str,
 371         default=MFLAGS,
 372         help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
 373     parser.add_argument(
 374         'TARGET',
 375         nargs='*',
 376         type=str,
 377         help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
 378     )
 379     args = parser.parse_args(args)
 380     args = parse_env_flags(args, ' '.join(
 381         [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))
 382
 383     # Check option sanity
 384     if args.msan and (args.asan or args.ubsan):
 385         raise RuntimeError('MSAN may not be used with any other sanitizers')
 386     if args.msan_track_origins and not args.msan:
 387         raise RuntimeError('--enable-msan-track-origins requires MSAN')
 388     if args.sanitize_recover and not args.sanitize:
 389         raise RuntimeError('--enable-sanitize-recover but no sanitizers used')
 390
 391     return args
 392
 393
 394 def build(args):
 395     try:
 396         args = build_parser(args)
 397     except Exception as e:
 398         print(e)
 399         return 1
 400     # The compilation flags we are setting
 401     targets = args.TARGET
 402     cc = args.cc
 403     cxx = args.cxx
 404     cppflags = shlex.split(args.cppflags)
 405     cflags = shlex.split(args.cflags)
 406     ldflags = shlex.split(args.ldflags)
 407     cxxflags = shlex.split(args.cxxflags)
 408     mflags = shlex.split(args.mflags)
 409     # Flags to be added to both cflags and cxxflags
 410     common_flags = [
 411         '-Werror',
 412         '-Wno-error=declaration-after-statement',
 413         '-Wno-error=c++-compat',
 414         '-Wno-error=deprecated' # C files are sometimes compiled with CXX
 415     ]
 416
 417     cppflags += [
 418         '-DDEBUGLEVEL={}'.format(args.debug),
 419         '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
 420         '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
 421     ]
 422
 423     # Set flags for options
 424     assert not (args.fuzzer and args.coverage)
 425     if args.coverage:
 426         common_flags += [
 427             '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
 428         ]
 429     if args.fuzzer:
 430         common_flags += ['-fsanitize=fuzzer']
 431         args.lib_fuzzing_engine = ''
 432
 433     mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]
 434
 435     if args.sanitize_recover:
 436         recover_flags = ['-fsanitize-recover=all']
 437     else:
 438         recover_flags = ['-fno-sanitize-recover=all']
 439     if args.sanitize:
 440         common_flags += recover_flags
 441
 442     if args.msan:
 443         msan_flags = ['-fsanitize=memory']
 444         if args.msan_track_origins:
 445             msan_flags += ['-fsanitize-memory-track-origins']
 446         common_flags += msan_flags
 447         # Append extra MSAN flags (it might require special setup)
 448         cppflags += [args.msan_extra_cppflags]
 449         cflags += [args.msan_extra_cflags]
 450         cxxflags += [args.msan_extra_cxxflags]
 451         ldflags += [args.msan_extra_ldflags]
 452
 453     if args.asan:
 454         common_flags += ['-fsanitize=address']
 455
 456     if args.ubsan:
 457         ubsan_flags = ['-fsanitize=undefined']
 458         if not args.ubsan_pointer_overflow:
 459             ubsan_flags += overflow_ubsan_flags(cc, cxx)
 460         common_flags += ubsan_flags
 461
 462     if args.stateful_fuzzing:
 463         cppflags += ['-DSTATEFUL_FUZZING']
 464
 465     if args.third_party_seq_prod_obj:
 466         cppflags += ['-DFUZZ_THIRD_PARTY_SEQ_PROD']
 467         mflags += ['THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj)]
 468
 469     if args.fuzzing_mode:
 470         cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']
 471
 472     if args.lib_fuzzing_engine == 'libregression.a':
 473         targets = ['libregression.a'] + targets
 474
 475     # Append the common flags
 476     cflags += common_flags
 477     cxxflags += common_flags
 478
 479     # Prepare the flags for Make
 480     cc_str = "CC={}".format(cc)
 481     cxx_str = "CXX={}".format(cxx)
 482     cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
 483     cflags_str = "CFLAGS={}".format(' '.join(cflags))
 484     cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
 485     ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))
 486
 487     # Print the flags
 488     print('MFLAGS={}'.format(' '.join(mflags)))
 489     print(cc_str)
 490     print(cxx_str)
 491     print(cppflags_str)
 492     print(cflags_str)
 493     print(cxxflags_str)
 494     print(ldflags_str)
 495
 496     # Clean and build
 497     clean_cmd = ['make', 'clean'] + mflags
 498     print(' '.join(clean_cmd))
 499     subprocess.check_call(clean_cmd)
 500     build_cmd = [
 501         'make',
 502         '-j',
 503         cc_str,
 504         cxx_str,
 505         cppflags_str,
 506         cflags_str,
 507         cxxflags_str,
 508         ldflags_str,
 509     ] + mflags + targets
 510     print(' '.join(build_cmd))
 511     subprocess.check_call(build_cmd)
 512     return 0
 513
 514
 515 def libfuzzer_parser(args):
 516     description = """
 517     Runs a libfuzzer binary.
 518     Passes all extra arguments to libfuzzer.
 519     The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
 520     libFuzzer.a.
 521     Generates output in the CORPORA directory, puts crashes in the ARTIFACT
 522     directory, and takes extra input from the SEED directory.
 523     To merge AFL's output pass the SEED as AFL's output directory and pass
 524     '-merge=1'.
 525     """
 526     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 527     parser.add_argument(
 528         '--corpora',
 529         type=str,
 530         help='Override the default corpora dir (default: {})'.format(
 531             abs_join(CORPORA_DIR, 'TARGET')))
 532     parser.add_argument(
 533         '--artifact',
 534         type=str,
 535         help='Override the default artifact dir (default: {})'.format(
 536             abs_join(CORPORA_DIR, 'TARGET-crash')))
 537     parser.add_argument(
 538         '--seed',
 539         type=str,
 540         help='Override the default seed dir (default: {})'.format(
 541             abs_join(CORPORA_DIR, 'TARGET-seed')))
 542     parser.add_argument(
 543         'TARGET',
 544         type=str,
 545         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 546     args, extra = parser.parse_known_args(args)
 547     args.extra = extra
 548
 549     if args.TARGET and args.TARGET not in TARGETS:
 550         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 551
 552     return args
 553
 554
 555 def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
 556     if corpora is None:
 557         corpora = abs_join(CORPORA_DIR, target)
 558     if artifact is None:
 559         artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
 560     if seed is None:
 561         seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
 562     if extra_args is None:
 563         extra_args = []
 564
 565     target = abs_join(FUZZ_DIR, target)
 566
 567     corpora = [create(corpora)]
 568     artifact = create(artifact)
 569     seed = check(seed)
 570
 571     corpora += [artifact]
 572     if seed is not None:
 573         corpora += [seed]
 574
 575     cmd = [target, '-artifact_prefix={}/'.format(artifact)]
 576     cmd += corpora + extra_args
 577     print(' '.join(cmd))
 578     subprocess.check_call(cmd)
 579
 580
 581 def libfuzzer_cmd(args):
 582     try:
 583         args = libfuzzer_parser(args)
 584     except Exception as e:
 585         print(e)
 586         return 1
 587     libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
 588     return 0
 589
 590
 591 def afl_parser(args):
 592     description = """
 593     Runs an afl-fuzz job.
 594     Passes all extra arguments to afl-fuzz.
 595     The fuzzer should have been built with CC/CXX set to the AFL compilers,
 596     and with LIB_FUZZING_ENGINE='libregression.a'.
 597     Takes input from CORPORA and writes output to OUTPUT.
 598     Uses AFL_FUZZ as the binary (set from flag or environment variable).
 599     """
 600     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 601     parser.add_argument(
 602         '--corpora',
 603         type=str,
 604         help='Override the default corpora dir (default: {})'.format(
 605             abs_join(CORPORA_DIR, 'TARGET')))
 606     parser.add_argument(
 607         '--output',
 608         type=str,
 609         help='Override the default AFL output dir (default: {})'.format(
 610             abs_join(CORPORA_DIR, 'TARGET-afl')))
 611     parser.add_argument(
 612         '--afl-fuzz',
 613         type=str,
 614         default=AFL_FUZZ,
 615         help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
 616     parser.add_argument(
 617         'TARGET',
 618         type=str,
 619         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 620     args, extra = parser.parse_known_args(args)
 621     args.extra = extra
 622
 623     if args.TARGET and args.TARGET not in TARGETS:
 624         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 625
 626     if not args.corpora:
 627         args.corpora = abs_join(CORPORA_DIR, args.TARGET)
 628     if not args.output:
 629         args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))
 630
 631     return args
 632
 633
 634 def afl(args):
 635     try:
 636         args = afl_parser(args)
 637     except Exception as e:
 638         print(e)
 639         return 1
 640     target = abs_join(FUZZ_DIR, args.TARGET)
 641
 642     corpora = create(args.corpora)
 643     output = create(args.output)
 644
 645     cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
 646     cmd += [target, '@@']
 647     print(' '.join(cmd))
 648     subprocess.call(cmd)
 649     return 0
 650
 651
 652 def regression(args):
 653     try:
 654         description = """
 655         Runs one or more regression tests.
 656         The fuzzer should have been built with
 657         LIB_FUZZING_ENGINE='libregression.a'.
 658         Takes input from CORPORA.
 659         """
 660         args = targets_parser(args, description)
 661     except Exception as e:
 662         print(e)
 663         return 1
 664     for target in args.TARGET:
 665         corpora = create(abs_join(CORPORA_DIR, target))
 666         target = abs_join(FUZZ_DIR, target)
 667         cmd = [target, corpora]
 668         print(' '.join(cmd))
 669         subprocess.check_call(cmd)
 670     return 0
 671
 672
 673 def gen_parser(args):
 674     description = """
 675     Generate a seed corpus appropriate for TARGET with data generated with
 676     decodecorpus.
 677     The fuzz inputs are prepended with a seed before the zstd data, so the
 678     output of decodecorpus shouldn't be used directly.
 679     Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
 680     puts the output in SEED.
 681     DECODECORPUS is the decodecorpus binary, and must already be built.
 682     """
 683     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 684     parser.add_argument(
 685         '--number',
 686         '-n',
 687         type=int,
 688         default=100,
 689         help='Number of samples to generate')
 690     parser.add_argument(
 691         '--max-size-log',
 692         type=int,
 693         default=18,
 694         help='Maximum sample size to generate')
 695     parser.add_argument(
 696         '--seed',
 697         type=str,
 698         help='Override the default seed dir (default: {})'.format(
 699             abs_join(CORPORA_DIR, 'TARGET-seed')))
 700     parser.add_argument(
 701         '--decodecorpus',
 702         type=str,
 703         default=DECODECORPUS,
 704         help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
 705             DECODECORPUS))
 706     parser.add_argument(
 707         '--zstd',
 708         type=str,
 709         default=ZSTD,
 710         help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
 711     parser.add_argument(
 712         '--fuzz-rng-seed-size',
 713         type=int,
 714         default=4,
 715         help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
 716     )
 717     parser.add_argument(
 718         'TARGET',
 719         type=str,
 720         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 721     args, extra = parser.parse_known_args(args)
 722     args.extra = extra
 723
 724     if args.TARGET and args.TARGET not in TARGETS:
 725         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 726
 727     if not args.seed:
 728         args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
 729
 730     if not os.path.isfile(args.decodecorpus):
 731         raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
 732                            format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))
 733
 734     return args
 735
 736
 737 def gen(args):
 738     try:
 739         args = gen_parser(args)
 740     except Exception as e:
 741         print(e)
 742         return 1
 743
 744     seed = create(args.seed)
 745     with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict:
 746         info = TARGET_INFO[args.TARGET]
 747
 748         if info.input_type == InputType.DICTIONARY_DATA:
 749             number = max(args.number, 1000)
 750         else:
 751             number = args.number
 752         cmd = [
 753             args.decodecorpus,
 754             '-n{}'.format(args.number),
 755             '-p{}/'.format(compressed),
 756             '-o{}'.format(decompressed),
 757         ]
 758
 759         if info.frame_type == FrameType.BLOCK:
 760             cmd += [
 761                 '--gen-blocks',
 762                 '--max-block-size-log={}'.format(min(args.max_size_log, 17))
 763             ]
 764         else:
 765             cmd += ['--max-content-size-log={}'.format(args.max_size_log)]
 766
 767         print(' '.join(cmd))
 768         subprocess.check_call(cmd)
 769
 770         if info.input_type == InputType.RAW_DATA:
 771             print('using decompressed data in {}'.format(decompressed))
 772             samples = decompressed
 773         elif info.input_type == InputType.COMPRESSED_DATA:
 774             print('using compressed data in {}'.format(compressed))
 775             samples = compressed
 776         else:
 777             assert info.input_type == InputType.DICTIONARY_DATA
 778             print('making dictionary data from {}'.format(decompressed))
 779             samples = dict
 780             min_dict_size_log = 9
 781             max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
 782             for dict_size_log in range(min_dict_size_log, max_dict_size_log):
 783                 dict_size = 1 << dict_size_log
 784                 cmd = [
 785                     args.zstd,
 786                     '--train',
 787                     '-r', decompressed,
 788                     '--maxdict={}'.format(dict_size),
 789                     '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size))
 790                 ]
 791                 print(' '.join(cmd))
 792                 subprocess.check_call(cmd)
 793
 794         # Copy the samples over and prepend the RNG seeds
 795         for name in os.listdir(samples):
 796             samplename = abs_join(samples, name)
 797             outname = abs_join(seed, name)
 798             with open(samplename, 'rb') as sample:
 799                 with open(outname, 'wb') as out:
 800                     CHUNK_SIZE = 131072
 801                     chunk = sample.read(CHUNK_SIZE)
 802                     while len(chunk) > 0:
 803                         out.write(chunk)
 804                         chunk = sample.read(CHUNK_SIZE)
 805     return 0
 806
 807
 808 def minimize(args):
 809     try:
 810         description = """
 811         Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
 812         TARGET_seed_corpus. All extra args are passed to libfuzzer.
 813         """
 814         args = targets_parser(args, description)
 815     except Exception as e:
 816         print(e)
 817         return 1
 818
 819     for target in args.TARGET:
 820         # Merge the corpus + anything else into the seed_corpus
 821         corpus = abs_join(CORPORA_DIR, target)
 822         seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
 823         extra_args = [corpus, "-merge=1"] + args.extra
 824         libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
 825         seeds = set(os.listdir(seed_corpus))
 826         # Copy all crashes directly into the seed_corpus if not already present
 827         crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
 828         for crash in os.listdir(crashes):
 829             if crash not in seeds:
 830                 shutil.copy(abs_join(crashes, crash), seed_corpus)
 831                 seeds.add(crash)
 832
 833
 834 def zip_cmd(args):
 835     try:
 836         description = """
 837         Zips up the seed corpus.
 838         """
 839         args = targets_parser(args, description)
 840     except Exception as e:
 841         print(e)
 842         return 1
 843
 844     for target in args.TARGET:
 845         # Zip the seed_corpus
 846         seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
 847         zip_file = "{}.zip".format(seed_corpus)
 848         cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
 849         print(' '.join(cmd))
 850         subprocess.check_call(cmd, cwd=seed_corpus)
 851
 852
 853 def list_cmd(args):
 854     print("\n".join(TARGETS))
 855
 856
 857 def short_help(args):
 858     name = args[0]
 859     print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
 860
 861
 862 def help(args):
 863     short_help(args)
 864     print("\tfuzzing helpers (select a command and pass -h for help)\n")
 865     print("Options:")
 866     print("\t-h, --help\tPrint this message")
 867     print("")
 868     print("Commands:")
 869     print("\tbuild\t\tBuild a fuzzer")
 870     print("\tlibfuzzer\tRun a libFuzzer fuzzer")
 871     print("\tafl\t\tRun an AFL fuzzer")
 872     print("\tregression\tRun a regression test")
 873     print("\tgen\t\tGenerate a seed corpus for a fuzzer")
 874     print("\tminimize\tMinimize the test corpora")
 875     print("\tzip\t\tZip the minimized corpora up")
 876     print("\tlist\t\tList the available targets")
 877
 878
 879 def main():
 880     args = sys.argv
 881     if len(args) < 2:
 882         help(args)
 883         return 1
 884     if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
 885         help(args)
 886         return 1
 887     command = args.pop(1)
 888     args[0] = "{} {}".format(args[0], command)
 889     if command == "build":
 890         return build(args)
 891     if command == "libfuzzer":
 892         return libfuzzer_cmd(args)
 893     if command == "regression":
 894         return regression(args)
 895     if command == "afl":
 896         return afl(args)
 897     if command == "gen":
 898         return gen(args)
 899     if command == "minimize":
 900         return minimize(args)
 901     if command == "zip":
 902         return zip_cmd(args)
 903     if command == "list":
 904         return list_cmd(args)
 905     short_help(args)
 906     print("Error: No such command {} (pass -h for help)".format(command))
 907     return 1
 908
 909
 910 if __name__ == "__main__":
 911     sys.exit(main())