deps/libchdr/deps/zstd-1.5.5/tests/fuzz/fuzz.py

   1 #!/usr/bin/env python
   2
   3 # ################################################################
   4 # Copyright (c) Meta Platforms, Inc. and affiliates.
   5 # All rights reserved.
   6 #
   7 # This source code is licensed under both the BSD-style license (found in the
   8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
   9 # in the COPYING file in the root directory of this source tree).
  10 # You may select, at your option, one of the above-listed licenses.
  11 # ##########################################################################
  12
  13 import argparse
  14 import contextlib
  15 import os
  16 import re
  17 import shlex
  18 import shutil
  19 import subprocess
  20 import sys
  21 import tempfile
  22
  23
  24 def abs_join(a, *p):
  25     return os.path.abspath(os.path.join(a, *p))
  26
  27
  28 class InputType(object):
  29     RAW_DATA = 1
  30     COMPRESSED_DATA = 2
  31     DICTIONARY_DATA = 3
  32
  33
  34 class FrameType(object):
  35     ZSTD = 1
  36     BLOCK = 2
  37
  38
  39 class TargetInfo(object):
  40     def __init__(self, input_type, frame_type=FrameType.ZSTD):
  41         self.input_type = input_type
  42         self.frame_type = frame_type
  43
  44
  45 # Constants
  46 FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
  47 CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
  48 TARGET_INFO = {
  49     'simple_round_trip': TargetInfo(InputType.RAW_DATA),
  50     'stream_round_trip': TargetInfo(InputType.RAW_DATA),
  51     'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
  52     'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
  53     'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
  54     'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
  55     'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
  56     'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
  57     'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
  58     'simple_compress': TargetInfo(InputType.RAW_DATA),
  59     'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
  60     'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
  61     'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
  62     'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
  63     'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
  64     'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
  65     'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
  66     'huf_round_trip': TargetInfo(InputType.RAW_DATA),
  67     'huf_decompress': TargetInfo(InputType.RAW_DATA),
  68 }
  69 TARGETS = list(TARGET_INFO.keys())
  70 ALL_TARGETS = TARGETS + ['all']
  71 FUZZ_RNG_SEED_SIZE = 4
  72
  73 # Standard environment variables
  74 CC = os.environ.get('CC', 'cc')
  75 CXX = os.environ.get('CXX', 'c++')
  76 CPPFLAGS = os.environ.get('CPPFLAGS', '')
  77 CFLAGS = os.environ.get('CFLAGS', '-O3')
  78 CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
  79 LDFLAGS = os.environ.get('LDFLAGS', '')
  80 MFLAGS = os.environ.get('MFLAGS', '-j')
  81 THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')
  82
  83 # Fuzzing environment variables
  84 LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
  85 AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
  86 DECODECORPUS = os.environ.get('DECODECORPUS',
  87                               abs_join(FUZZ_DIR, '..', 'decodecorpus'))
  88 ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))
  89
  90 # Sanitizer environment variables
  91 MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
  92 MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
  93 MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
  94 MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
  95
  96
  97 def create(r):
  98     d = os.path.abspath(r)
  99     if not os.path.isdir(d):
 100         os.makedirs(d)
 101     return d
 102
 103
 104 def check(r):
 105     d = os.path.abspath(r)
 106     if not os.path.isdir(d):
 107         return None
 108     return d
 109
 110
 111 @contextlib.contextmanager
 112 def tmpdir():
 113     dirpath = tempfile.mkdtemp()
 114     try:
 115         yield dirpath
 116     finally:
 117         shutil.rmtree(dirpath, ignore_errors=True)
 118
 119
 120 def parse_targets(in_targets):
 121     targets = set()
 122     for target in in_targets:
 123         if not target:
 124             continue
 125         if target == 'all':
 126             targets = targets.union(TARGETS)
 127         elif target in TARGETS:
 128             targets.add(target)
 129         else:
 130             raise RuntimeError('{} is not a valid target'.format(target))
 131     return list(targets)
 132
 133
 134 def targets_parser(args, description):
 135     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 136     parser.add_argument(
 137         'TARGET',
 138         nargs='*',
 139         type=str,
 140         help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
 141     args, extra = parser.parse_known_args(args)
 142     args.extra = extra
 143
 144     args.TARGET = parse_targets(args.TARGET)
 145
 146     return args
 147
 148
 149 def parse_env_flags(args, flags):
 150     """
 151     Look for flags set by environment variables.
 152     """
 153     san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
 154     nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))
 155
 156     def set_sanitizer(sanitizer, default, san, nosan):
 157         if sanitizer in san and sanitizer in nosan:
 158             raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
 159                                format(s=sanitizer))
 160         if sanitizer in san:
 161             return True
 162         if sanitizer in nosan:
 163             return False
 164         return default
 165
 166     san = set(san_flags.split(','))
 167     nosan = set(nosan_flags.split(','))
 168
 169     args.asan = set_sanitizer('address', args.asan, san, nosan)
 170     args.msan = set_sanitizer('memory', args.msan, san, nosan)
 171     args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)
 172
 173     args.sanitize = args.asan or args.msan or args.ubsan
 174
 175     return args
 176
 177
 178 def compiler_version(cc, cxx):
 179     """
 180     Determines the compiler and version.
 181     Only works for clang and gcc.
 182     """
 183     cc_version_bytes = subprocess.check_output([cc, "--version"])
 184     cxx_version_bytes = subprocess.check_output([cxx, "--version"])
 185     compiler = None
 186     version = None
 187     print("{} --version:\n{}".format(cc, cc_version_bytes.decode('ascii')))
 188     if b'clang' in cc_version_bytes:
 189         assert(b'clang' in cxx_version_bytes)
 190         compiler = 'clang'
 191     elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
 192         assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
 193         compiler = 'gcc'
 194     if compiler is not None:
 195         version_regex = b'([0-9]+)\.([0-9]+)\.([0-9]+)'
 196         version_match = re.search(version_regex, cc_version_bytes)
 197         version = tuple(int(version_match.group(i)) for i in range(1, 4))
 198     return compiler, version
 199
 200
 201 def overflow_ubsan_flags(cc, cxx):
 202     compiler, version = compiler_version(cc, cxx)
 203     if compiler == 'gcc' and version < (8, 0, 0):
 204         return ['-fno-sanitize=signed-integer-overflow']
 205     if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)):
 206         return ['-fno-sanitize=pointer-overflow']
 207     return []
 208
 209
 210 def build_parser(args):
 211     description = """
 212     Cleans the repository and builds a fuzz target (or all).
 213     Many flags default to environment variables (default says $X='y').
 214     Options that aren't enabling features default to the correct values for
 215     zstd.
 216     Enable sanitizers with --enable-*san.
 217     For regression testing just build.
 218     For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
 219     For AFL set CC and CXX to AFL's compilers and set
 220     LIB_FUZZING_ENGINE='libregression.a'.
 221     """
 222     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 223     parser.add_argument(
 224         '--lib-fuzzing-engine',
 225         dest='lib_fuzzing_engine',
 226         type=str,
 227         default=LIB_FUZZING_ENGINE,
 228         help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
 229               "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE)))
 230
 231     fuzz_group = parser.add_mutually_exclusive_group()
 232     fuzz_group.add_argument(
 233         '--enable-coverage',
 234         dest='coverage',
 235         action='store_true',
 236         help='Enable coverage instrumentation (-fsanitize-coverage)')
 237     fuzz_group.add_argument(
 238         '--enable-fuzzer',
 239         dest='fuzzer',
 240         action='store_true',
 241         help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
 242               'LIB_FUZZING_ENGINE is ignored')
 243     )
 244
 245     parser.add_argument(
 246         '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN')
 247     parser.add_argument(
 248         '--enable-ubsan',
 249         dest='ubsan',
 250         action='store_true',
 251         help='Enable UBSAN')
 252     parser.add_argument(
 253         '--enable-ubsan-pointer-overflow',
 254         dest='ubsan_pointer_overflow',
 255         action='store_true',
 256         help='Enable UBSAN pointer overflow check (known failure)')
 257     parser.add_argument(
 258         '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
 259     parser.add_argument(
 260         '--enable-msan-track-origins', dest='msan_track_origins',
 261         action='store_true', help='Enable MSAN origin tracking')
 262     parser.add_argument(
 263         '--msan-extra-cppflags',
 264         dest='msan_extra_cppflags',
 265         type=str,
 266         default=MSAN_EXTRA_CPPFLAGS,
 267         help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
 268         format(MSAN_EXTRA_CPPFLAGS))
 269     parser.add_argument(
 270         '--msan-extra-cflags',
 271         dest='msan_extra_cflags',
 272         type=str,
 273         default=MSAN_EXTRA_CFLAGS,
 274         help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
 275             MSAN_EXTRA_CFLAGS))
 276     parser.add_argument(
 277         '--msan-extra-cxxflags',
 278         dest='msan_extra_cxxflags',
 279         type=str,
 280         default=MSAN_EXTRA_CXXFLAGS,
 281         help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
 282         format(MSAN_EXTRA_CXXFLAGS))
 283     parser.add_argument(
 284         '--msan-extra-ldflags',
 285         dest='msan_extra_ldflags',
 286         type=str,
 287         default=MSAN_EXTRA_LDFLAGS,
 288         help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
 289         format(MSAN_EXTRA_LDFLAGS))
 290     parser.add_argument(
 291         '--enable-sanitize-recover',
 292         dest='sanitize_recover',
 293         action='store_true',
 294         help='Non-fatal sanitizer errors where possible')
 295     parser.add_argument(
 296         '--debug',
 297         dest='debug',
 298         type=int,
 299         default=1,
 300         help='Set DEBUGLEVEL (default: 1)')
 301     parser.add_argument(
 302         '--force-memory-access',
 303         dest='memory_access',
 304         type=int,
 305         default=0,
 306         help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
 307     parser.add_argument(
 308         '--fuzz-rng-seed-size',
 309         dest='fuzz_rng_seed_size',
 310         type=int,
 311         default=4,
 312         help='Set FUZZ_RNG_SEED_SIZE (default: 4)')
 313     parser.add_argument(
 314         '--disable-fuzzing-mode',
 315         dest='fuzzing_mode',
 316         action='store_false',
 317         help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
 318     parser.add_argument(
 319         '--enable-stateful-fuzzing',
 320         dest='stateful_fuzzing',
 321         action='store_true',
 322         help='Reuse contexts between runs (makes reproduction impossible)')
 323     parser.add_argument(
 324         '--custom-seq-prod',
 325         dest='third_party_seq_prod_obj',
 326         type=str,
 327         default=THIRD_PARTY_SEQ_PROD_OBJ,
 328         help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
 329     parser.add_argument(
 330         '--cc',
 331         dest='cc',
 332         type=str,
 333         default=CC,
 334         help="CC (default: $CC='{}')".format(CC))
 335     parser.add_argument(
 336         '--cxx',
 337         dest='cxx',
 338         type=str,
 339         default=CXX,
 340         help="CXX (default: $CXX='{}')".format(CXX))
 341     parser.add_argument(
 342         '--cppflags',
 343         dest='cppflags',
 344         type=str,
 345         default=CPPFLAGS,
 346         help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
 347     parser.add_argument(
 348         '--cflags',
 349         dest='cflags',
 350         type=str,
 351         default=CFLAGS,
 352         help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
 353     parser.add_argument(
 354         '--cxxflags',
 355         dest='cxxflags',
 356         type=str,
 357         default=CXXFLAGS,
 358         help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
 359     parser.add_argument(
 360         '--ldflags',
 361         dest='ldflags',
 362         type=str,
 363         default=LDFLAGS,
 364         help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
 365     parser.add_argument(
 366         '--mflags',
 367         dest='mflags',
 368         type=str,
 369         default=MFLAGS,
 370         help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
 371     parser.add_argument(
 372         'TARGET',
 373         nargs='*',
 374         type=str,
 375         help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
 376     )
 377     args = parser.parse_args(args)
 378     args = parse_env_flags(args, ' '.join(
 379         [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))
 380
 381     # Check option sanity
 382     if args.msan and (args.asan or args.ubsan):
 383         raise RuntimeError('MSAN may not be used with any other sanitizers')
 384     if args.msan_track_origins and not args.msan:
 385         raise RuntimeError('--enable-msan-track-origins requires MSAN')
 386     if args.ubsan_pointer_overflow and not args.ubsan:
 387         raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
 388     if args.sanitize_recover and not args.sanitize:
 389         raise RuntimeError('--enable-sanitize-recover but no sanitizers used')
 390
 391     return args
 392
 393
 394 def build(args):
 395     try:
 396         args = build_parser(args)
 397     except Exception as e:
 398         print(e)
 399         return 1
 400     # The compilation flags we are setting
 401     targets = args.TARGET
 402     cc = args.cc
 403     cxx = args.cxx
 404     cppflags = shlex.split(args.cppflags)
 405     cflags = shlex.split(args.cflags)
 406     ldflags = shlex.split(args.ldflags)
 407     cxxflags = shlex.split(args.cxxflags)
 408     mflags = shlex.split(args.mflags)
 409     # Flags to be added to both cflags and cxxflags
 410     common_flags = []
 411
 412     cppflags += [
 413         '-DDEBUGLEVEL={}'.format(args.debug),
 414         '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
 415         '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
 416     ]
 417
 418     # Set flags for options
 419     assert not (args.fuzzer and args.coverage)
 420     if args.coverage:
 421         common_flags += [
 422             '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
 423         ]
 424     if args.fuzzer:
 425         common_flags += ['-fsanitize=fuzzer']
 426         args.lib_fuzzing_engine = ''
 427
 428     mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]
 429
 430     if args.sanitize_recover:
 431         recover_flags = ['-fsanitize-recover=all']
 432     else:
 433         recover_flags = ['-fno-sanitize-recover=all']
 434     if args.sanitize:
 435         common_flags += recover_flags
 436
 437     if args.msan:
 438         msan_flags = ['-fsanitize=memory']
 439         if args.msan_track_origins:
 440             msan_flags += ['-fsanitize-memory-track-origins']
 441         common_flags += msan_flags
 442         # Append extra MSAN flags (it might require special setup)
 443         cppflags += [args.msan_extra_cppflags]
 444         cflags += [args.msan_extra_cflags]
 445         cxxflags += [args.msan_extra_cxxflags]
 446         ldflags += [args.msan_extra_ldflags]
 447
 448     if args.asan:
 449         common_flags += ['-fsanitize=address']
 450
 451     if args.ubsan:
 452         ubsan_flags = ['-fsanitize=undefined']
 453         if not args.ubsan_pointer_overflow:
 454             ubsan_flags += overflow_ubsan_flags(cc, cxx)
 455         common_flags += ubsan_flags
 456
 457     if args.stateful_fuzzing:
 458         cppflags += ['-DSTATEFUL_FUZZING']
 459
 460     if args.third_party_seq_prod_obj:
 461         cppflags += ['-DFUZZ_THIRD_PARTY_SEQ_PROD']
 462         mflags += ['THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj)]
 463
 464     if args.fuzzing_mode:
 465         cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']
 466
 467     if args.lib_fuzzing_engine == 'libregression.a':
 468         targets = ['libregression.a'] + targets
 469
 470     # Append the common flags
 471     cflags += common_flags
 472     cxxflags += common_flags
 473
 474     # Prepare the flags for Make
 475     cc_str = "CC={}".format(cc)
 476     cxx_str = "CXX={}".format(cxx)
 477     cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
 478     cflags_str = "CFLAGS={}".format(' '.join(cflags))
 479     cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
 480     ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))
 481
 482     # Print the flags
 483     print('MFLAGS={}'.format(' '.join(mflags)))
 484     print(cc_str)
 485     print(cxx_str)
 486     print(cppflags_str)
 487     print(cflags_str)
 488     print(cxxflags_str)
 489     print(ldflags_str)
 490
 491     # Clean and build
 492     clean_cmd = ['make', 'clean'] + mflags
 493     print(' '.join(clean_cmd))
 494     subprocess.check_call(clean_cmd)
 495     build_cmd = [
 496         'make',
 497         cc_str,
 498         cxx_str,
 499         cppflags_str,
 500         cflags_str,
 501         cxxflags_str,
 502         ldflags_str,
 503     ] + mflags + targets
 504     print(' '.join(build_cmd))
 505     subprocess.check_call(build_cmd)
 506     return 0
 507
 508
 509 def libfuzzer_parser(args):
 510     description = """
 511     Runs a libfuzzer binary.
 512     Passes all extra arguments to libfuzzer.
 513     The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
 514     libFuzzer.a.
 515     Generates output in the CORPORA directory, puts crashes in the ARTIFACT
 516     directory, and takes extra input from the SEED directory.
 517     To merge AFL's output pass the SEED as AFL's output directory and pass
 518     '-merge=1'.
 519     """
 520     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 521     parser.add_argument(
 522         '--corpora',
 523         type=str,
 524         help='Override the default corpora dir (default: {})'.format(
 525             abs_join(CORPORA_DIR, 'TARGET')))
 526     parser.add_argument(
 527         '--artifact',
 528         type=str,
 529         help='Override the default artifact dir (default: {})'.format(
 530             abs_join(CORPORA_DIR, 'TARGET-crash')))
 531     parser.add_argument(
 532         '--seed',
 533         type=str,
 534         help='Override the default seed dir (default: {})'.format(
 535             abs_join(CORPORA_DIR, 'TARGET-seed')))
 536     parser.add_argument(
 537         'TARGET',
 538         type=str,
 539         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 540     args, extra = parser.parse_known_args(args)
 541     args.extra = extra
 542
 543     if args.TARGET and args.TARGET not in TARGETS:
 544         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 545
 546     return args
 547
 548
 549 def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
 550     if corpora is None:
 551         corpora = abs_join(CORPORA_DIR, target)
 552     if artifact is None:
 553         artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
 554     if seed is None:
 555         seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
 556     if extra_args is None:
 557         extra_args = []
 558
 559     target = abs_join(FUZZ_DIR, target)
 560
 561     corpora = [create(corpora)]
 562     artifact = create(artifact)
 563     seed = check(seed)
 564
 565     corpora += [artifact]
 566     if seed is not None:
 567         corpora += [seed]
 568
 569     cmd = [target, '-artifact_prefix={}/'.format(artifact)]
 570     cmd += corpora + extra_args
 571     print(' '.join(cmd))
 572     subprocess.check_call(cmd)
 573
 574
 575 def libfuzzer_cmd(args):
 576     try:
 577         args = libfuzzer_parser(args)
 578     except Exception as e:
 579         print(e)
 580         return 1
 581     libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
 582     return 0
 583
 584
 585 def afl_parser(args):
 586     description = """
 587     Runs an afl-fuzz job.
 588     Passes all extra arguments to afl-fuzz.
 589     The fuzzer should have been built with CC/CXX set to the AFL compilers,
 590     and with LIB_FUZZING_ENGINE='libregression.a'.
 591     Takes input from CORPORA and writes output to OUTPUT.
 592     Uses AFL_FUZZ as the binary (set from flag or environment variable).
 593     """
 594     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 595     parser.add_argument(
 596         '--corpora',
 597         type=str,
 598         help='Override the default corpora dir (default: {})'.format(
 599             abs_join(CORPORA_DIR, 'TARGET')))
 600     parser.add_argument(
 601         '--output',
 602         type=str,
 603         help='Override the default AFL output dir (default: {})'.format(
 604             abs_join(CORPORA_DIR, 'TARGET-afl')))
 605     parser.add_argument(
 606         '--afl-fuzz',
 607         type=str,
 608         default=AFL_FUZZ,
 609         help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
 610     parser.add_argument(
 611         'TARGET',
 612         type=str,
 613         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 614     args, extra = parser.parse_known_args(args)
 615     args.extra = extra
 616
 617     if args.TARGET and args.TARGET not in TARGETS:
 618         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 619
 620     if not args.corpora:
 621         args.corpora = abs_join(CORPORA_DIR, args.TARGET)
 622     if not args.output:
 623         args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))
 624
 625     return args
 626
 627
 628 def afl(args):
 629     try:
 630         args = afl_parser(args)
 631     except Exception as e:
 632         print(e)
 633         return 1
 634     target = abs_join(FUZZ_DIR, args.TARGET)
 635
 636     corpora = create(args.corpora)
 637     output = create(args.output)
 638
 639     cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
 640     cmd += [target, '@@']
 641     print(' '.join(cmd))
 642     subprocess.call(cmd)
 643     return 0
 644
 645
 646 def regression(args):
 647     try:
 648         description = """
 649         Runs one or more regression tests.
 650         The fuzzer should have been built with
 651         LIB_FUZZING_ENGINE='libregression.a'.
 652         Takes input from CORPORA.
 653         """
 654         args = targets_parser(args, description)
 655     except Exception as e:
 656         print(e)
 657         return 1
 658     for target in args.TARGET:
 659         corpora = create(abs_join(CORPORA_DIR, target))
 660         target = abs_join(FUZZ_DIR, target)
 661         cmd = [target, corpora]
 662         print(' '.join(cmd))
 663         subprocess.check_call(cmd)
 664     return 0
 665
 666
 667 def gen_parser(args):
 668     description = """
 669     Generate a seed corpus appropriate for TARGET with data generated with
 670     decodecorpus.
 671     The fuzz inputs are prepended with a seed before the zstd data, so the
 672     output of decodecorpus shouldn't be used directly.
 673     Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
 674     puts the output in SEED.
 675     DECODECORPUS is the decodecorpus binary, and must already be built.
 676     """
 677     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 678     parser.add_argument(
 679         '--number',
 680         '-n',
 681         type=int,
 682         default=100,
 683         help='Number of samples to generate')
 684     parser.add_argument(
 685         '--max-size-log',
 686         type=int,
 687         default=18,
 688         help='Maximum sample size to generate')
 689     parser.add_argument(
 690         '--seed',
 691         type=str,
 692         help='Override the default seed dir (default: {})'.format(
 693             abs_join(CORPORA_DIR, 'TARGET-seed')))
 694     parser.add_argument(
 695         '--decodecorpus',
 696         type=str,
 697         default=DECODECORPUS,
 698         help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
 699             DECODECORPUS))
 700     parser.add_argument(
 701         '--zstd',
 702         type=str,
 703         default=ZSTD,
 704         help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
 705     parser.add_argument(
 706         '--fuzz-rng-seed-size',
 707         type=int,
 708         default=4,
 709         help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
 710     )
 711     parser.add_argument(
 712         'TARGET',
 713         type=str,
 714         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 715     args, extra = parser.parse_known_args(args)
 716     args.extra = extra
 717
 718     if args.TARGET and args.TARGET not in TARGETS:
 719         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 720
 721     if not args.seed:
 722         args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
 723
 724     if not os.path.isfile(args.decodecorpus):
 725         raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
 726                            format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))
 727
 728     return args
 729
 730
 731 def gen(args):
 732     try:
 733         args = gen_parser(args)
 734     except Exception as e:
 735         print(e)
 736         return 1
 737
 738     seed = create(args.seed)
 739     with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict:
 740         info = TARGET_INFO[args.TARGET]
 741
 742         if info.input_type == InputType.DICTIONARY_DATA:
 743             number = max(args.number, 1000)
 744         else:
 745             number = args.number
 746         cmd = [
 747             args.decodecorpus,
 748             '-n{}'.format(args.number),
 749             '-p{}/'.format(compressed),
 750             '-o{}'.format(decompressed),
 751         ]
 752
 753         if info.frame_type == FrameType.BLOCK:
 754             cmd += [
 755                 '--gen-blocks',
 756                 '--max-block-size-log={}'.format(min(args.max_size_log, 17))
 757             ]
 758         else:
 759             cmd += ['--max-content-size-log={}'.format(args.max_size_log)]
 760
 761         print(' '.join(cmd))
 762         subprocess.check_call(cmd)
 763
 764         if info.input_type == InputType.RAW_DATA:
 765             print('using decompressed data in {}'.format(decompressed))
 766             samples = decompressed
 767         elif info.input_type == InputType.COMPRESSED_DATA:
 768             print('using compressed data in {}'.format(compressed))
 769             samples = compressed
 770         else:
 771             assert info.input_type == InputType.DICTIONARY_DATA
 772             print('making dictionary data from {}'.format(decompressed))
 773             samples = dict
 774             min_dict_size_log = 9
 775             max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
 776             for dict_size_log in range(min_dict_size_log, max_dict_size_log):
 777                 dict_size = 1 << dict_size_log
 778                 cmd = [
 779                     args.zstd,
 780                     '--train',
 781                     '-r', decompressed,
 782                     '--maxdict={}'.format(dict_size),
 783                     '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size))
 784                 ]
 785                 print(' '.join(cmd))
 786                 subprocess.check_call(cmd)
 787
 788         # Copy the samples over and prepend the RNG seeds
 789         for name in os.listdir(samples):
 790             samplename = abs_join(samples, name)
 791             outname = abs_join(seed, name)
 792             with open(samplename, 'rb') as sample:
 793                 with open(outname, 'wb') as out:
 794                     CHUNK_SIZE = 131072
 795                     chunk = sample.read(CHUNK_SIZE)
 796                     while len(chunk) > 0:
 797                         out.write(chunk)
 798                         chunk = sample.read(CHUNK_SIZE)
 799     return 0
 800
 801
 802 def minimize(args):
 803     try:
 804         description = """
 805         Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
 806         TARGET_seed_corpus. All extra args are passed to libfuzzer.
 807         """
 808         args = targets_parser(args, description)
 809     except Exception as e:
 810         print(e)
 811         return 1
 812
 813     for target in args.TARGET:
 814         # Merge the corpus + anything else into the seed_corpus
 815         corpus = abs_join(CORPORA_DIR, target)
 816         seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
 817         extra_args = [corpus, "-merge=1"] + args.extra
 818         libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
 819         seeds = set(os.listdir(seed_corpus))
 820         # Copy all crashes directly into the seed_corpus if not already present
 821         crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
 822         for crash in os.listdir(crashes):
 823             if crash not in seeds:
 824                 shutil.copy(abs_join(crashes, crash), seed_corpus)
 825                 seeds.add(crash)
 826
 827
 828 def zip_cmd(args):
 829     try:
 830         description = """
 831         Zips up the seed corpus.
 832         """
 833         args = targets_parser(args, description)
 834     except Exception as e:
 835         print(e)
 836         return 1
 837
 838     for target in args.TARGET:
 839         # Zip the seed_corpus
 840         seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
 841         zip_file = "{}.zip".format(seed_corpus)
 842         cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
 843         print(' '.join(cmd))
 844         subprocess.check_call(cmd, cwd=seed_corpus)
 845
 846
 847 def list_cmd(args):
 848     print("\n".join(TARGETS))
 849
 850
 851 def short_help(args):
 852     name = args[0]
 853     print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
 854
 855
 856 def help(args):
 857     short_help(args)
 858     print("\tfuzzing helpers (select a command and pass -h for help)\n")
 859     print("Options:")
 860     print("\t-h, --help\tPrint this message")
 861     print("")
 862     print("Commands:")
 863     print("\tbuild\t\tBuild a fuzzer")
 864     print("\tlibfuzzer\tRun a libFuzzer fuzzer")
 865     print("\tafl\t\tRun an AFL fuzzer")
 866     print("\tregression\tRun a regression test")
 867     print("\tgen\t\tGenerate a seed corpus for a fuzzer")
 868     print("\tminimize\tMinimize the test corpora")
 869     print("\tzip\t\tZip the minimized corpora up")
 870     print("\tlist\t\tList the available targets")
 871
 872
 873 def main():
 874     args = sys.argv
 875     if len(args) < 2:
 876         help(args)
 877         return 1
 878     if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
 879         help(args)
 880         return 1
 881     command = args.pop(1)
 882     args[0] = "{} {}".format(args[0], command)
 883     if command == "build":
 884         return build(args)
 885     if command == "libfuzzer":
 886         return libfuzzer_cmd(args)
 887     if command == "regression":
 888         return regression(args)
 889     if command == "afl":
 890         return afl(args)
 891     if command == "gen":
 892         return gen(args)
 893     if command == "minimize":
 894         return minimize(args)
 895     if command == "zip":
 896         return zip_cmd(args)
 897     if command == "list":
 898         return list_cmd(args)
 899     short_help(args)
 900     print("Error: No such command {} (pass -h for help)".format(command))
 901     return 1
 902
 903
 904 if __name__ == "__main__":
 905     sys.exit(main())