| 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # ################################################################ |
| 4 | # Copyright (c) Meta Platforms, Inc. and affiliates. |
| 5 | # All rights reserved. |
| 6 | # |
| 7 | # This source code is licensed under both the BSD-style license (found in the |
| 8 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 9 | # in the COPYING file in the root directory of this source tree). |
| 10 | # You may select, at your option, one of the above-listed licenses. |
| 11 | # ########################################################################## |
| 12 | |
| 13 | import argparse |
| 14 | import contextlib |
| 15 | import os |
| 16 | import re |
| 17 | import shlex |
| 18 | import shutil |
| 19 | import subprocess |
| 20 | import sys |
| 21 | import tempfile |
| 22 | |
| 23 | |
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return an absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
| 26 | |
| 27 | |
class InputType(object):
    """Integer tags for the kind of input data a fuzz target is fed."""
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = range(1, 4)
| 32 | |
| 33 | |
class FrameType(object):
    """Integer tags for the framing of the data a fuzz target works on."""
    ZSTD, BLOCK = 1, 2
| 37 | |
| 38 | |
class TargetInfo(object):
    """Describes the input a fuzz target expects.

    ``input_type`` is stored as given (the targets table passes InputType
    constants); ``frame_type`` defaults to ``FrameType.ZSTD``.
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.frame_type = frame_type
        self.input_type = input_type
| 43 | |
| 44 | |
# Constants
# Absolute path of the directory that contains this script.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Directory (next to this script) holding the per-target corpora.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to a TargetInfo describing the input it takes.
# gen() reads input_type/frame_type from here to pick the seed data to emit.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
}
# Names of all known targets.
TARGETS = list(TARGET_INFO.keys())
# Target names accepted on the command line: every real target plus the
# pseudo-target 'all', which parse_targets() expands to all of TARGETS.
ALL_TARGETS = TARGETS + ['all']
# Same value as the default of the --fuzz-rng-seed-size options.
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
# Each value is read once at import time; the second argument is the fallback
# used when the variable is unset.
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
# Default decodecorpus binary: one directory above this script.
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
# Default zstd binary: two directories above this script.
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
# Defaults for the --msan-extra-* options of the build command.
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
| 95 | |
| 96 | |
def create(r):
    """Make sure directory ``r`` exists and return its absolute path."""
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
| 102 | |
| 103 | |
def check(r):
    """Return the absolute path of ``r`` if it is a directory, else None."""
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
| 109 | |
| 110 | |
@contextlib.contextmanager
def tmpdir():
    """Context manager yielding a fresh temporary directory.

    The directory tree is removed on exit (errors during removal are
    ignored), whether or not the body raised.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
| 118 | |
| 119 | |
def parse_targets(in_targets):
    """Validate and de-duplicate a list of target names.

    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Returns the selected names as a list (built from a set, so the order is
    unspecified).  Raises RuntimeError for an unknown name.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected.update(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
| 132 | |
| 133 | |
def targets_parser(args, description):
    """Parse a command line consisting of zero or more TARGET names.

    The first element of ``args`` is popped and used as the program name.
    Unrecognised arguments are kept in ``.extra`` of the returned namespace
    and ``.TARGET`` is replaced by the result of parse_targets().
    """
    prog = args.pop(0)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))

    parser = argparse.ArgumentParser(prog=prog, description=description)
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
| 147 | |
| 148 | |
def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans ``flags`` for -fsanitize=... and -fno-sanitize=... and updates
    args.asan / args.msan / args.ubsan accordingly (keeping the existing value
    when a sanitizer is mentioned in neither), then sets args.sanitize to
    whether any of the three is enabled.  Raises RuntimeError if the same
    sanitizer is both enabled and disabled.  Returns ``args``.
    """
    enabled = set(
        ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)).split(','))
    disabled = set(
        ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)).split(','))

    def resolve(name, default):
        turned_on = name in enabled
        turned_off = name in disabled
        if turned_on and turned_off:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=name))
        if turned_on:
            return True
        if turned_off:
            return False
        return default

    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
| 176 | |
| 177 | |
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and prints the output of the
    former.

    Returns a tuple ``(compiler, version)`` where ``compiler`` is 'clang',
    'gcc' or None (not recognised) and ``version`` is a
    ``(major, minor, patch)`` tuple of ints, or None when the compiler was not
    recognised.

    Raises RuntimeError if cc and cxx do not belong to the same compiler
    family, or if no X.Y.Z version number is found in the output of a
    recognised compiler.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    # The banner is only printed for information, so never let an unexpected
    # non-ASCII byte in it abort the build.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', 'replace')))

    compiler = None
    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'

    if compiler is None:
        return None, None

    # Raw bytes literal so that '\.' is a regex escape, not a string escape.
    version_match = re.search(br'([0-9]+)\.([0-9]+)\.([0-9]+)',
                              cc_version_bytes)
    if version_match is None:
        raise RuntimeError(
            'unable to find a version number in the output of '
            '{} --version'.format(cc))
    version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
| 199 | |
| 200 | |
def overflow_ubsan_flags(cc, cxx):
    """Return the -fno-sanitize=... flags to add for the detected compiler.

    gcc older than 8.0.0 gets signed-integer-overflow disabled; any other gcc
    and clang 5.0.0 or newer get pointer-overflow disabled; everything else
    gets no extra flags.
    """
    compiler, version = compiler_version(cc, cxx)
    is_gcc = compiler == 'gcc'
    if is_gcc and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if is_gcc:
        return ['-fno-sanitize=pointer-overflow']
    if compiler == 'clang' and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []
| 208 | |
| 209 | |
def build_parser(args):
    """
    Parse the command line of the ``build`` command.

    ``args`` is the argument list whose first element is the program name;
    that element is popped, so the list is modified in place.  Many options
    default to the environment-derived module constants.  After parsing, the
    sanitizer settings are merged with any -fsanitize=/-fno-sanitize= found in
    the cppflags/cflags/cxxflags/ldflags values (see parse_env_flags).

    Returns the argparse namespace.
    Raises RuntimeError when the combination of sanitizer options is invalid.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and -fsanitize=fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers may also be switched on/off through the flag strings.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
| 392 | |
| 393 | |
def build(args):
    """Implement the ``build`` command: run ``make clean`` then ``make``.

    Translates the parsed options into CC/CXX/CPPFLAGS/CFLAGS/CXXFLAGS/LDFLAGS
    and extra make variables, prints them, and invokes make.  Returns 1 if
    option parsing raised, 0 after a successful build; a failing make
    propagates subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as err:
        print(err)
        return 1

    # The compilation flags we are setting
    targets = args.TARGET
    cc, cxx = args.cc, args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags that end up in both cflags and cxxflags
    shared = []

    cppflags.append('-DDEBUGLEVEL={}'.format(args.debug))
    cppflags.append(
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access))
    cppflags.append(
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size))

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        shared.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if args.fuzzer:
        shared.append('-fsanitize=fuzzer')
        # -fsanitize=fuzzer brings its own engine, so none is passed to make.
        args.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine))

    if args.sanitize:
        if args.sanitize_recover:
            shared.append('-fsanitize-recover=all')
        else:
            shared.append('-fno-sanitize-recover=all')

    if args.msan:
        shared.append('-fsanitize=memory')
        if args.msan_track_origins:
            shared.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(args.msan_extra_cppflags)
        cflags.append(args.msan_extra_cflags)
        cxxflags.append(args.msan_extra_cxxflags)
        ldflags.append(args.msan_extra_ldflags)

    if args.asan:
        shared.append('-fsanitize=address')

    if args.ubsan:
        ubsan = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan.extend(overflow_ubsan_flags(cc, cxx))
        shared.extend(ubsan)

    if args.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if args.third_party_seq_prod_obj:
        cppflags.append('-DFUZZ_THIRD_PARTY_SEQ_PROD')
        mflags.append('THIRD_PARTY_SEQ_PROD_OBJ={}'.format(
            args.third_party_seq_prod_obj))

    if args.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    if args.lib_fuzzing_engine == 'libregression.a':
        # The regression driver has to be built before the targets.
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags.extend(shared)
    cxxflags.extend(shared)

    # Prepare the flags for Make
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
| 507 | |
| 508 | |
def libfuzzer_parser(args):
    """Parse the command line of the ``libfuzzer`` command.

    Pops the program name off ``args``.  Unknown arguments are collected in
    ``.extra`` of the returned namespace.  Raises RuntimeError when TARGET is
    not one of TARGETS.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    parser.add_argument('--corpora', type=str, help=corpora_help)

    artifact_help = 'Override the default artifact dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-crash'))
    parser.add_argument('--artifact', type=str, help=artifact_help)

    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    parser.add_argument('--seed', type=str, help=seed_help)

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    return parsed
| 547 | |
| 548 | |
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """Run the fuzzer binary ``target`` found in FUZZ_DIR.

    Directories default to CORPORA_DIR/<target>, CORPORA_DIR/<target>-crash
    and CORPORA_DIR/<target>-seed.  The corpora and artifact directories are
    created when missing; the seed directory is only passed along if it
    already exists.  ``extra_args`` are appended to the command line.
    Raises subprocess.CalledProcessError if the fuzzer exits non-zero.
    """
    corpus_dir = abs_join(CORPORA_DIR, target) if corpora is None else corpora
    if artifact is None:
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    else:
        crash_dir = artifact
    if seed is None:
        seed_dir = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    else:
        seed_dir = seed
    passthrough = [] if extra_args is None else extra_args

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpus_dir)
    crash_dir = create(crash_dir)
    seed_dir = check(seed_dir)

    # The artifact directory doubles as an input corpus.
    inputs = [corpus_dir, crash_dir]
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(crash_dir)]
    cmd.extend(inputs)
    cmd.extend(passthrough)
    print(' '.join(cmd))
    subprocess.check_call(cmd)
| 573 | |
| 574 | |
def libfuzzer_cmd(args):
    """Implement the ``libfuzzer`` command; returns 1 if parsing fails, else 0."""
    try:
        parsed = libfuzzer_parser(args)
    except Exception as err:
        print(err)
        return 1
    libfuzzer(
        parsed.TARGET,
        corpora=parsed.corpora,
        artifact=parsed.artifact,
        seed=parsed.seed,
        extra_args=parsed.extra)
    return 0
| 583 | |
| 584 | |
def afl_parser(args):
    """Parse the command line of the ``afl`` command.

    Pops the program name off ``args``.  Unknown arguments are collected in
    ``.extra``; missing --corpora / --output are filled in with the
    per-target defaults under CORPORA_DIR.  Raises RuntimeError when TARGET is
    not one of TARGETS.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    parser.add_argument('--corpora', type=str, help=corpora_help)

    output_help = 'Override the default AFL output dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-afl'))
    parser.add_argument('--output', type=str, help=output_help)

    afl_help = 'AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)
    parser.add_argument('--afl-fuzz', type=str, default=AFL_FUZZ, help=afl_help)

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, parsed.TARGET)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR, '{}-afl'.format(parsed.TARGET))

    return parsed
| 626 | |
| 627 | |
def afl(args):
    """Implement the ``afl`` command.

    Returns 1 if argument parsing raised, otherwise 0; the exit status of
    afl-fuzz itself is not inspected.
    """
    try:
        parsed = afl_parser(args)
    except Exception as err:
        print(err)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    in_dir = create(parsed.corpora)
    out_dir = create(parsed.output)

    cmd = [parsed.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(parsed.extra)
    # '@@' is passed literally after the target binary.
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
| 644 | |
| 645 | |
def regression(args):
    """Implement the ``regression`` command.

    Runs each selected target binary from FUZZ_DIR with its corpus directory
    (created when missing) as the single argument.  Returns 1 if parsing
    raised, 0 otherwise; a failing target propagates
    subprocess.CalledProcessError.
    """
    description = """
    Runs one or more regression tests.
    The fuzzer should have been built with
    LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA.
    """
    try:
        parsed = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        cmd = [abs_join(FUZZ_DIR, name), corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
| 665 | |
| 666 | |
def gen_parser(args):
    """Parse the command line of the ``gen`` command.

    Pops the program name off ``args``.  Unknown arguments are collected in
    ``.extra``; a missing --seed defaults to CORPORA_DIR/<TARGET>-seed.
    Raises RuntimeError when TARGET is not one of TARGETS or when the
    decodecorpus path is not an existing file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    parser.add_argument(
        '--number', '-n', type=int, default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log', type=int, default=18,
        help='Maximum sample size to generate')

    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    parser.add_argument('--seed', type=str, help=seed_help)

    decodecorpus_help = (
        "decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--decodecorpus', type=str, default=DECODECORPUS,
        help=decodecorpus_help)

    zstd_help = "zstd binary (default: $ZSTD='{}')".format(ZSTD)
    parser.add_argument('--zstd', type=str, default=ZSTD, help=zstd_help)

    parser.add_argument(
        '--fuzz-rng-seed-size', type=int, default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)")

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    if not parsed.seed:
        parsed.seed = abs_join(CORPORA_DIR, '{}-seed'.format(parsed.TARGET))

    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return parsed
| 729 | |
| 730 | |
def gen(args):
    """Implement the ``gen`` command: generate a seed corpus for a target.

    Runs decodecorpus into temporary directories, then copies the samples
    matching the target's input type into the seed directory:

    * RAW_DATA: the decompressed samples,
    * COMPRESSED_DATA: the compressed samples,
    * DICTIONARY_DATA: dictionaries trained by ``zstd --train`` on the
      decompressed samples, one per power-of-two size starting at 2**9.

    Returns 1 if argument parsing raised, 0 otherwise; a failing external
    command propagates subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        if info.input_type == InputType.DICTIONARY_DATA:
            # Generate at least 1000 samples for dictionary training.
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                # The block size log is capped at 17.
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            # Always train at least one dictionary, even for a small
            # --max-size-log.
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged; no RNG seed
        # bytes are added to the files here.
        for name in os.listdir(samples):
            shutil.copyfile(abs_join(samples, name), abs_join(seed, name))
    return 0
| 800 | |
| 801 | |
def minimize(args):
    """Implement the ``minimize`` command.

    For every selected target, runs the libFuzzer binary with -merge=1 so
    that CORPORA_DIR/<target> is merged into
    CORPORA_DIR/<target>_seed_corpus, then copies every file from
    CORPORA_DIR/<target>-crash that is not already present there.

    Returns 1 if argument parsing raised, 0 on success (like the other
    commands, so main() always hands an int to sys.exit()).
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0
| 826 | |
| 827 | |
def zip_cmd(args):
    """Implement the ``zip`` command.

    For every selected target, runs ``zip`` inside
    CORPORA_DIR/<target>_seed_corpus to produce
    CORPORA_DIR/<target>_seed_corpus.zip.

    Returns 1 if argument parsing raised, 0 on success (like the other
    commands); a failing zip propagates subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0
| 845 | |
| 846 | |
def list_cmd(args):
    """Implement the ``list`` command: print one target name per line.

    ``args`` is unused.  Returns 0, like the other commands do on success.
    """
    print("\n".join(TARGETS))
    return 0
| 849 | |
| 850 | |
def short_help(args):
    """Print the one-line usage string; ``args[0]`` is the program name."""
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
| 854 | |
| 855 | |
def help(args):
    """Print the usage line followed by the list of options and commands."""
    short_help(args)
    lines = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in lines:
        print(line)
| 871 | |
| 872 | |
def main():
    """Dispatch to the sub-command named by the first command-line argument.

    The command word is removed from sys.argv and folded into argv[0], so the
    per-command parsers show "<script> <command>" as the program name.
    Returns the value of the command handler, or 1 when help was printed or
    the command is unknown.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
| 902 | |
| 903 | |
# Run main() only when executed as a script and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())