| 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # ################################################################ |
| 4 | # Copyright (c) Meta Platforms, Inc. and affiliates. |
| 5 | # All rights reserved. |
| 6 | # |
| 7 | # This source code is licensed under both the BSD-style license (found in the |
| 8 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 9 | # in the COPYING file in the root directory of this source tree). |
| 10 | # You may select, at your option, one of the above-listed licenses. |
| 11 | # ########################################################################## |
| 12 | |
| 13 | import argparse |
| 14 | import contextlib |
| 15 | import os |
| 16 | import re |
| 17 | import shlex |
| 18 | import shutil |
| 19 | import subprocess |
| 20 | import sys |
| 21 | import tempfile |
| 22 | |
| 23 | |
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return an absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
| 26 | |
| 27 | |
class InputType(object):
    """Kinds of input data a fuzz target can consume."""
    # Plain integer tags; within this file they are only compared for equality.
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
| 32 | |
| 33 | |
class FrameType(object):
    """Framing used by a fuzz target's data: full zstd frames or bare blocks."""
    # Plain integer tags; within this file they are only compared for equality.
    ZSTD, BLOCK = 1, 2
| 37 | |
| 38 | |
class TargetInfo(object):
    """Describes the input a fuzz target expects.

    ``input_type`` is one of the ``InputType`` constants and ``frame_type``
    is one of the ``FrameType`` constants (``FrameType.ZSTD`` by default).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type, self.frame_type = input_type, frame_type
| 43 | |
| 44 | |
# Constants
# Directory containing this script; fuzz target binaries are resolved
# relative to it.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Root directory holding the per-target corpus, crash, and seed directories.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each known fuzz target name to the kind of input (and framing) it
# expects; `gen` uses this to decide which generated data to use as seeds.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
# All valid target names, plus the pseudo-target 'all' accepted by commands
# that take a list of targets.
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
# NOTE(review): presumably the default for the --fuzz-rng-seed-size options
# (which also default to 4) -- confirm.
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
# Each value is read once at import time and used as the default for the
# corresponding `build` command-line option.
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to the effective CFLAGS when unset.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
# Default tool locations are resolved relative to this script's directory.
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
# Extra flags appended by `build` only when MSAN is enabled.
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
| 97 | |
| 98 | |
def create(r):
    """Ensure the directory ``r`` exists and return its absolute path."""
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
| 104 | |
| 105 | |
def check(r):
    """Return the absolute path of ``r`` if it is a directory, else None."""
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
| 111 | |
| 112 | |
@contextlib.contextmanager
def tmpdir():
    """Context manager that yields the path of a fresh temporary directory.

    The directory is removed when the ``with`` block exits, whether normally
    or via an exception; errors during removal are ignored.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
| 120 | |
| 121 | |
def parse_targets(in_targets):
    """Validate and de-duplicate a list of target names.

    Empty names are skipped and the pseudo-target 'all' expands to every
    entry of TARGETS. The returned list has no duplicates and no particular
    order.

    Raises RuntimeError for a name that is neither 'all' nor in TARGETS.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected = selected.union(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
| 134 | |
| 135 | |
def targets_parser(args, description):
    """Parse a command line consisting of zero or more TARGET names.

    ``args[0]`` is removed from ``args`` and used as the program name.
    Unrecognized arguments are collected in ``.extra`` on the returned
    namespace, and ``.TARGET`` is replaced by the validated target list.

    Raises RuntimeError (from parse_targets) for an unknown target.
    """
    arg_parser = argparse.ArgumentParser(
        prog=args.pop(0), description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    arg_parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = arg_parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
| 149 | |
| 150 | |
def parse_env_flags(args, flags):
    """
    Look for sanitizer flags set by environment variables.

    Scans ``flags`` for -fsanitize=... and -fno-sanitize=... options and
    updates ``args.asan``, ``args.msan`` and ``args.ubsan``: a sanitizer named
    in -fsanitize becomes True, one named in -fno-sanitize becomes False,
    and otherwise the existing value is kept. ``args.sanitize`` is set to the
    "or" of the three. Returns the same ``args`` object.

    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    enabled = set(
        ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)).split(','))
    disabled = set(
        ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)).split(','))

    def resolve(sanitizer, default):
        # Decide one sanitizer's state from the scanned flags.
        turned_on = sanitizer in enabled
        turned_off = sanitizer in disabled
        if turned_on and turned_off:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if turned_on:
            return True
        if turned_off:
            return False
        return default

    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
| 178 | |
| 179 | |
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and prints the output of the
    former. Returns ``(compiler, version)`` where ``compiler`` is 'clang',
    'gcc', or None for an unrecognized compiler, and ``version`` is a
    ``(major, minor, patch)`` tuple of ints taken from the ``cc`` output
    (None when the compiler is unrecognized).

    Raises RuntimeError if ``cxx`` is not from the same family as ``cc`` or
    if no X.Y.Z version can be found for a recognized compiler, and
    subprocess.CalledProcessError if either command fails.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    # The banner is only displayed, so tolerate non-ASCII bytes rather than
    # crashing on them.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', errors='replace')))

    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    else:
        return None, None

    # Raw bytes literal so that '\.' is a regex escape, not a string escape.
    version_match = re.search(br'([0-9]+)\.([0-9]+)\.([0-9]+)',
                              cc_version_bytes)
    if version_match is None:
        raise RuntimeError(
            'could not find a version number in the output of '
            '{} --version'.format(cc))
    version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
| 201 | |
| 202 | |
def overflow_ubsan_flags(cc, cxx):
    """Return the flags that turn off UBSAN's overflow check for ``cc``.

    gcc older than 8.0.0 gets -fno-sanitize=signed-integer-overflow; any
    other gcc, and clang 5.0.0 or newer, gets -fno-sanitize=pointer-overflow.
    Every other compiler gets an empty list.
    """
    compiler, version = compiler_version(cc, cxx)
    is_gcc = compiler == 'gcc'
    if is_gcc and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    # Short-circuits before comparing when the compiler is unrecognized and
    # version is None.
    is_recent_clang = compiler == 'clang' and version >= (5, 0, 0)
    if is_gcc or is_recent_clang:
        return ['-fno-sanitize=pointer-overflow']
    return []
| 210 | |
| 211 | |
def build_parser(args):
    """Parse and validate the command line of the `build` command.

    ``args[0]`` is removed from ``args`` and used as the program name. Most
    options default to the environment-derived module constants. After
    parsing, the sanitizer switches are reconciled with any -fsanitize /
    -fno-sanitize options found in the compiler and linker flags (see
    parse_env_flags), which also sets ``.sanitize`` on the result.

    Returns the argparse namespace. Note that ``.TARGET`` is returned as
    given; it is not validated against TARGETS here.

    Raises RuntimeError for inconsistent sanitizer options. argparse itself
    exits the process on malformed arguments.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and -fsanitize=fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    # store_false: the attribute is True unless the flag is given.
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    # store_false: fuzzing mode is on unless the flag is given.
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the compiler/linker flags override the
    # --enable-*san switches.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
| 392 | |
| 393 | |
def build(args):
    """Implement the `build` command: clean, then build the fuzz targets.

    Parses ``args`` with build_parser, assembles CC/CXX/CPPFLAGS/CFLAGS/
    CXXFLAGS/LDFLAGS from the options, prints them, then runs `make clean`
    followed by `make` for the requested targets.

    Returns 1 if option parsing raised an exception (after printing it) and
    0 on success. A failing make raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1

    # The compilation flags we are setting
    targets = args.TARGET
    cc, cxx = args.cc, args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = [
        '-Werror',
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated'  # C files are sometimes compiled with CXX
    ]

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ])

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if args.fuzzer:
        common_flags.append('-fsanitize=fuzzer')
        # -fsanitize=fuzzer supplies the engine, so no library is linked.
        args.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine))

    if args.sanitize:
        if args.sanitize_recover:
            common_flags.append('-fsanitize-recover=all')
        else:
            common_flags.append('-fno-sanitize-recover=all')

    if args.msan:
        common_flags.append('-fsanitize=memory')
        if args.msan_track_origins:
            common_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(args.msan_extra_cppflags)
        cflags.append(args.msan_extra_cflags)
        cxxflags.append(args.msan_extra_cxxflags)
        ldflags.append(args.msan_extra_ldflags)

    if args.asan:
        common_flags.append('-fsanitize=address')

    if args.ubsan:
        common_flags.append('-fsanitize=undefined')
        if not args.ubsan_pointer_overflow:
            common_flags.extend(overflow_ubsan_flags(cc, cxx))

    if args.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if args.third_party_seq_prod_obj:
        cppflags.append('-DFUZZ_THIRD_PARTY_SEQ_PROD')
        mflags.append('THIRD_PARTY_SEQ_PROD_OBJ={}'.format(
            args.third_party_seq_prod_obj))

    if args.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    # The regression driver library is built first when it is the engine.
    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags.extend(common_flags)
    cxxflags.extend(common_flags)

    # Prepare the flags for Make
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    for make_var in make_vars:
        print(make_var)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make', '-j'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
| 513 | |
| 514 | |
def libfuzzer_parser(args):
    """Parse the command line of the `libfuzzer` command.

    ``args[0]`` is removed from ``args`` and used as the program name.
    Unrecognized arguments are stored in ``.extra`` on the returned namespace
    so they can be forwarded to the fuzzer binary.

    Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    cli = argparse.ArgumentParser(prog=args.pop(0), description=description)

    # (option, help template, placeholder directory shown as the default)
    dir_options = (
        ('--corpora',
         'Override the default corpora dir (default: {})',
         'TARGET'),
        ('--artifact',
         'Override the default artifact dir (default: {})',
         'TARGET-crash'),
        ('--seed',
         'Override the default seed dir (default: {})',
         'TARGET-seed'),
    )
    for option, help_template, placeholder in dir_options:
        cli.add_argument(
            option,
            type=str,
            help=help_template.format(abs_join(CORPORA_DIR, placeholder)))
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = cli.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    return parsed
| 553 | |
| 554 | |
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """Run the libFuzzer binary for ``target``.

    ``corpora``, ``artifact`` and ``seed`` default to ``<target>``,
    ``<target>-crash`` and ``<target>-seed`` under CORPORA_DIR. The corpora
    and artifact directories are created if missing; the seed directory is
    only passed to the fuzzer when it already exists. ``extra_args`` are
    appended to the fuzzer's command line.

    Raises subprocess.CalledProcessError if the fuzzer exits non-zero.
    """
    corpora = abs_join(CORPORA_DIR, target) if corpora is None else corpora
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    extra_args = [] if extra_args is None else extra_args

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpora)
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    # The first directory is the one the fuzzer writes to; the others are
    # extra inputs.
    input_dirs = [corpus_dir, artifact_dir]
    if seed_dir is not None:
        input_dirs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd.extend(input_dirs)
    cmd.extend(extra_args)
    print(' '.join(cmd))
    subprocess.check_call(cmd)
| 579 | |
| 580 | |
def libfuzzer_cmd(args):
    """Implement the `libfuzzer` command.

    Returns 1 if argument parsing raised an exception (after printing it),
    otherwise runs the fuzzer and returns 0.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as err:
        print(err)
        return 1
    libfuzzer(
        parsed.TARGET,
        corpora=parsed.corpora,
        artifact=parsed.artifact,
        seed=parsed.seed,
        extra_args=parsed.extra)
    return 0
| 589 | |
| 590 | |
def afl_parser(args):
    """Parse the command line of the `afl` command.

    ``args[0]`` is removed from ``args`` and used as the program name.
    Unrecognized arguments are stored in ``.extra``; ``.corpora`` and
    ``.output`` are filled in with per-target defaults under CORPORA_DIR when
    not given.

    Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    cli = argparse.ArgumentParser(prog=args.pop(0), description=description)
    cli.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    cli.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    cli.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = cli.parse_known_args(args)
    parsed.extra = unknown

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    # Fall back to the per-target directories under CORPORA_DIR.
    parsed.corpora = parsed.corpora or abs_join(CORPORA_DIR, target)
    parsed.output = parsed.output or abs_join(
        CORPORA_DIR, '{}-afl'.format(target))

    return parsed
| 632 | |
| 633 | |
def afl(args):
    """Implement the `afl` command: run afl-fuzz on a target.

    Returns 1 if argument parsing raised an exception (after printing it).
    Otherwise creates the corpora and output directories if needed, runs
    afl-fuzz, and returns 0; the exit status of afl-fuzz is not checked.
    """
    try:
        parsed = afl_parser(args)
    except Exception as err:
        print(err)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    input_dir = create(parsed.corpora)
    output_dir = create(parsed.output)

    cmd = [parsed.afl_fuzz, '-i', input_dir, '-o', output_dir]
    cmd.extend(parsed.extra)
    # '@@' is passed through to afl-fuzz as the target's only argument.
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
| 650 | |
| 651 | |
def regression(args):
    """Implement the `regression` command.

    Runs each requested target binary with its corpus directory (created if
    missing) as the only argument. Returns 1 if argument parsing raised an
    exception (after printing it) and 0 once every target has run.

    Raises subprocess.CalledProcessError as soon as a target exits non-zero.
    """
    description = """
    Runs one or more regression tests.
    The fuzzer should have been built with
    LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA.
    """
    try:
        parsed = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        cmd = [abs_join(FUZZ_DIR, name), corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
| 671 | |
| 672 | |
def gen_parser(args):
    """Parse the command line of the `gen` command.

    ``args[0]`` is removed from ``args`` and used as the program name.
    Unrecognized arguments are stored in ``.extra``, and ``.seed`` falls back
    to ``<TARGET>-seed`` under CORPORA_DIR when not given.

    Raises RuntimeError if TARGET is not a known target or if the
    decodecorpus binary does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    cli = argparse.ArgumentParser(prog=args.pop(0), description=description)
    cli.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    cli.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    cli.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    cli.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    cli.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    cli.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = cli.parse_known_args(args)
    parsed.extra = unknown

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    parsed.seed = parsed.seed or abs_join(
        CORPORA_DIR, '{}-seed'.format(target))

    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus,
                                  abs_join(FUZZ_DIR, '..')))

    return parsed
| 735 | |
| 736 | |
def gen(args):
    """Implement the `gen` command: generate a seed corpus for a target.

    Runs decodecorpus into temporary directories, selects the data matching
    the target's input type (decompressed data, compressed data, or
    dictionaries trained from the decompressed data with `zstd --train`),
    and copies the selected files into the seed directory.

    Returns 1 if argument parsing raised an exception (after printing it)
    and 0 on success. A failing external command raises
    subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, \
            tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets train on the generated samples, so generate at
        # least 1000 of them regardless of --number.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at
            # 2**9 bytes; at least one size is always produced.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged.
        # NOTE(review): the command description mentions prepending
        # FUZZ_RNG_SEED_SIZE random bytes, but no seed is written here --
        # confirm which of the two is intended.
        for name in os.listdir(samples):
            shutil.copyfile(abs_join(samples, name), abs_join(seed, name))
    return 0
| 806 | |
| 807 | |
def minimize(args):
    """Implement the `minimize` command.

    For each requested target, runs its libFuzzer binary with -merge=1 to
    merge the target's corpus into ``<target>_seed_corpus`` under
    CORPORA_DIR, then copies every file from ``<target>-crash`` that is not
    already present into the seed corpus.

    Returns 1 if argument parsing raised an exception (after printing it)
    and 0 on success, matching the other commands.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0
| 832 | |
| 833 | |
def zip_cmd(args):
    """Implement the `zip` command.

    For each requested target, runs the external `zip` tool inside
    ``<target>_seed_corpus`` under CORPORA_DIR and writes the archive next
    to that directory as ``<target>_seed_corpus.zip``.

    Returns 1 if argument parsing raised an exception (after printing it)
    and 0 on success, matching the other commands. A failing zip raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0
| 851 | |
| 852 | |
def list_cmd(args):
    """Implement the `list` command: print every target name, one per line.

    ``args`` is accepted for a uniform command signature and is not used.
    Returns 0, matching the other commands.
    """
    print("\n".join(TARGETS))
    return 0
| 855 | |
| 856 | |
def short_help(args):
    """Print the one-line usage summary, using ``args[0]`` as program name."""
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
| 860 | |
| 861 | |
def help(args):
    """Print the usage summary followed by the option and command list."""
    short_help(args)
    lines = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    print("\n".join(lines))
| 877 | |
| 878 | |
def main():
    """Dispatch ``sys.argv`` to the selected sub-command.

    Prints the help text and returns 1 when no command or a help flag is
    given, and prints an error and returns 1 for an unknown command.
    Otherwise the command name is folded into ``args[0]`` (so sub-parsers
    show it as part of the program name) and the command's return value is
    returned.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is None:
        short_help(args)
        print("Error: No such command {} (pass -h for help)".format(command))
        return 1
    return handler(args)
| 908 | |
| 909 | |
# Run the command-line entry point only when executed as a script; the value
# returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())