git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.6 / tests / fuzz / fuzz.py
CommitLineData
648db22b 1#!/usr/bin/env python
2
3# ################################################################
4# Copyright (c) Meta Platforms, Inc. and affiliates.
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
10# You may select, at your option, one of the above-listed licenses.
11# ##########################################################################
12
13import argparse
14import contextlib
15import os
16import re
17import shlex
18import shutil
19import subprocess
20import sys
21import tempfile
22
23
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return the absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26
27
class InputType(object):
    """Integer tags for the kind of input a fuzz target consumes."""
    # Raw data, compressed data, and dictionary data, respectively.
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32
33
class FrameType(object):
    """Integer tags for the framing a fuzz target works with."""
    # ZSTD frames versus bare blocks.
    ZSTD, BLOCK = 1, 2
37
38
class TargetInfo(object):
    """
    Describes one fuzz target: the kind of input it takes (an ``InputType``
    value) and the framing it uses (a ``FrameType`` value, ZSTD by default).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        # Plain value holder; both fields are stored as given.
        self.frame_type = frame_type
        self.input_type = input_type
43
44
# Constants
# Directory holding this script, and the corpora directory beneath it.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Every known fuzz target mapped to the input/frame type it expects.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA,
                                   FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
# Target names, plus the list that also accepts the 'all' pseudo-target.
TARGETS = list(TARGET_INFO)
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.getenv('CC', 'cc')
CXX = os.getenv('CXX', 'c++')
CPPFLAGS = os.getenv('CPPFLAGS', '')
CFLAGS = os.getenv('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.getenv('CXXFLAGS', CFLAGS)
LDFLAGS = os.getenv('LDFLAGS', '')
MFLAGS = os.getenv('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.getenv('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.getenv('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.getenv('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.getenv('DECODECORPUS',
                         abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.getenv('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.getenv('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.getenv('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.getenv('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.getenv('MSAN_EXTRA_LDFLAGS', '')
97
98
def create(r):
    """
    Make sure the directory ``r`` exists, creating it (and any missing
    parents) when it does not, and return its absolute path.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
104
105
def check(r):
    """
    Return the absolute path of ``r`` when it is an existing directory,
    otherwise ``None``. Nothing is created.
    """
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
111
112
@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding the path of a fresh temporary directory.

    The directory and its contents are removed on exit, whether or not the
    body raised; removal errors are ignored.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
120
121
def parse_targets(in_targets):
    """
    Validate and de-duplicate a list of target names.

    Empty names are skipped, 'all' expands to every entry of TARGETS, and
    any other name must be in TARGETS or RuntimeError is raised. The result
    is a list built from a set, so its order is unspecified.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected.update(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
134
135
def targets_parser(args, description):
    """
    Parse an argv-style list that consists of zero or more TARGET names.

    The first element of ``args`` is removed and used as the program name.
    Unrecognised arguments are kept on the returned namespace as ``extra``,
    and ``TARGET`` is replaced by the validated list from parse_targets.
    """
    prog = args.pop(0)
    cli = argparse.ArgumentParser(prog=prog, description=description)
    cli.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    parsed, leftover = cli.parse_known_args(args)
    parsed.extra = leftover
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
149
150
def parse_env_flags(args, flags):
    """
    Fold sanitizer switches found in a flags string into ``args``.

    ``flags`` is scanned for -fsanitize=... and -fno-sanitize=... lists.
    For address/memory/undefined the matching ``args.asan`` / ``args.msan``
    / ``args.ubsan`` is forced on or off accordingly, or left at its current
    value when the sanitizer is not mentioned. ``args.sanitize`` is set to
    whether any of the three ended up enabled.

    Raises RuntimeError when a sanitizer is both enabled and disabled.
    """
    enabled = set(
        ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)).split(','))
    disabled = set(
        ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)).split(','))

    def resolve(name, fallback):
        # Explicit flags win over the value parsed from the command line.
        turned_on = name in enabled
        turned_off = name in disabled
        if turned_on and turned_off:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=name))
        if turned_on:
            return True
        if turned_off:
            return False
        return fallback

    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
178
179
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and inspects the output.
    Returns ``(compiler, version)`` where ``compiler`` is 'clang', 'gcc' or
    None, and ``version`` is a 3-tuple of ints (None when the compiler was
    not recognised).

    Raises RuntimeError when cc and cxx are not the same compiler family or
    when no X.Y.Z version can be found for a recognised compiler, and
    subprocess.CalledProcessError when a ``--version`` call fails.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only echoed for the user, so decode leniently rather than
    # letting a non-ASCII byte in the compiler output abort the build.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('utf-8', 'replace')))
    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    if compiler is not None:
        # Raw bytes literal: '\.' is not a valid escape in a plain literal.
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'could not find a version number in the output of '
                '{} --version'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
201
202
def overflow_ubsan_flags(cc, cxx):
    """
    Return the -fno-sanitize flag list used to turn off UBSAN's overflow
    check for the detected compiler: signed-integer-overflow for gcc older
    than 8.0.0, pointer-overflow for other gcc and for clang >= 5.0.0, and
    an empty list otherwise.
    """
    compiler, version = compiler_version(cc, cxx)
    is_gcc = compiler == 'gcc'
    if is_gcc and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    recent_clang = compiler == 'clang' and version >= (5, 0, 0)
    if is_gcc or recent_clang:
        return ['-fno-sanitize=pointer-overflow']
    return []
210
211
def build_parser(args):
    """
    Parse the command line of the ``build`` sub-command.

    ``args`` is an argv-style list; its first element is removed and used as
    the program name. Sanitizer switches found in the resulting CPPFLAGS,
    CFLAGS, CXXFLAGS and LDFLAGS values are folded into the namespace by
    parse_env_flags.

    Returns the populated argparse namespace. Raises RuntimeError when the
    sanitizer options are inconsistent (MSAN combined with another sanitizer,
    origin tracking without MSAN, or recover mode without any sanitizer).
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and -fsanitize=fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    # store_false: ubsan_pointer_overflow is True unless this flag is given.
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    # Default comes from the module constant so the two cannot drift apart.
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    # store_false: fuzzing_mode is True unless this flag is given.
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the flag strings count as enabled too.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
392
393
def build(args):
    """
    Entry point of the ``build`` sub-command: run ``make clean`` and then
    build the requested targets with the assembled compiler flags.

    Returns 1 (after printing the error) when build_parser raises, and 0
    once both make invocations have succeeded. A failing make raises
    subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1

    # Start from the user-supplied values.
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    cxxflags = shlex.split(args.cxxflags)
    ldflags = shlex.split(args.ldflags)
    mflags = shlex.split(args.mflags)

    # Flags that end up in both CFLAGS and CXXFLAGS.
    shared_flags = [
        '-Werror',
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated'  # C files are sometimes compiled with CXX
    ]

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ])

    # Fuzzing-engine related options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        shared_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if args.fuzzer:
        shared_flags.append('-fsanitize=fuzzer')
        # -fsanitize=fuzzer supplies the engine, so none is linked explicitly.
        args.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine))

    # Sanitizer options
    if args.sanitize:
        if args.sanitize_recover:
            shared_flags.append('-fsanitize-recover=all')
        else:
            shared_flags.append('-fno-sanitize-recover=all')

    if args.msan:
        shared_flags.append('-fsanitize=memory')
        if args.msan_track_origins:
            shared_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(args.msan_extra_cppflags)
        cflags.append(args.msan_extra_cflags)
        cxxflags.append(args.msan_extra_cxxflags)
        ldflags.append(args.msan_extra_ldflags)

    if args.asan:
        shared_flags.append('-fsanitize=address')

    if args.ubsan:
        shared_flags.append('-fsanitize=undefined')
        if not args.ubsan_pointer_overflow:
            shared_flags.extend(overflow_ubsan_flags(cc, cxx))

    # Preprocessor switches
    if args.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if args.third_party_seq_prod_obj:
        cppflags.append('-DFUZZ_THIRD_PARTY_SEQ_PROD')
        mflags.append('THIRD_PARTY_SEQ_PROD_OBJ={}'.format(
            args.third_party_seq_prod_obj))

    if args.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    # The regression driver library is built ahead of the targets.
    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    cflags.extend(shared_flags)
    cxxflags.extend(shared_flags)

    # Variable assignments handed to make, in the order they are printed.
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make', '-j'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
513
514
def libfuzzer_parser(args):
    """
    Parse the command line of the ``libfuzzer`` sub-command.

    The first element of ``args`` is removed and used as the program name.
    Unrecognised arguments are kept on the namespace as ``extra``.
    Raises RuntimeError when TARGET is not one of TARGETS.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    prog = args.pop(0)
    cli = argparse.ArgumentParser(prog=prog, description=description)
    cli.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    cli.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    cli.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    parsed, leftover = cli.parse_known_args(args)
    parsed.extra = leftover

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    return parsed
553
554
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the fuzzer binary for ``target`` (looked up in FUZZ_DIR).

    Directories left as None default to CORPORA_DIR/<target>,
    CORPORA_DIR/<target>-crash and CORPORA_DIR/<target>-seed. The corpora
    and artifact directories are created when missing; the seed directory is
    only passed along when it already exists. ``extra_args`` are appended to
    the command line. A non-zero exit raises subprocess.CalledProcessError.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpora)
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    # The artifact directory doubles as an input directory.
    inputs = [corpus_dir, artifact_dir]
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd.extend(inputs)
    cmd.extend(extra_args)
    print(' '.join(cmd))
    subprocess.check_call(cmd)
579
580
def libfuzzer_cmd(args):
    """
    Entry point of the ``libfuzzer`` sub-command.

    Returns 1 (after printing the error) when argument parsing raises,
    otherwise runs the fuzzer and returns 0.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(parsed.TARGET, parsed.corpora, parsed.artifact, parsed.seed,
              parsed.extra)
    return 0
589
590
def afl_parser(args):
    """
    Parse the command line of the ``afl`` sub-command.

    The first element of ``args`` is removed and used as the program name.
    Unrecognised arguments are kept on the namespace as ``extra``; missing
    --corpora / --output values are filled in from CORPORA_DIR and TARGET.
    Raises RuntimeError when TARGET is not one of TARGETS.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    prog = args.pop(0)
    cli = argparse.ArgumentParser(prog=prog, description=description)
    cli.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    cli.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    cli.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    parsed, leftover = cli.parse_known_args(args)
    parsed.extra = leftover

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    # Fall back to the per-target default directories.
    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, parsed.TARGET)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR,
                                 '{}-afl'.format(parsed.TARGET))

    return parsed
632
633
def afl(args):
    """
    Entry point of the ``afl`` sub-command.

    Returns 1 (after printing the error) when argument parsing raises.
    Otherwise creates the corpora and output directories if needed, runs
    afl-fuzz on the target binary and returns 0; the exit status of
    afl-fuzz itself is not inspected.
    """
    try:
        parsed = afl_parser(args)
    except Exception as e:
        print(e)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    in_dir = create(parsed.corpora)
    out_dir = create(parsed.output)

    # '@@' is passed through to afl-fuzz after the target binary.
    cmd = [parsed.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(parsed.extra)
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
650
651
def regression(args):
    """
    Entry point of the ``regression`` sub-command.

    Returns 1 (after printing the error) when argument parsing raises.
    Otherwise runs each requested target binary on its corpora directory
    (created when missing) and returns 0. A failing run raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        binary = abs_join(FUZZ_DIR, name)
        cmd = [binary, corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
671
672
def gen_parser(args):
    """
    Parse the command line of the ``gen`` sub-command.

    The first element of ``args`` is removed and used as the program name.
    Unrecognised arguments are kept on the namespace as ``extra`` and a
    missing --seed is filled in from CORPORA_DIR and TARGET.
    Raises RuntimeError when TARGET is not one of TARGETS or when the
    decodecorpus binary is not an existing file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    prog = args.pop(0)
    cli = argparse.ArgumentParser(prog=prog, description=description)
    cli.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    cli.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    cli.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    cli.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    cli.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    cli.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    parsed, leftover = cli.parse_known_args(args)
    parsed.extra = leftover

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    if not parsed.seed:
        parsed.seed = abs_join(CORPORA_DIR, '{}-seed'.format(parsed.TARGET))

    # The generator binary has to be built ahead of time.
    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus,
                                  abs_join(FUZZ_DIR, '..')))

    return parsed
735
736
def gen(args):
    """
    Entry point of the ``gen`` sub-command: build a seed corpus for one
    target from decodecorpus output.

    decodecorpus is run into temporary directories; depending on the
    target's input type the decompressed samples, the compressed samples, or
    dictionaries trained with ``zstd --train`` are then copied into the seed
    directory (created when missing).

    Returns 1 (after printing the error) when argument parsing raises and 0
    on success. A failing decodecorpus or zstd invocation raises
    subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    # Named dict_dir so the builtin ``dict`` is not shadowed.
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        if info.input_type == InputType.DICTIONARY_DATA:
            # Dictionary targets get at least 1000 samples -- presumably so
            # that `zstd --train` below has enough material to work with.
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 2**9;
            # the upper bound is exclusive and always allows at least one.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory byte-for-byte.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0
806
807
def minimize(args):
    """
    Entry point of the ``minimize`` sub-command.

    Returns 1 (after printing the error) when argument parsing raises.
    Otherwise, for each target, runs libfuzzer with -merge=1 from the
    target's corpus into CORPORA_DIR/<target>_seed_corpus and then copies
    any crash files not already present there. Nothing is returned on
    success.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus_dir = abs_join(CORPORA_DIR, name)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        merge_args = [corpus_dir, "-merge=1"]
        merge_args.extend(parsed.extra)
        libfuzzer(name, corpora=seed_corpus, extra_args=merge_args)

        # Copy all crashes directly into the seed_corpus if not already present
        present = set(os.listdir(seed_corpus))
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(name))
        for entry in os.listdir(crash_dir):
            if entry in present:
                continue
            shutil.copy(abs_join(crash_dir, entry), seed_corpus)
            present.add(entry)
832
833
def zip_cmd(args):
    """
    Entry point of the ``zip`` sub-command.

    Returns 1 (after printing the error) when argument parsing raises.
    Otherwise runs the external ``zip`` tool inside each target's
    CORPORA_DIR/<target>_seed_corpus directory, writing
    <target>_seed_corpus.zip next to it. Nothing is returned on success; a
    failing zip raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        archive = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", archive, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
851
852
def list_cmd(args):
    """Print every known target name, one per line. ``args`` is unused."""
    listing = "\n".join(TARGETS)
    print(listing)
855
856
def short_help(args):
    """Print the one-line usage banner, using ``args[0]`` as program name."""
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0]))
860
861
def help(args):
    """Print the usage banner followed by the option and command summary."""
    short_help(args)
    summary = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in summary:
        print(line)
877
878
def main():
    """
    Dispatch ``sys.argv`` to the selected sub-command and return its result.

    Prints the help text and returns 1 when no command or a help flag is
    given; prints an error and returns 1 for an unknown command. The command
    word is removed from the argument list and folded into the program name
    before the handler is called.
    """
    args = sys.argv
    if len(args) < 2:
        help(args)
        return 1
    if args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
908
909
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())