648db22b |
1 | #!/usr/bin/env python |
2 | |
3 | # ################################################################ |
4 | # Copyright (c) Meta Platforms, Inc. and affiliates. |
5 | # All rights reserved. |
6 | # |
7 | # This source code is licensed under both the BSD-style license (found in the |
8 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found |
9 | # in the COPYING file in the root directory of this source tree). |
10 | # You may select, at your option, one of the above-listed licenses. |
11 | # ########################################################################## |
12 | |
13 | import argparse |
14 | import contextlib |
15 | import os |
16 | import re |
17 | import shlex |
18 | import shutil |
19 | import subprocess |
20 | import sys |
21 | import tempfile |
22 | |
23 | |
def abs_join(a, *p):
    """
    Joins `a` with the path components in `p` and returns the result as an
    absolute, normalized path.
    """
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26 | |
27 | |
class InputType(object):
    """
    Integer tags for the kind of input a fuzz target consumes.
    """
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32 | |
33 | |
class FrameType(object):
    """
    Integer tags for the framing a fuzz target works with.
    """
    ZSTD, BLOCK = 1, 2
37 | |
38 | |
class TargetInfo(object):
    """
    Describes a fuzz target: the input type it consumes and the frame type
    it works with (FrameType.ZSTD unless stated otherwise).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type, self.frame_type = input_type, frame_type
43 | |
44 | |
# Constants
# Directory containing this script, and the corpora directory below it.
FUZZ_DIR = os.path.dirname(os.path.abspath(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')

# Every known fuzz target mapped to a description of the input it consumes.
RAW = InputType.RAW_DATA
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(
        InputType.RAW_DATA, frame_type=FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(
        InputType.COMPRESSED_DATA, frame_type=FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
del RAW

# Target names accepted on the command line; 'all' selects every target.
TARGETS = list(TARGET_INFO)
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4
74 | |
# Standard environment variables (each falls back to the default shown)
CC = os.getenv('CC', 'cc')
CXX = os.getenv('CXX', 'c++')
CPPFLAGS = os.getenv('CPPFLAGS', '')
CFLAGS = os.getenv('CFLAGS', '-O3')
CXXFLAGS = os.getenv('CXXFLAGS', CFLAGS)  # follows CFLAGS when unset
LDFLAGS = os.getenv('LDFLAGS', '')
MFLAGS = os.getenv('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.getenv('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.getenv('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.getenv('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.getenv(
    'DECODECORPUS', abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.getenv('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.getenv('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.getenv('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.getenv('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.getenv('MSAN_EXTRA_LDFLAGS', '')
97 | |
98 | |
def create(r):
    """
    Returns the absolute path of `r`, creating the directory (and any
    missing parents) first when it does not exist yet.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
104 | |
105 | |
def check(r):
    """
    Returns the absolute path of `r` when it is an existing directory and
    None otherwise.
    """
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
111 | |
112 | |
@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding the path of a fresh temporary directory.
    The directory tree is removed on exit; removal errors are ignored.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
120 | |
121 | |
def parse_targets(in_targets):
    """
    Validates and expands a list of target names.

    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Duplicates are dropped and the result keeps first-seen order, so the
    order in which targets are built or run is the same on every
    invocation (a plain set would order them by string hash).

    Raises RuntimeError for a name that is neither 'all' nor in TARGETS.
    """
    selected = []
    seen = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            expanded = TARGETS
        elif target in TARGETS:
            expanded = [target]
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
        for name in expanded:
            if name not in seen:
                seen.add(name)
                selected.append(name)
    return selected
134 | |
135 | |
def targets_parser(args, description):
    """
    Parses a command line that consists of zero or more TARGETs.

    The first element of `args` is popped and used as the program name.
    Unrecognized arguments are stored in `extra` on the returned namespace
    and TARGET is validated/expanded by parse_targets().
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
149 | |
150 | |
def parse_env_flags(args, flags):
    """
    Look for sanitizer flags in `flags` and fold them into `args`.

    A sanitizer named by -fsanitize= is switched on, one named by
    -fno-sanitize= is switched off, and otherwise the value already in
    `args` is kept.  Sets args.asan, args.msan, args.ubsan and
    args.sanitize, and returns `args`.

    Raises RuntimeError when a sanitizer is both enabled and disabled.
    """
    enabled = set()
    for group in re.findall('-fsanitize=((?:[a-z]+,?)+)', flags):
        enabled.update(group.split(','))
    disabled = set()
    for group in re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags):
        disabled.update(group.split(','))

    def resolve(sanitizer, default):
        if sanitizer in enabled:
            if sanitizer in disabled:
                raise RuntimeError(
                    '-fno-sanitize={s} and -fsanitize={s} passed'.
                    format(s=sanitizer))
            return True
        if sanitizer in disabled:
            return False
        return default

    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
178 | |
179 | |
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version`, prints the C compiler's
    banner, and returns `(compiler, version)` where compiler is 'clang',
    'gcc' or None and version is a `(major, minor, patch)` tuple of ints
    (None when the compiler is not recognized).

    Raises RuntimeError when the compiler is recognized but no x.y.z
    version can be found in its banner.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only printed for diagnostics; drop non-ASCII bytes
    # (e.g. a localized copyright sign) instead of failing to decode.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', 'ignore')))
    if b'clang' in cc_version_bytes:
        assert b'clang' in cxx_version_bytes
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        assert b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes
        compiler = 'gcc'
    if compiler is not None:
        # Raw literal: '\.' is not a valid escape in a normal bytes literal.
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to find the version in the output of '
                '{} --version'.format(cc))
        version = tuple(int(part) for part in version_match.groups())
    return compiler, version
201 | |
202 | |
def overflow_ubsan_flags(cc, cxx):
    """
    Returns the -fno-sanitize flag that switches off the overflow check for
    the detected compiler: signed-integer-overflow for gcc older than 8,
    pointer-overflow for newer gcc and for clang 5 or newer, and nothing
    for any other compiler.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        if version < (8, 0, 0):
            return ['-fno-sanitize=signed-integer-overflow']
        return ['-fno-sanitize=pointer-overflow']
    if compiler == 'clang' and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []
210 | |
211 | |
def build_parser(args):
    """
    Parses the command line of the `build` command.

    The first element of `args` is popped and used as the program name.
    Most options default to the module-level values read from the
    environment.  After parsing, sanitizers requested through the
    compiler/linker flags are folded in by parse_env_flags().

    Returns the parsed namespace.
    Raises RuntimeError when the sanitizer options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and -fsanitize=fuzzer exclude each other.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    # store_false: ubsan_pointer_overflow is True unless the flag is passed.
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    # --msan-extra-{cppflags,cflags,cxxflags,ldflags}
    for flag, value in (('cppflags', MSAN_EXTRA_CPPFLAGS),
                        ('cflags', MSAN_EXTRA_CFLAGS),
                        ('cxxflags', MSAN_EXTRA_CXXFLAGS),
                        ('ldflags', MSAN_EXTRA_LDFLAGS)):
        parser.add_argument(
            '--msan-extra-{}'.format(flag),
            dest='msan_extra_{}'.format(flag),
            type=str,
            default=value,
            help="Extra {0} for MSAN (default: $MSAN_EXTRA_{0}='{1}')".format(
                flag.upper(), value))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    # --cc, --cxx, --cppflags, --cflags, --cxxflags, --ldflags
    for flag, value in (('cc', CC),
                        ('cxx', CXX),
                        ('cppflags', CPPFLAGS),
                        ('cflags', CFLAGS),
                        ('cxxflags', CXXFLAGS),
                        ('ldflags', LDFLAGS)):
        parser.add_argument(
            '--{}'.format(flag),
            dest=flag,
            type=str,
            default=value,
            help="{0} (default: ${0}='{1}')".format(flag.upper(), value))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers may also be requested through the compiler/linker flags.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
392 | |
393 | |
def build(args):
    """
    Implements the `build` command: assembles the compiler, preprocessor,
    linker and make flags from the parsed options, prints them, runs
    `make clean` and then builds the requested targets with make.

    Returns 1 when the command line cannot be parsed and 0 after a
    successful build; a failing make raises CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1

    # The compilation flags we are setting
    cc, cxx = args.cc, args.cxx
    targets = args.TARGET
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)

    # Flags to be added to both cflags and cxxflags
    common_flags = [
        '-Werror',
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated',  # C files are sometimes compiled with CXX
    ]

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ])

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if args.fuzzer:
        common_flags.append('-fsanitize=fuzzer')
        # -fsanitize=fuzzer replaces the external fuzzing engine.
        args.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine))

    if args.sanitize:
        if args.sanitize_recover:
            common_flags.append('-fsanitize-recover=all')
        else:
            common_flags.append('-fno-sanitize-recover=all')

    if args.msan:
        common_flags.append('-fsanitize=memory')
        if args.msan_track_origins:
            common_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(args.msan_extra_cppflags)
        cflags.append(args.msan_extra_cflags)
        cxxflags.append(args.msan_extra_cxxflags)
        ldflags.append(args.msan_extra_ldflags)

    if args.asan:
        common_flags.append('-fsanitize=address')

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags.extend(overflow_ubsan_flags(cc, cxx))
        common_flags.extend(ubsan_flags)

    if args.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if args.third_party_seq_prod_obj:
        cppflags.append('-DFUZZ_THIRD_PARTY_SEQ_PROD')
        mflags.append(
            'THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj))

    if args.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    # The regression driver is listed before the fuzz targets.
    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags.extend(common_flags)
    cxxflags.extend(common_flags)

    # Prepare the flags for Make
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    for make_var in make_vars:
        print(make_var)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = ['make', '-j'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
513 | |
514 | |
def libfuzzer_parser(args):
    """
    Parses the command line of the `libfuzzer` command.

    The first element of `args` is popped and used as the program name.
    Unrecognized arguments are stored in `extra` on the returned namespace.
    Raises RuntimeError when TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    # --corpora, --artifact and --seed only differ in name and default dir.
    for option, default_name in (('corpora', 'TARGET'),
                                 ('artifact', 'TARGET-crash'),
                                 ('seed', 'TARGET-seed')):
        parser.add_argument(
            '--{}'.format(option),
            type=str,
            help='Override the default {} dir (default: {})'.format(
                option, abs_join(CORPORA_DIR, default_name)))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    return parsed
553 | |
554 | |
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Runs the libFuzzer binary for `target`.

    Directories that are not given default to sub-directories of
    CORPORA_DIR named after the target.  The corpora and artifact
    directories are created when missing; the seed directory is only used
    when it already exists.  A failing run raises CalledProcessError.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpora)
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    # Directories passed to the fuzzer: corpus, crashes, then seeds (if any)
    inputs = [corpus_dir, artifact_dir]
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd.extend(inputs)
    cmd.extend(extra_args)
    print(' '.join(cmd))
    subprocess.check_call(cmd)
579 | |
580 | |
def libfuzzer_cmd(args):
    """
    Implements the `libfuzzer` command.
    Returns 1 when the command line cannot be parsed, 0 after the run.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(
        parsed.TARGET,
        parsed.corpora,
        parsed.artifact,
        parsed.seed,
        parsed.extra)
    return 0
589 | |
590 | |
def afl_parser(args):
    """
    Parses the command line of the `afl` command.

    The first element of `args` is popped and used as the program name.
    Unrecognized arguments are stored in `extra` on the returned namespace,
    and missing --corpora/--output values are filled in with directories
    below CORPORA_DIR.
    Raises RuntimeError when TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    parser.add_argument('--corpora', type=str, help=corpora_help)

    output_help = 'Override the default AFL output dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-afl'))
    parser.add_argument('--output', type=str, help=output_help)

    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    # Fall back to the per-target directories below CORPORA_DIR.
    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, parsed.TARGET)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR, '{}-afl'.format(parsed.TARGET))

    return parsed
632 | |
633 | |
def afl(args):
    """
    Implements the `afl` command: runs afl-fuzz on the target binary with
    the corpora directory as input and the output directory as output
    (both are created when missing).

    Returns 1 when the command line cannot be parsed and 0 otherwise; the
    exit status of afl-fuzz itself is not checked.
    """
    try:
        parsed = afl_parser(args)
    except Exception as e:
        print(e)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    in_dir = create(parsed.corpora)
    out_dir = create(parsed.output)

    cmd = [parsed.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(parsed.extra)
    # '@@' is passed to afl-fuzz right after the target binary.
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
650 | |
651 | |
def regression(args):
    """
    Implements the `regression` command: runs every requested target
    binary on its corpora directory (created when missing).

    Returns 1 when the command line cannot be parsed and 0 when all runs
    succeed; a failing run raises CalledProcessError.
    """
    description = """
    Runs one or more regression tests.
    The fuzzer should have been built with
    LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA.
    """
    try:
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        binary = abs_join(FUZZ_DIR, name)
        cmd = [binary, corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
671 | |
672 | |
def gen_parser(args):
    """
    Parses the command line of the `gen` command.

    The first element of `args` is popped and used as the program name.
    Unrecognized arguments are stored in `extra` on the returned namespace
    and a missing --seed is replaced by the per-target seed directory.
    Raises RuntimeError when TARGET is not a known target or when the
    decodecorpus binary is not a file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    parser.add_argument(
        '--number', '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')

    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    parser.add_argument('--seed', type=str, help=seed_help)

    decodecorpus_help = (
        "decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help=decodecorpus_help)
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    if not parsed.seed:
        parsed.seed = abs_join(CORPORA_DIR, '{}-seed'.format(parsed.TARGET))

    # decodecorpus is not built by this script; fail early when missing.
    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus,
                                  abs_join(FUZZ_DIR, '..')))

    return parsed
735 | |
736 | |
def gen(args):
    """
    Implements the `gen` command: generates samples with decodecorpus and
    copies the ones matching the target's input type into the seed
    directory (created when missing).

    Raw-data targets get the decompressed samples, compressed-data targets
    the compressed ones, and dictionary targets get dictionaries trained
    with `zstd --train` on the decompressed samples.

    Returns 1 when the command line cannot be parsed and 0 on success; a
    failing decodecorpus or zstd run raises CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets always generate at least 1000 samples.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 2**9;
            # at least one size is always produced.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged (no RNG seed
        # is prepended and args.fuzz_rng_seed_size is not used here).
        for name in os.listdir(samples):
            shutil.copyfile(abs_join(samples, name), abs_join(seed, name))
    return 0
806 | |
807 | |
def minimize(args):
    """
    Implements the `minimize` command: for every target, merges its corpus
    into `<target>_seed_corpus` with libFuzzer's -merge=1 and then copies
    crash files that are not yet present into that seed corpus.

    Returns 1 when the command line cannot be parsed and 0 on success,
    like the other commands.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0
832 | |
833 | |
def zip_cmd(args):
    """
    Implements the `zip` command: archives `<target>_seed_corpus` of every
    target into `<target>_seed_corpus.zip` next to it.

    Returns 1 when the command line cannot be parsed and 0 on success,
    like the other commands; a failing zip raises CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        # zip runs inside the seed corpus and archives its contents (".").
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0
851 | |
852 | |
def list_cmd(args):
    """
    Implements the `list` command: prints every target name on its own
    line.  `args` is unused.  Returns 0 like the other commands.
    """
    print("\n".join(TARGETS))
    return 0
855 | |
856 | |
def short_help(args):
    """
    Prints the one-line usage message; `args[0]` is the program name.
    """
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
860 | |
861 | |
def help(args):
    """
    Prints the usage message followed by the list of options and commands.
    """
    short_help(args)
    lines = [
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    ]
    print("\n".join(lines))
877 | |
878 | |
def main():
    """
    Dispatches to the command named by the first command-line argument.

    The command name is removed from sys.argv and appended to the program
    name, so each command's parser reports "<prog> <command>".
    Returns the command's result, or 1 after printing help or an error.
    """
    argv = sys.argv
    if len(argv) < 2 or argv[1] in ('-h', '--help', '-H'):
        help(argv)
        return 1

    command = argv.pop(1)
    argv[0] = "{} {}".format(argv[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(argv)

    short_help(argv)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1


if __name__ == "__main__":
    sys.exit(main())