648db22b |
1 | #!/usr/bin/env python |
2 | |
3 | # ################################################################ |
4 | # Copyright (c) Meta Platforms, Inc. and affiliates. |
5 | # All rights reserved. |
6 | # |
7 | # This source code is licensed under both the BSD-style license (found in the |
8 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found |
9 | # in the COPYING file in the root directory of this source tree). |
10 | # You may select, at your option, one of the above-listed licenses. |
11 | # ########################################################################## |
12 | |
13 | import argparse |
14 | import contextlib |
15 | import os |
16 | import re |
17 | import shlex |
18 | import shutil |
19 | import subprocess |
20 | import sys |
21 | import tempfile |
22 | |
23 | |
def abs_join(a, *p):
    """
    Joins `a` with the path components in `p` and returns the result as an
    absolute path.
    """
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26 | |
27 | |
class InputType(object):
    """
    Integer tags naming the kind of input data associated with a target.
    """
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32 | |
33 | |
class FrameType(object):
    """
    Integer tags naming the frame format associated with a target.
    """
    ZSTD, BLOCK = 1, 2
37 | |
38 | |
class TargetInfo(object):
    """
    Per-target metadata: the input type and the frame type (ZSTD frames
    unless stated otherwise).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        # Both values are stored untouched for later lookup.
        self.frame_type = frame_type
        self.input_type = input_type
43 | |
44 | |
# Constants
# Absolute path of the directory containing this script.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Directory under FUZZ_DIR holding the per-target corpus directories.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to its TargetInfo (input type and, where given,
# frame type). The key order is visible to users through TARGETS.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
}
# Names of all known targets.
TARGETS = list(TARGET_INFO.keys())
# Values accepted on the command line: every target plus the 'all' alias.
ALL_TARGETS = TARGETS + ['all']
# NOTE(review): presumably the default RNG seed size matching the
# --fuzz-rng-seed-size options below -- confirm before relying on it.
FUZZ_RNG_SEED_SIZE = 4
72 | |
# Standard environment variables
# Each value falls back to the given default when the variable is unset.
CC = os.getenv('CC', 'cc')
CXX = os.getenv('CXX', 'c++')
CPPFLAGS = os.getenv('CPPFLAGS', '')
CFLAGS = os.getenv('CFLAGS', '-O3')
# CXXFLAGS defaults to whatever CFLAGS resolved to.
CXXFLAGS = os.getenv('CXXFLAGS', CFLAGS)
LDFLAGS = os.getenv('LDFLAGS', '')
MFLAGS = os.getenv('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.getenv('THIRD_PARTY_SEQ_PROD_OBJ', '')
82 | |
# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.getenv('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.getenv('AFL_FUZZ', 'afl-fuzz')
# Default tool locations are resolved relative to this script's directory.
DECODECORPUS = os.getenv('DECODECORPUS',
                         abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.getenv('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))
89 | |
# Sanitizer environment variables
# Extra flags used as defaults for the --msan-extra-* build options.
MSAN_EXTRA_CPPFLAGS = os.getenv('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.getenv('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.getenv('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.getenv('MSAN_EXTRA_LDFLAGS', '')
95 | |
96 | |
def create(r):
    """
    Makes sure the directory `r` exists (creating intermediate directories
    as needed) and returns its absolute path.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
102 | |
103 | |
def check(r):
    """
    Returns the absolute path of `r` if it is an existing directory,
    otherwise None.
    """
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
109 | |
110 | |
@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding the path of a fresh temporary directory, which
    is removed again when the context exits.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        # Best-effort cleanup: errors while deleting are ignored.
        shutil.rmtree(scratch, ignore_errors=True)
118 | |
119 | |
def parse_targets(in_targets):
    """
    Expands and validates a list of target names.

    Empty names are skipped, 'all' selects every known target, and any
    other unknown name raises RuntimeError. Returns the selected names as a
    list without duplicates (built from a set, so in no particular order).
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected |= set(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
132 | |
133 | |
def targets_parser(args, description):
    """
    Parses a command line consisting of zero or more TARGET names.

    The first element of `args` is popped and used as the program name.
    Unrecognised arguments are kept on the result as `extra`, and `TARGET`
    is replaced by the list returned from parse_targets().
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, extra = parser.parse_known_args(args)
    parsed.extra = extra
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
147 | |
148 | |
def parse_env_flags(args, flags):
    """
    Look for sanitizer flags set by environment variables.

    Scans `flags` for -fsanitize=... and -fno-sanitize=... options and
    updates `args.asan`, `args.msan` and `args.ubsan` accordingly; a switch
    keeps its current value when its sanitizer is not mentioned. Also sets
    `args.sanitize` to whether any of the three is enabled.

    Raises RuntimeError if the same sanitizer is both enabled and disabled.
    Returns `args`.
    """
    enabled = set()
    for group in re.findall('-fsanitize=((?:[a-z]+,?)+)', flags):
        enabled.update(group.split(','))
    disabled = set()
    for group in re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags):
        disabled.update(group.split(','))

    switches = (
        ('asan', 'address'),
        ('msan', 'memory'),
        ('ubsan', 'undefined'),
    )
    for attr, sanitizer in switches:
        value = getattr(args, attr)
        wanted = sanitizer in enabled
        unwanted = sanitizer in disabled
        if wanted and unwanted:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if wanted:
            value = True
        elif unwanted:
            value = False
        setattr(args, attr, value)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
176 | |
177 | |
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version`, prints the output of the former
    and returns a `(compiler, version)` tuple. `compiler` is 'clang', 'gcc'
    or None when neither is recognised; `version` is a
    `(major, minor, patch)` tuple of ints, or None when `compiler` is None.

    Raises RuntimeError if `cxx` does not look like the same compiler family
    as `cc`, or if no X.Y.Z version number is found for a recognised
    compiler. Errors from running the compilers propagate unchanged.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only echoed for information, so undecodable bytes are
    # replaced instead of aborting the build.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', 'replace')))
    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    if compiler is not None:
        # Raw bytes literal: '\.' is not a valid escape in a normal literal.
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'could not find a version number in the output of '
                '{} --version'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
199 | |
200 | |
def overflow_ubsan_flags(cc, cxx):
    """
    Returns the -fno-sanitize=... flag list to add to a UBSAN build for the
    detected compiler, or an empty list when none applies.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        if version < (8, 0, 0):
            return ['-fno-sanitize=signed-integer-overflow']
        return ['-fno-sanitize=pointer-overflow']
    if compiler == 'clang' and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []
208 | |
209 | |
def build_parser(args):
    """
    Parses the command line of the `build` command.

    The first element of `args` is popped and used as the program name.
    After parsing, sanitizer switches are also picked up from
    -fsanitize=/-fno-sanitize= options found in the cppflags, cflags,
    cxxflags and ldflags values (see parse_env_flags).

    Returns the parsed namespace. Raises RuntimeError when the selected
    sanitizer options are inconsistent with each other.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and the clang fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the flag strings count as enabled too.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
392 | |
393 | |
def build(args):
    """
    Implements the `build` command.

    Parses `args` with build_parser(), assembles the compiler, linker and
    make flags from the selected options, then runs `make clean` followed
    by `make` with those flags and the requested targets.

    Returns 1 (after printing the error) if option parsing fails and 0 once
    both make invocations succeed. Failures of the make commands propagate
    as subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = []

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    # The parser declares these two options as mutually exclusive.
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        # With -fsanitize=fuzzer the engine setting is ignored, so clear it.
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    # The recover flag is only added when at least one sanitizer is enabled.
    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup)
        # Each extra value is appended as one unsplit string.
        cppflags += [args.msan_extra_cppflags]
        cflags += [args.msan_extra_cflags]
        cxxflags += [args.msan_extra_cxxflags]
        ldflags += [args.msan_extra_ldflags]

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        # Unless explicitly requested, disable the compiler-dependent
        # overflow check chosen by overflow_ubsan_flags().
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.third_party_seq_prod_obj:
        cppflags += ['-DFUZZ_THIRD_PARTY_SEQ_PROD']
        mflags += ['THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj)]

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    # The regression driver library is put first in the list of make targets.
    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
507 | |
508 | |
def libfuzzer_parser(args):
    """
    Parses the command line of the `libfuzzer` command.

    The first element of `args` is popped and used as the program name.
    Unrecognised arguments are kept on the result as `extra`. Raises
    RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    # Placeholder paths shown in the help text for each directory option.
    default_corpora = abs_join(CORPORA_DIR, 'TARGET')
    default_artifact = abs_join(CORPORA_DIR, 'TARGET-crash')
    default_seed = abs_join(CORPORA_DIR, 'TARGET-seed')

    parser.add_argument(
        '--corpora', type=str,
        help='Override the default corpora dir (default: {})'.format(
            default_corpora))
    parser.add_argument(
        '--artifact', type=str,
        help='Override the default artifact dir (default: {})'.format(
            default_artifact))
    parser.add_argument(
        '--seed', type=str,
        help='Override the default seed dir (default: {})'.format(
            default_seed))
    parser.add_argument(
        'TARGET', type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, extra = parser.parse_known_args(args)
    parsed.extra = extra

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    return parsed
547 | |
548 | |
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Runs the fuzzer binary for `target` from FUZZ_DIR.

    Directories default to `<CORPORA_DIR>/<target>`, `<target>-crash` and
    `<target>-seed`. The corpora and artifact directories are created when
    missing; the seed directory is only passed along if it already exists.
    `extra_args` are appended to the command line. A failing fuzzer raises
    subprocess.CalledProcessError.
    """
    corpora = abs_join(CORPORA_DIR, target) if corpora is None else corpora
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    extra_args = [] if extra_args is None else extra_args

    binary = abs_join(FUZZ_DIR, target)

    corpus_dirs = [create(corpora)]
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    # The artifact directory is also handed to the fuzzer as a corpus dir.
    corpus_dirs.append(artifact_dir)
    if seed_dir is not None:
        corpus_dirs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd = cmd + corpus_dirs + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)
573 | |
574 | |
def libfuzzer_cmd(args):
    """
    Implements the `libfuzzer` command.

    Returns 1 (after printing the error) if option parsing fails, otherwise
    runs the fuzzer and returns 0.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(
        parsed.TARGET,
        corpora=parsed.corpora,
        artifact=parsed.artifact,
        seed=parsed.seed,
        extra_args=parsed.extra)
    return 0
583 | |
584 | |
def afl_parser(args):
    """
    Parses the command line of the `afl` command.

    The first element of `args` is popped and used as the program name.
    Unrecognised arguments are kept on the result as `extra`. Missing
    `corpora`/`output` values are filled in with the per-target defaults.
    Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    # Placeholder paths shown in the help text.
    default_corpora = abs_join(CORPORA_DIR, 'TARGET')
    default_output = abs_join(CORPORA_DIR, 'TARGET-afl')

    parser.add_argument(
        '--corpora', type=str,
        help='Override the default corpora dir (default: {})'.format(
            default_corpora))
    parser.add_argument(
        '--output', type=str,
        help='Override the default AFL output dir (default: {})'.format(
            default_output))
    parser.add_argument(
        '--afl-fuzz', type=str, default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET', type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, extra = parser.parse_known_args(args)
    parsed.extra = extra

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, target)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR, '{}-afl'.format(target))

    return parsed
626 | |
627 | |
def afl(args):
    """
    Implements the `afl` command.

    Returns 1 (after printing the error) if option parsing fails. Otherwise
    runs afl-fuzz on the target binary and returns 0; the exit status of
    afl-fuzz itself is not checked.
    """
    try:
        parsed = afl_parser(args)
    except Exception as e:
        print(e)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    input_dir = create(parsed.corpora)
    output_dir = create(parsed.output)

    # Extra arguments go before the target binary and the '@@' argument.
    cmd = [parsed.afl_fuzz, '-i', input_dir, '-o', output_dir]
    cmd = cmd + parsed.extra + [binary, '@@']
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
644 | |
645 | |
def regression(args):
    """
    Implements the `regression` command.

    Runs each selected target binary on its corpus directory (created when
    missing). Returns 1 (after printing the error) if option parsing fails
    and 0 when every target succeeds; a failing target raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        binary = abs_join(FUZZ_DIR, name)
        cmd = [binary, corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
665 | |
666 | |
def gen_parser(args):
    """
    Parses the command line of the `gen` command.

    The first element of `args` is popped and used as the program name.
    Unrecognised arguments are kept on the result as `extra`, and a missing
    `seed` value is replaced by the per-target default directory. Raises
    RuntimeError if TARGET is not a known target or if the decodecorpus
    binary is not an existing file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    parser.add_argument(
        '--number', '-n', type=int, default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log', type=int, default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed', type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus', type=str, default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd', type=str, default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size', type=int, default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)")
    parser.add_argument(
        'TARGET', type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, extra = parser.parse_known_args(args)
    parsed.extra = extra

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    if not parsed.seed:
        parsed.seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))

    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus,
                                  abs_join(FUZZ_DIR, '..')))

    return parsed
729 | |
730 | |
def gen(args):
    """
    Implements the `gen` command: generates a seed corpus for one target.

    Runs decodecorpus into temporary directories, picks the decompressed
    samples, the compressed samples or freshly trained dictionaries
    depending on the target's input type, and copies the chosen files into
    the seed directory (created when missing).

    Returns 1 (after printing the error) if option parsing fails and 0 on
    success. Failing subprocesses raise subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets get at least 1000 samples -- presumably so that
        # dictionary training has enough data to work with.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size; at least one
            # dictionary is always produced.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged; nothing is
        # prepended to the data here.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0
800 | |
801 | |
def minimize(args):
    """
    Implements the `minimize` command.

    For each selected target, runs the fuzzer with -merge=1 to merge the
    target's corpus into `<CORPORA_DIR>/<target>_seed_corpus`, then copies
    every file from `<target>-crash` that is not already present there.

    Returns 1 (after printing the error) if option parsing fails and 0 on
    success, like the other commands.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0
826 | |
827 | |
def zip_cmd(args):
    """
    Implements the `zip` command.

    For each selected target, runs `zip` inside
    `<CORPORA_DIR>/<target>_seed_corpus` and writes the archive next to that
    directory as `<target>_seed_corpus.zip`.

    Returns 1 (after printing the error) if option parsing fails and 0 on
    success, like the other commands. A failing `zip` raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0
845 | |
846 | |
def list_cmd(args):
    """
    Implements the `list` command: prints one target name per line.

    `args` is accepted for symmetry with the other commands and is unused.
    Returns 0, like the other commands do on success.
    """
    print("\n".join(TARGETS))
    return 0
849 | |
850 | |
def short_help(args):
    """
    Prints the one-line usage message, using `args[0]` as the program name.
    """
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
854 | |
855 | |
def help(args):
    """
    Prints the usage line followed by the list of options and commands.
    """
    short_help(args)
    lines = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    print("\n".join(lines))
871 | |
872 | |
def main():
    """
    Entry point: dispatches `sys.argv` to the selected command.

    Prints the help and returns 1 when no command or a help flag is given,
    and prints an error and returns 1 for an unknown command. Otherwise
    returns whatever the command function returns.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    # Fold the command into the program name so that each sub-parser shows
    # "<script> <command>" in its usage text.
    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    commands = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = commands.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
902 | |
903 | |
# When run as a script, use the return value of main() as the exit status.
if __name__ == "__main__":
    sys.exit(main())