| 1 | #!/usr/bin/env python3 |
| 2 | |
| 3 | # Tool to bundle multiple C/C++ source files, inlining any includes. |
| 4 | # |
| 5 | # Note: there are two types of exclusion options: the '-x' flag, which besides |
| 6 | # excluding a file also adds an #error directive in place of the #include, and |
| 7 | # the '-k' flag, which keeps the #include and doesn't inline the file. The |
| 8 | # intended use cases are: '-x' for files that would normally be #if'd out, so |
| 9 | # features that 100% won't be used in the amalgamated file, for which every |
| 10 | # occurrence adds the error, and '-k' for headers that we wish to manually |
| 11 | # include, such as a project's public API, for which occurrences after the first |
| 12 | # are removed. |
| 13 | # |
| 14 | # Todo: improve the error handling, which currently throws and halts (this is |
| 15 | # functional, just not very friendly). |
| 16 | # |
| 17 | # Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain) |
| 18 | |
| 19 | import argparse, re, sys |
| 20 | |
| 21 | from pathlib import Path |
| 22 | from typing import Any, List, Optional, Pattern, Set, TextIO |
| 23 | |
# Set of file roots when searching (equivalent to -I paths for the compiler).
# Filled from the '-r' arguments once each has been resolved.
# NOTE(review): this is a set, so the roots are not searched in command-line
# order; a file name present under more than one root may resolve to either.
roots: Set[Path] = set()

# Set of (canonical) file Path objects to exclude from inlining (and not only
# exclude but to add a compiler error directive when they're encountered).
# Filled from the '-x' arguments.
excludes: Set[Path] = set()

# Set of (canonical) file Path objects to keep as include directives. Filled
# from the '-k' arguments.
keeps: Set[Path] = set()

# Whether to keep the #pragma once directives (unlikely, since this will result
# in a warning, but the option is there). Set from the '-p' flag.
keep_pragma: bool = False

# Destination file object (or stdout if no output file was supplied).
destn: TextIO = sys.stdout

# Set of file Path objects previously inlined (and to ignore if reencountering).
# The input file itself is added before processing starts so that it is never
# inlined into itself.
found: Set[Path] = set()
| 43 | |
# Compiled regex Pattern to handle "#pragma once" in various formats, i.e. with
# optional whitespace before the '#', after the '#', and around 'once':
#
#     #pragma once
#       #pragma once
#     #  pragma once
#     #pragma   once
#     #pragma once // comment
#
# Ignoring commented versions, same as include_regex (the pattern is anchored
# to the start of the line, so anything other than whitespace before the '#'
# prevents a match).
#
pragma_regex: Pattern = re.compile(r'^\s*#\s*pragma\s*once\s*')

# Compiled regex Pattern to handle the following type of file includes:
#
#     #include "file"
#       #include "file"
#     #  include "file"
#     #include   "file"
#     #include "file" // comment
#     #include "file" // comment with quote "
#
# And all combinations of, as well as ignoring the following:
#
#     #include <file>
#     //#include "file"
#     /*#include "file"*/
#
# We don't try to catch errors since the compiler will do this (and the code is
# expected to be valid before processing) and we don't care what follows the
# file (whether it's a valid comment or not, since anything after the quoted
# string is ignored). The file name is captured non-greedily as group 1, so it
# ends at the first closing quote.
#
include_regex: Pattern = re.compile(r'^\s*#\s*include\s*"(.+?)"')
| 77 | |
| 78 | # Simple tests to prove include_regex's cases. |
| 79 | # |
def test_match_include() -> bool:
    """Self-check for include_regex.

    Verifies that the quoted include forms are matched, that angle-bracket and
    commented-out includes are not, and that a stray quote in a trailing
    comment does not leak into the captured file name.

    Returns True (after printing a confirmation) when every case behaves as
    expected, otherwise False.
    """
    accepted = (
        '#include "file"',
        ' #include "file"',
        '# include "file"',
        '#include "file"',
        '#include "file" // comment',
    )
    rejected = (
        '#include <file>',
        '//#include "file"',
        '/*#include "file"*/',
    )
    if not all(include_regex.match(text) for text in accepted):
        return False
    if any(include_regex.match(text) for text in rejected):
        return False
    # The capture must stop at the first closing quote, not the one in the
    # trailing comment.
    quoted = include_regex.match('#include "file" // "')
    if not quoted or quoted.group(1) != 'file':
        return False
    print('#include match valid')
    return True
| 94 | |
| 95 | # Simple tests to prove pragma_regex's cases. |
| 96 | # |
def test_match_pragma() -> bool:
    """Self-check for pragma_regex.

    Verifies that the '#pragma once' forms are matched and that commented-out
    ones are not.

    Returns True (after printing a confirmation) when every case behaves as
    expected, otherwise False.
    """
    accepted = (
        '#pragma once',
        ' #pragma once',
        '# pragma once',
        '#pragma once',
        '#pragma once // comment',
    )
    rejected = (
        '//#pragma once',
        '/*#pragma once*/',
    )
    if not all(pragma_regex.match(text) for text in accepted):
        return False
    if any(pragma_regex.match(text) for text in rejected):
        return False
    print('#pragma once match valid')
    return True
| 108 | |
| 109 | # Finds 'file'. First the list of 'root' paths are searched, followed by the |
| 110 | # currently processing file's 'parent' path, returning a valid Path in |
| 111 | # canonical form. If no match is found None is returned. |
| 112 | # |
def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]:
    """Locate 'file' and return its canonical Path, or None if it isn't found.

    The search mirrors how an include is looked up: every entry in the
    module-level 'roots' is tried first, followed by 'parent' (the directory
    of the file currently being processed). Without a 'parent' the name is
    tried as given, i.e. relative to the working directory.

    file:   include name exactly as written in the directive or on the
            command line.
    parent: directory of the including file, if any.

    The result is always resolved so it can be compared with the canonical
    entries held in the 'excludes', 'keeps' and 'found' sets.
    """
    # NOTE: 'roots' is a set, so the order in which the roots are tried is
    # unspecified when the same name exists under more than one of them.
    for root in roots:
        candidate = root.joinpath(file).resolve()
        if candidate.is_file():
            return candidate
    # Resolve the fallback as well: previously the parent-less branch returned
    # the path unresolved, so it would never equal a canonical set entry.
    candidate = (parent.joinpath(file) if parent else Path(file)).resolve()
    return candidate if candidate.is_file() else None
| 125 | |
| 126 | # Helper to resolve lists of files. 'file_list' is passed in from the arguments |
| 127 | # and each entry resolved to its canonical path (like any include entry, either |
| 128 | # from the list of root paths or the owning file's 'parent', which in this case |
| 129 | # is the input file). The results are stored in 'resolved'. |
| 130 | # |
def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
    """Resolve each name in 'file_list' and collect the results in 'resolved'.

    file_list: names taken from the command line (None or empty when the
               option wasn't used, in which case nothing happens).
    resolved:  set that receives the Path of every name that was found.
    parent:    directory passed on to resolve_include() for each lookup.

    Names that cannot be found are reported on stderr and otherwise skipped.
    """
    for filename in file_list or ():
        path = resolve_include(filename, parent)
        if not path:
            error_line(f'Warning: excluded file not found: {filename}')
            continue
        resolved.add(path)
| 139 | |
| 140 | # Writes 'line' to the open 'destn' (or stdout). |
| 141 | # |
def write_line(line: str) -> None:
    """Emit 'line', followed by a newline, to the current destination.

    The module-level 'destn' is looked up on every call, so output follows
    whatever it is bound to at the time (stdout unless reassigned).
    """
    target = destn
    print(line, end='\n', file=target)
| 144 | |
| 145 | # Logs 'line' to stderr. This is also used for general notifications that we |
| 146 | # don't want to go to stdout (so the source can be piped). |
| 147 | # |
def error_line(line: Any) -> None:
    """Report 'line', followed by a newline, on stderr.

    Used for errors and for progress notifications alike, keeping stdout free
    for the generated source so that it can be piped.
    """
    stream = sys.stderr
    print(line, end='\n', file=stream)
| 150 | |
| 151 | # Inline the contents of 'file' (with any of its includes also inlined, etc.). |
| 152 | # |
| 153 | # Note: text encoding errors are ignored and replaced with ? when reading the |
| 154 | # input files. This isn't ideal, but it's more likely in the comments than |
| 155 | # code and a) the text editor has probably also failed to read the same content, |
| 156 | # and b) the compiler probably did too. |
| 157 | # |
def add_file(file: Path, file_name: Optional[str] = None) -> None:
    """Write 'file' to the destination, recursively inlining quoted includes.

    file:      path of the source file to process.
    file_name: name to show in the progress message; defaults to the file's
               own name.

    Each line is handled as follows:

    - a quoted include that cannot be resolved becomes an #error directive;
    - an include of an excluded file becomes an #error directive (every time
      it is encountered);
    - an include of a file that was already seen is replaced by a 'skipping'
      marker comment;
    - the first include of a file flagged to keep is written unchanged,
      preceded by a marker comment;
    - any other include is replaced by the file's processed content, wrapped
      in start/end marker comments;
    - '#pragma once' lines are dropped unless 'keep_pragma' is set;
    - everything else is copied as is.

    Progress and problems are reported on stderr. Undecodable bytes in the
    input are replaced rather than raising. If 'file' isn't a regular file an
    error is logged and nothing is written.
    """
    if not file.is_file():
        error_line(f'Error: Invalid file: {file}')
        return
    if not file_name:
        file_name = file.name
    error_line(f'Processing: {file_name}')
    with file.open('r', errors='replace') as opened:
        for line in opened:
            line = line.rstrip('\n')
            match_include = include_regex.match(line)
            if not match_include:
                # Skip any 'pragma once' directives, otherwise write the source line
                if keep_pragma or not pragma_regex.match(line):
                    write_line(line)
                continue
            # We have a quoted include directive so grab the file
            inc_name = match_include.group(1)
            resolved = resolve_include(inc_name, file.parent)
            if not resolved:
                # The include file didn't resolve to a file
                write_line(f'#error Unable to find: {inc_name}')
                error_line(f'Error: Unable to find: {inc_name}')
            elif resolved in excludes:
                # The file was excluded so error if the compiler uses it
                write_line(f'#error Using excluded file: {inc_name} (re-amalgamate source to fix)')
                error_line(f'Excluding: {inc_name}')
            elif resolved in found:
                # The file was previously encountered (inlined or kept)
                write_line(f'/**** skipping file: {inc_name} ****/')
            else:
                # Record the file before descending into it so that circular
                # includes are skipped rather than recursing without end
                found.add(resolved)
                if resolved in keeps:
                    # The include was flagged to keep as included
                    write_line(f'/**** *NOT* inlining {inc_name} ****/')
                    write_line(line)
                    error_line(f'Not inlining: {inc_name}')
                else:
                    # The file was neither excluded nor seen before so inline it
                    write_line(f'/**** start inlining {inc_name} ****/')
                    add_file(resolved, inc_name)
                    write_line(f'/**** ended inlining {inc_name} ****/')
| 202 | |
| 203 | # Start here |
parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
parser.add_argument('-r', '--root', action='append', type=Path, help='file root search path')
parser.add_argument('-x', '--exclude', action='append', help='file to completely exclude from inlining')
parser.add_argument('-k', '--keep', action='append', help='file to exclude from inlining but keep the include directive')
parser.add_argument('-p', '--pragma', action='store_true', default=False, help='keep any "#pragma once" directives (removed by default)')
parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='output file (otherwise stdout)')
parser.add_argument('input', type=Path, help='input file')
args = parser.parse_args()

# argparse has already opened any '-o' file by this point, so take ownership of
# it straight away and keep everything that may raise inside the try block,
# guaranteeing the handle is closed again even when validation fails.
if args.output:
    destn = args.output
try:
    # Fail early on an invalid input (and store it so we don't recurse)
    args.input = args.input.resolve(strict=True)
    found.add(args.input)

    # Resolve all of the root paths upfront (we'll halt here on invalid roots)
    for path in args.root or []:
        roots.add(path.resolve(strict=True))

    # The remaining params: so resolve the excluded files and #pragma once directive
    resolve_excluded_files(args.exclude, excludes, args.input.parent)
    resolve_excluded_files(args.keep, keeps, args.input.parent)
    keep_pragma = args.pragma

    # Then recursively process the input file
    add_file(args.input)
finally:
    # Only close what was opened on our behalf; stdout is merely flushed
    if destn is sys.stdout:
        destn.flush()
    else:
        destn.close()