| 1 | #!/bin/sh -e |
| 2 | |
| 3 | # Tool to bundle multiple C/C++ source files, inlining any includes. |
| 4 | # |
| 5 | # TODO: ROOTS, FOUND, etc., as arrays (since they fail on paths with spaces) |
| 6 | # |
| 7 | # Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain) |
| 8 | |
# Whitespace-separated include search roots (-r appends to this list)
ROOTS=.

# Whitespace-separated headers given with -x (never inlined)
XINCS=

# Whitespace-separated headers given with -k (left as include directives)
KINCS=

# Whitespace-separated resolved paths of the includes encountered so far
FOUND=

# Output file given with -o (left empty to write to stdout)
DESTN=

# Set to 1 by -p so that "#pragma once" directives reach the output
PONCE=0
| 26 | |
# Prints the script usage then exits with status 1
#
# The help text is written to stderr: when no -o option is given the
# amalgamated source goes to stdout, so diagnostics must not share it.
usage() {
  {
    echo "Usage: $0 [-r <path>] [-x <header>] [-k <header>] [-p] [-o <outfile>] infile"
    echo " -r file root search path"
    echo " -x file to completely exclude from inlining"
    echo " -k file to exclude from inlining but keep the include directive"
    echo " -p keep any '#pragma once' directives (removed by default)"
    echo " -o output file (otherwise stdout)"
    echo "Example: $0 -r ../my/path -r ../other/path -o out.c in.c"
  } >&2
  exit 1
}
| 38 | |
# Tests that the grep and sed implementations work as expected (older OSX
# grep fails), exiting with status 1 if either does not
#
# Diagnostics are written to stderr so that they are not mixed into the
# amalgamated source when it is being written to stdout.
test_deps() {
  # The include matcher relies on grep -E understanding '\s'
  if ! echo '#include "foo"' | grep -Eq '^\s*#\s*include\s*".+"'; then
    echo "Aborting: the grep implementation fails to parse include lines" >&2
    exit 1
  fi
  # The include name extraction relies on sed -E capture groups
  if ! echo '"foo.h"' | sed -E 's/"([^"]+)"/\1/' | grep -Eq '^foo\.h$'; then
    echo "Aborting: sed is unavailable or non-functional" >&2
    exit 1
  fi
}
| 50 | |
# Glob-matches subject $2 against pattern $1 (see fnmatch(3))
# Returns 0 when the whole subject matches the pattern, otherwise 1
fnmatch() {
  case "$2" in
    # $1 is deliberately unquoted so that it is treated as a pattern
    $1) return 0 ;;
    *) return 1 ;;
  esac
}
| 60 | |
# Returns 0 if line $1 is a local (double-quoted) include directive
is_include_line() {
  # Cheap glob pre-filter so that grep is only spawned for likely candidates
  if ! fnmatch "*#*include*" "$1"; then
    return 1
  fi
  # The status of grep is the result
  printf "%s\n" "$1" | grep -Eq '^\s*#\s*include\s*".+"'
}
| 66 | |
# Returns 0 if line $1 is a "#pragma once" directive
is_pragma_once_line() {
  # Cheap glob pre-filter so that grep is only spawned for likely candidates
  if ! fnmatch "*#*pragma*once*" "$1"; then
    return 1
  fi
  # The status of grep is the result
  printf "%s\n" "$1" | grep -Eq '^\s*#\s*pragma\s*once\s*'
}
| 72 | |
# Tests if whitespace-separated list $1 has item $2 (returning zero on a match)
# (originally used grep -Eq "(^|\s*)$2(\$|\s*))
#
# The item is compared literally: any glob special characters it contains are
# escaped before matching. An empty item is never considered a member.
readonly list_FS="$IFS"
list_has_item() {
  local item_P
  # Without this guard an empty item would "match" any list that is empty or
  # that begins or ends with a separator
  [ -n "$2" ] || return 1
  # Re: escaping glob pattern special characters in item string:
  #
  # bash (tested 3.2.57, 5.1.4), dash (tested 0.5.10.2), NetBSD /bin/sh
  # (tested 8.2), and Solaris /bin/sh (tested 11.4) require escaping
  # backslashes in a bracket expression despite POSIX specifying that
  # backslash loses significance in a bracket expression.
  #
  # Conversely, neither FreeBSD /bin/sh (tested 12.2) nor OpenBSD /bin/sh
  # (tested 7.1) obey backslash-escaping in case statement patterns even
  # outside bracket expressions, so escape special characters using bracket
  # expressions.
  #
  # Solaris /bin/sh (tested 11.4) requires vertical bar (|) to be escaped.
  #
  # All accommodations should behave as expected under strict POSIX semantics.
  #
  # The detection pattern below expands to *[\\*?[|]* so that the bracket
  # expression holds a backslash both in shells that treat backslash as an
  # escape inside brackets and in those that treat it literally (where the
  # second backslash is a harmless duplicate).
  if fnmatch "*[\\\\*?[|]*" "$2"; then
    set -- "$1" "$(printf '%s\n' "$2" | sed -e 's/[*?[|]/[&]/g; s/[\]/[\\&]/g')"
  fi
  # The item may sit in the middle, at the end, at the start, or be the whole
  # list; $list_FS supplies the separator characters for the bracket expression
  for item_P in "*[$list_FS]$2[$list_FS]*" "*[$list_FS]$2" "$2[$list_FS]*" "$2"; do
    fnmatch "${item_P}" "$1" && return 0
  done
  return 1
}
| 100 | |
# Emits each argument as its own line, appended to $DESTN when that is set
# and written to stdout otherwise
write_line() {
  if [ -z "$DESTN" ]; then
    printf '%s\n' "$@"
    return
  fi
  printf '%s\n' "$@" >> "$DESTN"
}
| 109 | |
# Writes the supplied arguments as a single line to stderr
#
# The arguments are joined with "$*" (a space under the default IFS) and
# printed verbatim, so file names containing glob characters or runs of
# whitespace are logged exactly as given.
log_line() {
  printf '%s\n' "$*" >&2
}
| 113 | |
# Finds include $2, searching directory $1 first and then each entry in $ROOTS
#
# On success the path of the first match is printed and zero is returned;
# when no root contains the file nothing is printed and 1 is returned.
#
# The printed path is reduced to a canonical form where possible, so that
# multiple includes of the same file are successfully deduplicated even if
# they are expressed differently.
resolve_include() {
  local srcdir=$1
  local inc=$2
  local root candidate relpath py
  # $ROOTS is deliberately unquoted: it is a whitespace-separated list
  for root in "$srcdir" $ROOTS; do
    candidate="$root/$inc"
    [ -f "$candidate" ] || continue
    # Each attempt is guarded with "|| relpath=" so that a missing or failing
    # tool falls through to the next one (even when running under 'sh -e')
    relpath=$(realpath --relative-to . "$candidate" 2>/dev/null) || relpath=
    if [ -z "$relpath" ]; then # not all realpaths support --relative-to
      relpath=$(realpath "$candidate" 2>/dev/null) || relpath=
    fi
    if [ -z "$relpath" ]; then # not all distros have realpath...
      # Fallback on Python to reduce the path; print() with parentheses is
      # accepted by both Python 2 and Python 3
      for py in python3 python; do
        relpath=$("$py" -c 'import os, sys; print(os.path.relpath(sys.argv[1]))' "$candidate" 2>/dev/null) || relpath=
        [ -z "$relpath" ] || break
      done
    fi
    # Worst case, fall back to just the root + relative include path. The
    # problem with this is that it is possible to emit multiple different
    # resolved paths to the same file, depending on exactly how it's included.
    # Since the caller keeps a list of the resolved paths it has already
    # included, in order to avoid repeated includes, this failure to produce a
    # canonical/reduced path can lead to multiple inclusions of the same file
    # (which include guards in the sources would normally absorb).
    printf '%s\n' "${relpath:-$candidate}"
    return 0
  done
  return 1
}
| 153 | |
# Adds the contents of $1 with any of its includes inlined
#
# Each line of the file is read in turn and passed to write_line, except:
#  - local include directives, which are replaced by the (recursively
#    processed) content of the included file the first time it is seen, by a
#    "skipping" comment on later occurrences, by the untouched directive if
#    the include is listed in $KINCS, or by an #error directive if it is
#    listed in $XINCS or cannot be resolved;
#  - "#pragma once" directives, which are dropped unless $PONCE is non-zero.
#
# Resolved paths of the includes encountered are appended to $FOUND. If $1
# is empty an #error directive is written instead.
add_file() {
  local file=$1
  if [ -z "$file" ]; then
    write_line "#error Unable to find \"$1\""
    log_line "Error: Unable to find: \"$1\""
    return 0
  fi
  log_line "Processing: $file"
  local srcdir line inc res_inc
  # Get directory of the current file so we can resolve relative includes
  srcdir=$(dirname "$file")
  # Read the file; the extra test keeps a last line lacking a trailing newline
  while IFS= read -r line || [ -n "$line" ]; do
    if is_include_line "$line"; then
      # We have an include directive so strip out the first quoted file name
      # (anything after its closing quote, such as a comment, is ignored)
      inc=$(printf '%s\n' "$line" | sed -E 's/^[^"]*"([^"]+)".*$/\1/')
      res_inc=$(resolve_include "$srcdir" "$inc") || res_inc=
      if list_has_item "$XINCS" "$inc"; then
        # The file was excluded so error if the source attempts to use it
        write_line "#error Using excluded file: $inc (re-amalgamate source to fix)"
        log_line "Excluding: $inc"
      elif [ -z "$res_inc" ]; then
        # The include was not found under any root; report it rather than
        # letting it silently vanish from the output
        write_line "#error Unable to find \"$inc\""
        log_line "Error: Unable to find: \"$inc\""
      elif list_has_item "$FOUND" "$res_inc"; then
        # The file was previously encountered
        write_line "/**** skipping file: $inc ****/"
      else
        FOUND="$FOUND $res_inc"
        if list_has_item "$KINCS" "$inc"; then
          # The include was flagged to keep as included
          write_line "/**** *NOT* inlining $inc ****/"
          write_line "$line"
          log_line "Not Inlining: $inc"
        else
          # The file was neither excluded nor seen before so inline it
          write_line "/**** start inlining $inc ****/"
          add_file "$res_inc"
          write_line "/**** ended inlining $inc ****/"
        fi
      fi
    elif [ "$PONCE" -ne 0 ] || ! is_pragma_once_line "$line"; then
      # Skip any 'pragma once' directives, otherwise write the source line
      write_line "$line"
    fi
  done < "$file"
}
| 209 | |
# Parse the command line. The leading ':' in the option string makes getopts
# report unknown options and missing option arguments through $opts instead
# of printing its own message, so both cases land on the usage text.
while getopts ":r:x:k:po:" opts; do
  case "$opts" in
    r)
      ROOTS="$ROOTS $OPTARG"
      ;;
    x)
      XINCS="$XINCS $OPTARG"
      ;;
    k)
      KINCS="$KINCS $OPTARG"
      ;;
    p)
      PONCE=1
      ;;
    o)
      DESTN="$OPTARG"
      ;;
    *)
      usage
      ;;
  esac
done
shift $((OPTIND-1))

# The first remaining argument is the file to amalgamate
if [ -z "$1" ]; then
  usage
elif [ ! -f "$1" ]; then
  # Diagnostics go to stderr since stdout may be carrying the amalgamation
  printf '%s\n' "Input file not found: \"$1\"" >&2
  exit 1
else
  if [ -n "$DESTN" ]; then
    # Start from an empty destination since write_line only ever appends
    printf "" > "$DESTN"
  fi
  test_deps
  log_line "Processing using the slower shell script; this might take a while"
  add_file "$1"
fi
exit 0