Add script to dump system files used by build
In order to move towards builds which only use resources in the Hafnium
repo, this patch adds a script which runs the build with strace and
dumps all files touched in the process. Files in the Hafnium directory
and in /tmp are automatically filtered out.
Bug: 132428451
Test: ./build/strace_open.sh opened_files.txt
Change-Id: I03a2df4eedf40c456b65920ec8bf98ad08e747c6
diff --git a/build/docker/Dockerfile b/build/docker/Dockerfile
index 93c8caa..0361e9c 100644
--- a/build/docker/Dockerfile
+++ b/build/docker/Dockerfile
@@ -40,4 +40,5 @@
python \
python-git `# for Linux checkpatch` \
python-ply `# for Linux checkpatch` \
+ strace `# for strace_open.sh` \
&& rm -rf /var/lib/apt/lists/*
diff --git a/build/parse_strace_open.py b/build/parse_strace_open.py
new file mode 100755
index 0000000..5dd878b
--- /dev/null
+++ b/build/parse_strace_open.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# Copyright 2019 The Hafnium Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script which parses the output of `strace` and dumps a list of files
+that were touched by the traced processes outside of whitelisted folders.
+It assumes that strace was invoked with the following arguments:
+ -e trace=%file,chdir,%process record required syscalls
+ -qq silence 'exit code' records
+ -o <file> output format is different when writing
+ to a file from printing to the console
+"""
+
+import argparse
+import os
+import sys
+
# Syscalls which spawn a new process. Their return value is treated as the
# PID of the child when tracking parent/child relationships.
FORK_SYSCALLS = ["clone", "fork", "vfork"]

# Syscalls which take a file path. Any path passed to one of these is
# recorded as having been touched by the traced process.
OPEN_SYSCALLS = [
    "access", "creat", "lstat", "mkdir",
    "open", "openat", "readlink", "stat",
]
+
def get_unfinished(line):
    """Return the part of `line` preceding the "<unfinished ...>" marker.

    Returns None if `line` does not contain the marker.
    """
    head, marker, _ = line.partition("<unfinished ...>")
    return head if marker else None
+
def get_resumed(line):
    """Return the part of `line` following the " resumed>" marker.

    Returns None if `line` does not contain the marker.
    """
    _, marker, tail = line.partition(" resumed>")
    return tail if marker else None
+
def merge_unfinished_lines(lines):
    """Process input lines and merge those split by an interrupting syscall.

    A record that strace split into an "<unfinished ...>" part and one or
    more "<... resumed>" parts is glued back together. The merged record is
    placed at the position where its first part appeared, so the output is
    ordered by when each record started being written.

    Args:
        lines: Iterable of strace output lines, each starting with a PID.
            Blank lines are ignored.

    Returns:
        List of complete lines with the unfinished/resumed markers removed.

    Raises:
        ValueError: If a line does not start with an integer PID, if a PID
            resumes a syscall it never started, or if a PID starts a new
            record while it still has an unfinished one.
    """
    # Lines in the order they were started being written.
    finished = []

    # Pending unfinished lines. Map from PID to index in `finished`.
    cursor = {}

    for line in lines:
        fields = line.split()
        if not fields:
            # Nothing to parse, e.g. a trailing blank line in the log.
            continue
        pid = int(fields[0])

        resumed = get_resumed(line)
        if resumed is not None:
            if pid not in cursor:
                raise ValueError(
                    "PID {} resumed a syscall it never started: {!r}".format(
                        pid, line))
            unfinished = get_unfinished(resumed)
            if unfinished is not None:
                # Interrupted again: append this part and keep it pending.
                finished[cursor[pid]] += unfinished
            else:
                finished[cursor[pid]] += resumed
                del cursor[pid]
        else:
            if pid in cursor:
                raise ValueError(
                    "PID {} started a new record while another is "
                    "unfinished: {!r}".format(pid, line))
            unfinished = get_unfinished(line)
            if unfinished is not None:
                # Line is unfinished. Store its location to `cursor`.
                cursor[pid] = len(finished)
                finished.append(unfinished)
            else:
                finished.append(line)
    return finished
+
def abs_path(cwd, path):
    """Resolve `path` against `cwd` if it is relative and normalize it.

    Args:
        cwd: Directory which a relative `path` is resolved against.
        path: Path as it appeared in a syscall argument. May be empty, in
            which case the result is `cwd` itself.

    Returns:
        The normalized path with symbolic links resolved by
        os.path.realpath().
    """
    # startswith() rather than path[0] so that an empty path (which strace
    # does log, e.g. for open("")) does not raise IndexError.
    if not path.startswith('/'):
        path = os.path.join(cwd, path)
    path = os.path.abspath(path)
    return os.path.realpath(path)
+
def get_touched_files(lines, orig_cwd):
    """Parse strace output and return all files that an open()-like syscall
    was called on.

    Args:
        lines: Iterable of complete strace records, each of the form
            "<pid> <syscall info>". Blank lines are ignored.
        orig_cwd: Working directory of the first process in the log.

    Returns:
        Set of (path, executable) tuples, where `path` is the normalized
        absolute path passed to a syscall in OPEN_SYSCALLS and `executable`
        is the program the calling process last successfully execve()'d.
        Paths are recorded even if the syscall itself failed.

    Raises:
        ValueError: If a PID is not an integer, or if openat() is called
            relative to anything other than AT_FDCWD.
        KeyError: If a process appears without a known parent or without a
            known executable.
    """
    files = set()

    # Map from PID to the current working directory.
    cwd = {}

    # Map from PID to executable name
    executable = {}

    # Map from PID to the PID of the process which forked it.
    fork_of = {}

    result_separator = ") = "

    first_pid = True
    for line in lines:
        # Split line: <pid> <syscall info>
        fields = line.split()
        if not fields:
            continue
        pid = int(fields[0])
        # Re-joining collapses strace's column padding to single spaces, so
        # the result is always introduced by exactly ") = ".
        syscall = " ".join(fields[1:])

        # If seeing a PID for the first time, derive its working directory
        # from its parent.
        if pid not in cwd:
            if first_pid:
                # Very first line of strace output. Set working directory
                # from command line arguments (should match cwd of strace).
                first_pid = False
                cwd[pid] = orig_cwd
            else:
                # There should have been a fork/clone syscall which spawned
                # this process. Inherit its working directory.
                cwd[pid] = cwd[fork_of[pid]]

        # We are looking for lines which match:
        #   name(arg1, arg2, ..., argN) = result
        # The separator is searched for as a unit. Looking for the last ")"
        # on its own would hit the errno description of a failed call, e.g.
        # "= -1 ENOENT (No such file or directory)", and drop the record.
        left_bracket = syscall.find("(")
        right_bracket = syscall.rfind(result_separator)
        if left_bracket < 0 or right_bracket < left_bracket:
            continue

        syscall_name = syscall[:left_bracket]

        # NOTE: arguments are split on commas, so a path which itself
        # contains a comma is truncated at the comma.
        syscall_args = [
            arg.strip()
            for arg in syscall[left_bracket + 1:right_bracket].split(",")
        ]

        # Numeric return value, or None if it is not a number (e.g. "?").
        result_fields = syscall[right_bracket + len(result_separator):].split()
        try:
            result_code = int(result_fields[0])
        except (IndexError, ValueError):
            result_code = None

        if syscall_name in FORK_SYSCALLS:
            # If this is a fork, keep track of the parent-child relationship.
            # The child's PID is the syscall's return code. A failed fork
            # created no child, so there is nothing to track.
            if result_code is None or result_code < 0:
                continue
            fork_of[result_code] = pid
            executable[result_code] = executable[pid]
        elif syscall_name == "chdir":
            # If this is a successful change of working directory, keep
            # track of it. It is in the first argument in quotes.
            if result_code == 0:
                cwd[pid] = abs_path(cwd[pid], syscall_args[0][1:-1])
        elif syscall_name == "execve":
            # If this successfully executed a new program, record its name.
            # It is in the first argument in quotes.
            if result_code == 0:
                executable[pid] = syscall_args[0][1:-1]
        elif syscall_name in OPEN_SYSCALLS:
            # If this is a syscall touching a file, record the path.
            # We ignore the result code, i.e. record the path even if the
            # syscall failed to open it.
            arg_idx = 0
            if syscall_name == "openat":
                # openat() can open a file (second arg) relative to a given
                # folder (first arg). We only support passing AT_FDCWD, i.e.
                # resolve against the current working directory.
                arg_idx = 1
                if syscall_args[0] != "AT_FDCWD":
                    raise ValueError(
                        "openat() relative to a directory other than "
                        "AT_FDCWD is not supported: {!r}".format(line))
            fname = abs_path(cwd[pid], syscall_args[arg_idx][1:-1])
            # Record the file and the name of the program which touched it.
            files.add((fname, executable[pid]))
    return files
+
def filter_results(files, root_dir):
    """Remove paths which are whitelisted from the results.

    Args:
        files: Iterable of (path, executable) tuples with normalized
            absolute paths.
        root_dir: Root directory of Hafnium. It is normalized with
            os.path.realpath() before comparison, so a trailing slash, a
            relative path or a symlinked root still match the normalized
            paths in `files`.

    Returns:
        List of the tuples from `files` whose path is neither inside
        `root_dir` nor inside /tmp, in the iteration order of `files`.
    """
    # os.path.join(..., "") appends a trailing separator only if missing, so
    # "/repo" matches "/repo/x" but not "/repo-other/x".
    root_prefix = os.path.join(os.path.realpath(root_dir), "")
    whitelisted_prefixes = (
        # Anything in the Hafnium directory is allowed.
        root_prefix,
        # Clang puts intermediate files in /tmp.
        "/tmp/",
    )
    return [
        entry for entry in files
        if not entry[0].startswith(whitelisted_prefixes)
    ]
+
def main(args):
    """Read strace output from stdin and print the files touched outside of
    the whitelisted folders, one "<path> (<executable>)" entry per line.

    Args:
        args: Full argument vector including the program name, i.e.
            sys.argv. The first positional argument is the Hafnium root
            directory; unrecognized extra arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("root_dir",
                        help="Root directory of Hafnium, cwd of strace")
    # Parse the vector we were given rather than implicitly re-reading
    # sys.argv, so the `args` parameter is actually honored.
    options, _ = parser.parse_known_args(args[1:])

    log_lines = [line.strip() for line in sys.stdin]
    records = merge_unfinished_lines(log_lines)
    touched = get_touched_files(records, options.root_dir)
    external = filter_results(touched, options.root_dir)

    print("\n".join(
        "{} ({})".format(path, binary) for path, binary in sorted(external)))


if __name__ == "__main__":
    main(sys.argv)
diff --git a/build/run_in_container.sh b/build/run_in_container.sh
index e3c6bd0..ae0850e 100755
--- a/build/run_in_container.sh
+++ b/build/run_in_container.sh
@@ -42,14 +42,30 @@
"${SCRIPT_DIR}/docker"
IMAGE_ID="$(cat ${IID_FILE})"
-# Check if script was invoked with '-i' as first argument. If so, run
-# container in interactive mode.
+# Parse command line arguments
INTERACTIVE=false
-if [ "${1:-}" == "-i" ]
-then
- INTERACTIVE=true
- shift
-fi
+ALLOW_PTRACE=false
+while true
+do
+ case "${1:-}" in
+ -i)
+ INTERACTIVE=true
+ shift
+ ;;
+ -p)
+ ALLOW_PTRACE=true
+ shift
+ ;;
+ -*)
+ echo "ERROR: Unknown command line flag: $1" 1>&2
+ echo "Usage: $0 [-i] [-p] <command>"
+ exit 1
+ ;;
+ *)
+ break
+ ;;
+ esac
+done
ARGS=()
# Run with a pseduo-TTY for nicer logging.
@@ -59,6 +75,12 @@
then
ARGS+=(-i)
fi
+# Allow ptrace() syscall if invoked with '-p'.
+if [ "${ALLOW_PTRACE}" == "true" ]
+then
+ echo "WARNING: Docker seccomp profile is disabled!" 1>&2
+ ARGS+=(--cap-add=SYS_PTRACE --security-opt seccomp=unconfined)
+fi
# Set environment variable informing the build that we are running inside
# a container.
ARGS+=(-e HAFNIUM_HERMETIC_BUILD=inside)
diff --git a/build/strace_open.sh b/build/strace_open.sh
new file mode 100755
index 0000000..e960daf
--- /dev/null
+++ b/build/strace_open.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Copyright 2019 The Hafnium Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
# Runs a clean build under strace and writes the list of files touched
# outside of the repository to the file given as the only argument.
set -euxo pipefail

SCRIPT_NAME="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"
ROOT_DIR="$(realpath "${SCRIPT_DIR}/..")"

# Re-run this script inside the container, with ptrace allowed (-p) so that
# strace can attach to the build.
if [ "${HAFNIUM_HERMETIC_BUILD:-}" == "true" ]
then
	exec "${ROOT_DIR}/build/run_in_container.sh" -p "${SCRIPT_NAME}" "$@"
fi

if [ $# != 1 ]
then
	echo "Usage: $0 <output_file>" 1>&2
	exit 1
fi

MAKE="$(which make)"
STRACE="$(which strace)"

# Set up a temp directory and register a cleanup function on exit.
TMP_DIR="$(mktemp -d)"
function cleanup() {
	rm -rf "${TMP_DIR}"
}
trap cleanup EXIT

STRACE_LOG="${TMP_DIR}/strace.log"

echo "Building with strace"
pushd "${ROOT_DIR}"
"${MAKE}" clobber
# These flags are the ones parse_strace_open.py documents as its expected
# input format; -f additionally follows child processes.
"${STRACE}" \
	-o "${STRACE_LOG}" \
	-f \
	-qq \
	-e trace=%file,chdir,%process \
	"${MAKE}"
popd

echo "Processing strace output"
"${SCRIPT_DIR}/parse_strace_open.py" "${ROOT_DIR}" < "${STRACE_LOG}" > "$1"