Run QEMU tests individually.

Each test should have a clean context to run in, which means a fresh QEMU
run is needed. The test image will report the tests it contains and the
runner script will query this and then run the individual tests.

The test descriptors are stored in the .hftest section of the image to
allow the image to query itself.

Change-Id: I9d4d10097d768aa51728e428a4e48eaa9b4949b4
diff --git a/.clang-tidy b/.clang-tidy
index bc5bcaa..2eacd36 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1,3 +1,4 @@
 Checks: 'readability-*,portability-*,performance-*,misc-*,bugprone-*,modernize-*'
 HeaderFilterRegex: '.*'
 FormatStyle: file
+WarningsAsErrors: '*'
diff --git a/build/image/image.ld b/build/image/image.ld
index 9973954..cb32f98 100644
--- a/build/image/image.ld
+++ b/build/image/image.ld
@@ -52,6 +52,21 @@
 		*(.rela.*)
 	}
 	rela_end = .;
+	/* The hftest framework adds test descriptors in the .hftest section
+	 * which is examined at runtime to discover the available tests. The
+	 * input sections are named after the test they include so sorting here
+	 * means they are stored sorted by the name of the test suite and then
+	 * by test case names. To ensure tests aren't accidentally included in
+	 * images that are not meant to have them, the assertion checks for a
+	 * marker to signal tests are allowed. */
+	. = ALIGN(8);
+	hftest_begin = .;
+	.hftest : {
+		KEEP(*(SORT(.hftest.*)))
+	}
+	hftest_end = .;
+	ASSERT((SIZEOF(.hftest) == (DEFINED(hftest_enable) ? SIZEOF(.hftest) : 0)),
+	       "Error: Image includes .hftest section but not HFTEST_ENABLE().")
 	rodata_end = .;
 
 	/*
diff --git a/kokoro/ubuntu/build.sh b/kokoro/ubuntu/build.sh
index 9a7f510..a0b2c90 100755
--- a/kokoro/ubuntu/build.sh
+++ b/kokoro/ubuntu/build.sh
@@ -43,6 +43,6 @@
 
 if [[ `git status --porcelain` ]]
 then
-	echo "Run `make format` and `make tidy` locally to fix this."
+	echo "Run \`make format\` and \`make tidy\` locally to fix this."
 	exit 1
 fi
diff --git a/kokoro/ubuntu/continuous.cfg b/kokoro/ubuntu/continuous.cfg
index c2554d0..0e0e286 100644
--- a/kokoro/ubuntu/continuous.cfg
+++ b/kokoro/ubuntu/continuous.cfg
@@ -2,3 +2,10 @@
 
 # Location of the continuous bash script in Git.
 build_file: "hafnium/kokoro/ubuntu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "git/hafnium/out/**/test_log/**/*.log"
+    strip_prefix: "git/hafnium"
+  }
+}
diff --git a/kokoro/ubuntu/presubmit.cfg b/kokoro/ubuntu/presubmit.cfg
index 7d60242..aa7fd6b 100644
--- a/kokoro/ubuntu/presubmit.cfg
+++ b/kokoro/ubuntu/presubmit.cfg
@@ -2,3 +2,10 @@
 
 # Location of the presubmit bash script in Git.
 build_file: "hafnium/kokoro/ubuntu/build.sh"
+
+action {
+  define_artifacts {
+    regex: "git/hafnium/out/**/test_log/**/*.log"
+    strip_prefix: "git/hafnium"
+  }
+}
diff --git a/kokoro/ubuntu/test.sh b/kokoro/ubuntu/test.sh
index 623c4aa..6d146b6 100755
--- a/kokoro/ubuntu/test.sh
+++ b/kokoro/ubuntu/test.sh
@@ -9,10 +9,9 @@
 set -x
 
 TIMEOUT="timeout --foreground"
-QEMU="./prebuilts/linux-x64/qemu/qemu-system-aarch64 -M virt -cpu cortex-a57 -m 8M -machine virtualization=true -nographic -nodefaults -serial stdio"
-HAFNIUM="out/aarch64/qemu/clang_aarch64/hafnium.bin"
-INITRD="out/aarch64/qemu/clang_aarch64/initrd"
+OUT="out/aarch64/qemu/clang_aarch64"
+HFTEST="timeout --foreground 30s ./test/vm/hftest.py --out $OUT --initrd"
 
-# Run the QEMU tests with a timeout so they can't loop forever.
-$TIMEOUT 5s $QEMU -kernel $HAFNIUM -initrd $INITRD/primary_only_test.img
-$TIMEOUT 5s $QEMU -kernel $HAFNIUM -initrd $INITRD/primary_with_secondary_test.img
+# Run the tests with a timeout so they can't loop forever.
+$HFTEST primary_only_test
+$HFTEST primary_with_secondary_test
diff --git a/src/BUILD.gn b/src/BUILD.gn
index 3f54988..acac35f 100644
--- a/src/BUILD.gn
+++ b/src/BUILD.gn
@@ -5,17 +5,17 @@
     "api.c",
     "cpio.c",
     "cpu.c",
-    "fdt.c",
     "fdt_handler.c",
     "load.c",
     "main.c",
-    "memiter.c",
     "mm.c",
     "vm.c",
   ]
 
   deps = [
     ":common",
+    ":fdt",
+    ":memiter",
   ]
 
   if (is_debug) {
@@ -41,3 +41,15 @@
     "//src/arch/${arch}:putchar",
   ]
 }
+
+source_set("fdt") {
+  sources = [
+    "fdt.c",
+  ]
+}
+
+source_set("memiter") {
+  sources = [
+    "memiter.c",
+  ]
+}
diff --git a/src/arch/aarch64/vm/vm_entry.S b/src/arch/aarch64/vm/vm_entry.S
index 6db2526..0e6a29e 100644
--- a/src/arch/aarch64/vm/vm_entry.S
+++ b/src/arch/aarch64/vm/vm_entry.S
@@ -2,8 +2,8 @@
 .global image_entry
 image_entry:
 	/* Prepare the stack. */
-	adr x0, kstack + 4096
-	mov sp, x0
+	adr x30, kstack + 4096
+	mov sp, x30
 
 	/* Call into C code. */
 	bl kmain
diff --git a/test/vm/BUILD.gn b/test/vm/BUILD.gn
index dd1825e..2103fbd 100644
--- a/test/vm/BUILD.gn
+++ b/test/vm/BUILD.gn
@@ -1,13 +1,28 @@
 import("//build/image/image.gni")
 
-source_set("hf_test_vm") {
+source_set("hftest_vm") {
   sources = [
-    "hf_test.h",
+    "hftest.c",
+    "hftest.h",
   ]
 
   deps = [
     "//src:common",
     "//src:common_debug",
+    "//src:fdt",
+    "//src:memiter",
+    "//src/arch/${arch}:entry",
+    "//src/arch/${arch}/vm:hf_call",
+    "//src/arch/${arch}/vm:shutdown",
+    "//src/arch/${arch}/vm:vm_entry",
+  ]
+}
+
+# TODO: work out what to run in the secondary VMs
+source_set("other_vm") {
+  deps = [
+    "//src:common",
+    "//src:common_debug",
     "//src/arch/${arch}:entry",
     "//src/arch/${arch}/vm:hf_call",
     "//src/arch/${arch}/vm:shutdown",
diff --git a/test/vm/hf_test.h b/test/vm/hf_test.h
deleted file mode 100644
index 146d6aa1..0000000
--- a/test/vm/hf_test.h
+++ /dev/null
@@ -1,141 +0,0 @@
-#pragma once
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "hf/dlog.h"
-
-/*
- * Prefixed to log lines from tests for easy filtering in the console.
- */
-#define HF_TEST_LOG_PREFIX "[hf_test] "
-
-/*
- * Context for tests.
- */
-struct hf_test_context {
-	uint32_t failures;
-};
-
-/*
- * This union can store any of the primitive types supported by the assertion
- * macros.
- */
-union hf_test_any {
-	bool b;
-	char c;
-	signed char sc;
-	unsigned char uc;
-	signed short ss;
-	unsigned short us;
-	signed int si;
-	unsigned int ui;
-	signed long int sli;
-	unsigned long int uli;
-	signed long long int slli;
-	unsigned long long int ulli;
-	void *p;
-};
-
-/* _Generic formatting doesn't seem to be supported so doing this manually. */
-/* clang-format off */
-
-/* Select the union member to match the type of the expression. */
-#define hf_test_any_get(any, x)                     \
-	_Generic((x),                               \
-		bool:                   (any).b,    \
-		char:                   (any).c,    \
-		signed char:            (any).sc,   \
-		unsigned char:          (any).uc,   \
-		signed short:           (any).ss,   \
-		unsigned short:         (any).us,   \
-		signed int:             (any).si,   \
-		unsigned int:           (any).ui,   \
-		signed long int:        (any).sli,  \
-		unsigned long int:      (any).uli,  \
-		signed long long int:   (any).slli, \
-		unsigned long long int: (any).ulli, \
-		void *:                 (any).p)
-
-/*
- * dlog format specifier for types. Note, these aren't the standard specifiers
- * for the types.
- */
-#define hf_test_dlog_format(x)                \
-	_Generic((x),                         \
-		bool:                   "%u", \
-		char:                   "%c", \
-		signed char:            "%d", \
-		unsigned char:          "%u", \
-		signed short:           "%d", \
-		unsigned short:         "%u", \
-		signed int:             "%d", \
-		unsigned int:           "%u", \
-		signed long int:        "%d", \
-		unsigned long int:      "%u", \
-		signed long long int:   "%d", \
-		unsigned long long int: "%u", \
-		void *:                 "%p")
-
-/* clang-format on */
-
-#define ASSERT_OP(lhs, rhs, op, fatal)                                     \
-	do {                                                               \
-		union hf_test_any lhs_value;                               \
-		union hf_test_any rhs_value;                               \
-		hf_test_any_get(lhs_value, lhs) = (lhs);                   \
-		hf_test_any_get(rhs_value, rhs) = (rhs);                   \
-		if (!(hf_test_any_get(lhs_value, lhs)                      \
-			      op hf_test_any_get(rhs_value, rhs))) {       \
-			++hf_test_ctx->failures;                           \
-			dlog(HF_TEST_LOG_PREFIX "  %s:%u: Failure\n",      \
-			     __FILE__, __LINE__);                          \
-			dlog(HF_TEST_LOG_PREFIX "    %s %s %s (%s=", #lhs, \
-			     #op, #rhs, #lhs);                             \
-			dlog(hf_test_dlog_format(lhs),                     \
-			     hf_test_any_get(lhs_value, lhs));             \
-			dlog(", %s=", #rhs);                               \
-			dlog(hf_test_dlog_format(rhs),                     \
-			     hf_test_any_get(rhs_value, rhs));             \
-			dlog(")\n");                                       \
-			if (fatal) {                                       \
-				return;                                    \
-			}                                                  \
-		}                                                          \
-	} while (0)
-
-#define ASSERT_EQ(x, y) ASSERT_OP(x, y, ==, true)
-#define ASSERT_NE(x, y) ASSERT_OP(x, y, !=, true)
-#define ASSERT_LE(x, y) ASSERT_OP(x, y, <=, true)
-#define ASSERT_LT(x, y) ASSERT_OP(x, y, <, true)
-#define ASSERT_GE(x, y) ASSERT_OP(x, y, >=, true)
-#define ASSERT_GT(x, y) ASSERT_OP(x, y, >, true)
-
-#define EXPECT_EQ(x, y) ASSERT_OP(x, y, ==, false)
-#define EXPECT_NE(x, y) ASSERT_OP(x, y, !=, false)
-#define EXPECT_LE(x, y) ASSERT_OP(x, y, <=, false)
-#define EXPECT_LT(x, y) ASSERT_OP(x, y, <, false)
-#define EXPECT_GE(x, y) ASSERT_OP(x, y, >=, false)
-#define EXPECT_GT(x, y) ASSERT_OP(x, y, >, false)
-
-/*
- * Declare a test case.
- */
-#define TEST(name) static void name(struct hf_test_context *hf_test_ctx)
-
-/*
- * Run a test case.
- */
-#define RUN_TEST(test)                                       \
-	do {                                                 \
-		struct hf_test_context ctx = {               \
-			.failures = 0,                       \
-		};                                           \
-		dlog(HF_TEST_LOG_PREFIX "RUN %s\n", #test);  \
-		test(&ctx);                                  \
-		if (ctx.failures) {                          \
-			dlog(HF_TEST_LOG_PREFIX "FAILED\n"); \
-		} else {                                     \
-			dlog(HF_TEST_LOG_PREFIX "OK\n");     \
-		}                                            \
-	} while (0)
diff --git a/test/vm/hftest.c b/test/vm/hftest.c
new file mode 100644
index 0000000..90b5cf0
--- /dev/null
+++ b/test/vm/hftest.c
@@ -0,0 +1,232 @@
+#include "hftest.h"
+
+#include <stdalign.h>
+#include <stdint.h>
+
+#include "hf/fdt.h"
+#include "hf/memiter.h"
+
+alignas(4096) uint8_t kstack[4096];
+
+HFTEST_ENABLE();
+
+extern struct hftest_test hftest_begin[];
+extern struct hftest_test hftest_end[];
+
+static void json(void)
+{
+	struct hftest_test *test;
+	const char *suite = NULL;
+	size_t suites_in_image = 0;
+	size_t tests_in_suite = 0;
+
+	HFTEST_LOG("{");
+	HFTEST_LOG("  \"suites\": [");
+	for (test = hftest_begin; test < hftest_end; ++test) {
+		if (test->suite != suite) {
+			/* Close out previously open suite. */
+			if (tests_in_suite) {
+				HFTEST_LOG("      ]");
+				HFTEST_LOG("    },");
+			}
+			/* Move onto new suite. */
+			++suites_in_image;
+			suite = test->suite;
+			tests_in_suite = 0;
+			HFTEST_LOG("    {");
+			HFTEST_LOG("      \"name\": \"%s\",", test->suite);
+		}
+		if (test->kind == HFTEST_KIND_SET_UP) {
+			HFTEST_LOG("      \"setup\": true,");
+		}
+		if (test->kind == HFTEST_KIND_TEAR_DOWN) {
+			HFTEST_LOG("      \"teardown\": true,");
+		}
+		if (test->kind == HFTEST_KIND_TEST) {
+			if (!tests_in_suite) {
+				HFTEST_LOG("      \"tests\": [");
+			}
+			/* It's easier to put the comma at the start of the line
+			 * than the end even
+			 * though the JSON looks a bit funky. */
+			HFTEST_LOG("       %c\"%s\"",
+				   tests_in_suite ? ',' : ' ', test->name);
+			++tests_in_suite;
+		}
+	}
+	if (tests_in_suite) {
+		HFTEST_LOG("      ]");
+		HFTEST_LOG("    }");
+	}
+	HFTEST_LOG("  ]");
+	HFTEST_LOG("}");
+}
+
+static void run_test(hftest_test_fn set_up, hftest_test_fn test,
+		     hftest_test_fn tear_down)
+{
+	struct hftest_context ctx = {
+		.failures = 0,
+	};
+
+	if (set_up) {
+		set_up(&ctx);
+		if (ctx.failures) {
+			goto fail;
+		}
+	}
+
+	test(&ctx);
+	if (ctx.failures) {
+		goto fail;
+	}
+
+	if (tear_down) {
+		tear_down(&ctx);
+		if (ctx.failures) {
+			goto fail;
+		}
+	}
+
+	HFTEST_LOG("PASS");
+	return;
+
+fail:
+	HFTEST_LOG("FAIL");
+}
+
+/**
+ * Runs a single test. The suite name and then the test name are parsed from
+ * `args`. The result is logged by run_test(); an error is logged instead if
+ * the arguments cannot be parsed or the image has no such test in that suite.
+ * Only entries whose suite name matches are ever considered.
+ */
+static void run(struct memiter *args)
+{
+	struct memiter suite_name;
+	struct memiter test_name;
+	struct hftest_test *test;
+	bool found_suite = false;
+	hftest_test_fn suite_set_up = NULL;
+	hftest_test_fn suite_tear_down = NULL;
+
+	if (!memiter_parse_str(args, &suite_name)) {
+		HFTEST_LOG("Unable to parse test suite.");
+		return;
+	}
+
+	if (!memiter_parse_str(args, &test_name)) {
+		HFTEST_LOG("Unable to parse test.");
+		return;
+	}
+
+	for (test = hftest_begin; test < hftest_end; ++test) {
+		/* Skip entries from other suites entirely so that neither a
+		 * same-named test nor a set up or tear down function is ever
+		 * taken from the wrong suite. */
+		if (!memiter_iseq(&suite_name, test->suite)) {
+			if (found_suite) {
+				/* Walked past the end of the requested suite
+				 * so the test wasn't in it. */
+				break;
+			}
+			continue;
+		}
+		found_suite = true;
+
+		switch (test->kind) {
+		/* The first entries in the suite are the set up and tear down
+		 * functions. */
+		case HFTEST_KIND_SET_UP:
+			suite_set_up = test->fn;
+			break;
+		case HFTEST_KIND_TEAR_DOWN:
+			suite_tear_down = test->fn;
+			break;
+		/* Find the test. */
+		case HFTEST_KIND_TEST:
+			if (memiter_iseq(&test_name, test->name)) {
+				run_test(suite_set_up, test->fn,
+					 suite_tear_down);
+				return;
+			}
+			break;
+		}
+	}
+
+	HFTEST_LOG("Unable to find requested tests.");
+}
+
+void help(void)
+{
+	HFTEST_LOG("usage:");
+	HFTEST_LOG("");
+	HFTEST_LOG("  help");
+	HFTEST_LOG("");
+	HFTEST_LOG("    Show this help.");
+	HFTEST_LOG("");
+	HFTEST_LOG("  json");
+	HFTEST_LOG("");
+	HFTEST_LOG(
+		"    Print a directory of test suites and tests in "
+		"JSON "
+		"format.");
+	HFTEST_LOG("");
+	HFTEST_LOG("  run <suite> <test>");
+	HFTEST_LOG("");
+	HFTEST_LOG("    Run the named test from the named test suite.");
+}
+
+void main(const struct fdt_header *fdt)
+{
+	struct fdt_node n;
+	const char *bootargs;
+	uint32_t bootargs_size;
+	struct memiter bootargs_iter;
+	struct memiter command;
+
+	if (!fdt_root_node(&n, fdt)) {
+		HFTEST_LOG("FDT failed validation.");
+		return;
+	}
+
+	if (!fdt_find_child(&n, "")) {
+		HFTEST_LOG("Unable to find root node in FDT.");
+		return;
+	}
+
+	if (!fdt_find_child(&n, "chosen")) {
+		HFTEST_LOG("Unable to find 'chosen' node in FDT.");
+		return;
+	}
+
+	if (!fdt_read_property(&n, "bootargs", &bootargs, &bootargs_size)) {
+		HFTEST_LOG("Unable to read bootargs.");
+		return;
+	}
+
+	/* Remove null terminator. */
+	memiter_init(&bootargs_iter, bootargs, bootargs_size - 1);
+
+	if (!memiter_parse_str(&bootargs_iter, &command)) {
+		HFTEST_LOG("Unable to parse command.");
+		return;
+	}
+
+	if (memiter_iseq(&command, "json")) {
+		json();
+		return;
+	}
+
+	if (memiter_iseq(&command, "run")) {
+		run(&bootargs_iter);
+		return;
+	}
+
+	help();
+}
+
+void kmain(const struct fdt_header *fdt)
+{
+	main(fdt);
+}
diff --git a/test/vm/hftest.h b/test/vm/hftest.h
new file mode 100644
index 0000000..6e22a3e
--- /dev/null
+++ b/test/vm/hftest.h
@@ -0,0 +1,231 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hf/dlog.h"
+
+/*
+ * Define a set up function to be run before every test in a test suite.
+ */
+#define SET_UP(suite) HFTEST_SET_UP(suite)
+
+/*
+ * Define a tear down function to be run after every test in a test suite.
+ */
+#define TEAR_DOWN(suite) HFTEST_TEAR_DOWN(suite)
+
+/*
+ * Define a test as part of a test suite.
+ */
+#define TEST(suite, test) HFTEST_TEST(suite, test)
+
+/* Assertions. */
+#define ASSERT_EQ(x, y) ASSERT_OP(x, y, ==, true)
+#define ASSERT_NE(x, y) ASSERT_OP(x, y, !=, true)
+#define ASSERT_LE(x, y) ASSERT_OP(x, y, <=, true)
+#define ASSERT_LT(x, y) ASSERT_OP(x, y, <, true)
+#define ASSERT_GE(x, y) ASSERT_OP(x, y, >=, true)
+#define ASSERT_GT(x, y) ASSERT_OP(x, y, >, true)
+
+#define EXPECT_EQ(x, y) ASSERT_OP(x, y, ==, false)
+#define EXPECT_NE(x, y) ASSERT_OP(x, y, !=, false)
+#define EXPECT_LE(x, y) ASSERT_OP(x, y, <=, false)
+#define EXPECT_LT(x, y) ASSERT_OP(x, y, <, false)
+#define EXPECT_GE(x, y) ASSERT_OP(x, y, >=, false)
+#define EXPECT_GT(x, y) ASSERT_OP(x, y, >, false)
+
+/*
+ * This must be used exactly once in a test image to signal to the linker that
+ * the .hftest section is allowed to be included in the generated image.
+ */
+#define HFTEST_ENABLE() int hftest_enable
+
+/*
+ * Prefixed to log lines from tests for easy filtering in the console.
+ */
+#define HFTEST_LOG_PREFIX "[hftest] "
+
+/* Above this point is the public API. Below are the implementation details. */
+
+/* Log with the HFTEST_LOG_PREFIX and a new line. The zero is added so there is
+ * always at least one variadic argument. */
+#define HFTEST_LOG(...) HFTEST_LOG_IMPL(__VA_ARGS__, 0)
+#define HFTEST_LOG_IMPL(format, ...) \
+	dlog("%s" format "\n", HFTEST_LOG_PREFIX, __VA_ARGS__)
+
+/* Helper to wrap the argument in quotes. */
+#define HFTEST_STR(str) #str
+
+/* Sections are named such that when the linker sorts them, all entries for the
+ * same test suite are contiguous and the set up and tear down entries come
+ * before the tests. This order simplifies test discovery in the running image.
+ */
+#define HFTEST_SET_UP_SECTION(suite_name) \
+	HFTEST_STR(.hftest.suite_name .1set_up)
+#define HFTEST_TEAR_DOWN_SECTION(suite_name) \
+	HFTEST_STR(.hftest.suite_name .1tear_down)
+#define HFTEST_TEST_SECTION(suite_name, test_name) \
+	HFTEST_STR(.hftest.suite_name .2test.test_name)
+
+/* Helpers to construct unique identifiers. */
+#define HFTEST_SET_UP_STRUCT(suite_name) hftest_set_up_##suite_name
+#define HFTEST_TEAR_DOWN_STRUCT(suite_name) hftest_tear_down_##suite_name
+#define HFTEST_TEST_STRUCT(suite_name, test_name) \
+	hftest_test_##suite_name##_##test_name
+
+#define HFTEST_SET_UP_FN(suite_name) hftest_set_up_fn_##suite_name
+#define HFTEST_TEAR_DOWN_FN(suite_name) hftest_tear_down_fn_##suite_name
+#define HFTEST_TEST_FN(suite_name, test_name) \
+	hftest_test_fn_##suite_name##_##test_name
+
+/* Register test functions. */
+#define HFTEST_SET_UP(suite_name)                                           \
+	static void HFTEST_SET_UP_FN(suite_name)(struct hftest_context *    \
+						 hftest_ctx);               \
+	const struct hftest_test __attribute__((used))                      \
+		__attribute__((section(HFTEST_SET_UP_SECTION(suite_name)))) \
+			HFTEST_SET_UP_STRUCT(suite_name) = {                \
+				.suite = #suite_name,                       \
+				.kind = HFTEST_KIND_SET_UP,                 \
+				.fn = HFTEST_SET_UP_FN(suite_name),         \
+	};                                                                  \
+	static void HFTEST_SET_UP_FN(suite_name)(                           \
+		__attribute__((unused)) struct hftest_context * hftest_ctx)
+
+#define HFTEST_TEAR_DOWN(suite_name)                                           \
+	static void HFTEST_TEAR_DOWN_FN(suite_name)(struct hftest_context *    \
+						    hftest_ctx);               \
+	const struct hftest_test __attribute__((used))                         \
+		__attribute__((section(HFTEST_TEAR_DOWN_SECTION(suite_name)))) \
+			HFTEST_TEAR_DOWN_STRUCT(suite_name) = {                \
+				.suite = #suite_name,                          \
+				.kind = HFTEST_KIND_TEAR_DOWN,                 \
+				.fn = HFTEST_TEAR_DOWN_FN(suite_name),         \
+	};                                                                     \
+	static void HFTEST_TEAR_DOWN_FN(suite_name)(                           \
+		__attribute__((unused)) struct hftest_context * hftest_ctx)
+
+#define HFTEST_TEST(suite_name, test_name)                                  \
+	static void HFTEST_TEST_FN(                                         \
+		suite_name, test_name)(struct hftest_context * hftest_ctx); \
+	const struct hftest_test __attribute__((used)) __attribute__(       \
+		(section(HFTEST_TEST_SECTION(suite_name, test_name))))      \
+		HFTEST_TEST_STRUCT(suite_name, test_name) = {               \
+			.suite = #suite_name,                               \
+			.kind = HFTEST_KIND_TEST,                           \
+			.name = #test_name,                                 \
+			.fn = HFTEST_TEST_FN(suite_name, test_name),        \
+	};                                                                  \
+	static void HFTEST_TEST_FN(suite_name, test_name)(                  \
+		__attribute__((unused)) struct hftest_context * hftest_ctx)
+
+/* Context for tests. */
+struct hftest_context {
+	uint32_t failures;
+};
+
+/* A test case. */
+typedef void (*hftest_test_fn)(struct hftest_context *);
+
+enum hftest_kind {
+	HFTEST_KIND_SET_UP = 0,
+	HFTEST_KIND_TEST = 1,
+	HFTEST_KIND_TEAR_DOWN = 2,
+};
+
+struct hftest_test {
+	const char *suite;
+	enum hftest_kind kind;
+	const char *name;
+	hftest_test_fn fn;
+};
+
+/*
+ * This union can store any of the primitive types supported by the assertion
+ * macros.
+ *
+ * It does not include pointers as comparison of pointers is not often needed
+ * and could be a mistake for string comparison. If pointer comparison is needed
+ * an explicit assertion such as ASSERT_PTR_EQ() would be more appropriate.
+ */
+union hftest_any {
+	bool b;
+	char c;
+	signed char sc;
+	unsigned char uc;
+	signed short ss;
+	unsigned short us;
+	signed int si;
+	unsigned int ui;
+	signed long int sli;
+	unsigned long int uli;
+	signed long long int slli;
+	unsigned long long int ulli;
+};
+
+/* _Generic formatting doesn't seem to be supported so doing this manually. */
+/* clang-format off */
+
+/* Select the union member to match the type of the expression. */
+#define hftest_any_get(any, x)                      \
+	_Generic((x),                               \
+		bool:                   (any).b,    \
+		char:                   (any).c,    \
+		signed char:            (any).sc,   \
+		unsigned char:          (any).uc,   \
+		signed short:           (any).ss,   \
+		unsigned short:         (any).us,   \
+		signed int:             (any).si,   \
+		unsigned int:           (any).ui,   \
+		signed long int:        (any).sli,  \
+		unsigned long int:      (any).uli,  \
+		signed long long int:   (any).slli, \
+		unsigned long long int: (any).ulli)
+
+/*
+ * dlog format specifier for types. Note, these aren't the standard specifiers
+ * for the types.
+ */
+#define hftest_dlog_format(x)                 \
+	_Generic((x),                         \
+		bool:                   "%u", \
+		char:                   "%c", \
+		signed char:            "%d", \
+		unsigned char:          "%u", \
+		signed short:           "%d", \
+		unsigned short:         "%u", \
+		signed int:             "%d", \
+		unsigned int:           "%u", \
+		signed long int:        "%d", \
+		unsigned long int:      "%u", \
+		signed long long int:   "%d", \
+		unsigned long long int: "%u")
+
+/* clang-format on */
+
+#define ASSERT_OP(lhs, rhs, op, fatal)                                         \
+	do {                                                                   \
+		union hftest_any lhs_value;                                    \
+		union hftest_any rhs_value;                                    \
+		hftest_any_get(lhs_value, lhs) = (lhs);                        \
+		hftest_any_get(rhs_value, rhs) = (rhs);                        \
+		if (!(hftest_any_get(lhs_value, lhs)                           \
+			      op hftest_any_get(rhs_value, rhs))) {            \
+			++hftest_ctx->failures;                                \
+			dlog(HFTEST_LOG_PREFIX "  %s:%u: Failure\n", __FILE__, \
+			     __LINE__);                                        \
+			dlog(HFTEST_LOG_PREFIX "    %s %s %s (%s=", #lhs, #op, \
+			     #rhs, #lhs);                                      \
+			dlog(hftest_dlog_format(lhs),                          \
+			     hftest_any_get(lhs_value, lhs));                  \
+			dlog(", %s=", #rhs);                                   \
+			dlog(hftest_dlog_format(rhs),                          \
+			     hftest_any_get(rhs_value, rhs));                  \
+			dlog(")\n");                                           \
+			if (fatal) {                                           \
+				return;                                        \
+			}                                                      \
+		}                                                              \
+	} while (0)
diff --git a/test/vm/hftest.py b/test/vm/hftest.py
new file mode 100755
index 0000000..6cf8e97
--- /dev/null
+++ b/test/vm/hftest.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+"""Run tests.
+
+Runs tests on QEMU.
+"""
+
+from __future__ import print_function
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+
+
+def qemu(hafnium, initrd, args, log):
+    qemu_args = [
+        "timeout", "--foreground", "5s",
+        "./prebuilts/linux-x64/qemu/qemu-system-aarch64", "-M", "virt", "-cpu",
+        "cortex-a57", "-m", "8M", "-machine", "virtualization=true",
+        "-nographic", "-nodefaults", "-serial", "stdio", "-kernel", hafnium,
+        "-initrd", initrd
+    ]
+    if args:
+        qemu_args += ["-append", args]
+    # Save the log to a file.
+    with open(log, "w") as f:
+        f.write("$ {}\r\n".format(" ".join(qemu_args)))
+        f.flush()
+        subprocess.check_call(qemu_args, stdout=f, stderr=f)
+    # Return that log for processing.
+    with open(log, "r") as f:
+        return f.read()
+
+
+def ensure_dir(path):
+    try:
+        os.makedirs(path)
+    except OSError:
+        if not os.path.isdir(path):
+            raise
+
+
+def hftest_lines(raw):
+    prefix = "[hftest] "
+    return [
+        line[len(prefix):]
+        for line in raw.splitlines()
+        if line.startswith(prefix)
+    ]
+
+
+def Main():
+    """Run each selected test in a fresh QEMU and return the exit status."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--out", required=True)
+    parser.add_argument("--initrd", required=True)
+    parser.add_argument("--suite")
+    parser.add_argument("--test")
+    args = parser.parse_args()
+    # Resolve some paths.
+    hafnium = os.path.join(args.out, "hafnium.bin")
+    initrd = os.path.join(args.out, "initrd", args.initrd + ".img")
+    log = os.path.join(args.out, "test_log", args.initrd)
+    ensure_dir(log)
+    print("Logs saved under", log)
+    # Query the tests in the image.
+    out = qemu(hafnium, initrd, "json", os.path.join(log, "json.log"))
+    hftest_json = "\n".join(hftest_lines(out))
+    tests = json.loads(hftest_json)
+    # Run the selected tests.
+    tests_run = 0
+    failures = 0
+    suite_re = re.compile(args.suite or ".*")
+    test_re = re.compile(args.test or ".*")
+    for suite in tests['suites']:
+        if not suite_re.match(suite['name']):
+            continue
+        tests_run_from_suite = 0
+        for test in suite['tests']:
+            if not test_re.match(test):
+                continue
+            tests_run_from_suite += 1
+            if tests_run_from_suite == 1:
+                print("    SUITE", suite['name'])
+            print("      RUN", test)
+            test_log = os.path.join(log, suite['name'] + "." + test + ".log")
+            out = qemu(hafnium, initrd, "run {} {}".format(suite['name'], test), test_log)
+            hftest_out = hftest_lines(out)
+            if hftest_out and hftest_out[-1] == "PASS":  # empty log is a FAIL
+                print("        PASS")
+            else:
+                failures += 1
+                print("[x]     FAIL --", test_log)
+        tests_run += tests_run_from_suite
+    # If none were run, this is probably a mistake.
+    if tests_run == 0:
+        print("Error: no tests match")
+        return 10
+    # Exit with 0 on success and 1 if any test failed.
+    if failures:
+        print("[x] FAIL:", failures, "of", tests_run, "tests failed")
+        return 1
+    print("    PASS: all", tests_run, "tests passed")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(Main())
diff --git a/test/vm/primary_only/BUILD.gn b/test/vm/primary_only/BUILD.gn
index b4e1212..821ef0e 100644
--- a/test/vm/primary_only/BUILD.gn
+++ b/test/vm/primary_only/BUILD.gn
@@ -8,7 +8,7 @@
   ]
 
   deps = [
-    "//test/vm:hf_test_vm",
+    "//test/vm:hftest_vm",
   ]
 }
 
diff --git a/test/vm/primary_only/primary.c b/test/vm/primary_only/primary.c
index 03dc95c..18c4709 100644
--- a/test/vm/primary_only/primary.c
+++ b/test/vm/primary_only/primary.c
@@ -1,29 +1,18 @@
-#include <stdint.h>
-
 #include "vmapi/hf/call.h"
 
-#include "../hf_test.h"
+#include "../hftest.h"
 
-uint8_t kstack[4096] __attribute__((aligned(4096)));
-
-TEST(vm_get_count)
+TEST(hf_vm_get_count, no_secondary_vms)
 {
 	EXPECT_EQ(hf_vm_get_count(), 0);
 }
 
-TEST(vcpu_get_count_when_no_secondary_vm)
+TEST(hf_vcpu_get_count, no_secondary_vms)
 {
 	EXPECT_EQ(hf_vcpu_get_count(0), -1);
 }
 
-TEST(vcpu_get_count_for_large_invalid_vm_index)
+TEST(hf_vcpu_get_count, large_invalid_vm_index)
 {
 	EXPECT_EQ(hf_vcpu_get_count(0xffffffff), -1);
 }
-
-void kmain(void)
-{
-	RUN_TEST(vm_get_count);
-	RUN_TEST(vcpu_get_count_when_no_secondary_vm);
-	RUN_TEST(vcpu_get_count_for_large_invalid_vm_index);
-}
diff --git a/test/vm/primary_with_secondary/BUILD.gn b/test/vm/primary_with_secondary/BUILD.gn
index 512ccfd..8b4c489 100644
--- a/test/vm/primary_with_secondary/BUILD.gn
+++ b/test/vm/primary_with_secondary/BUILD.gn
@@ -8,7 +8,7 @@
   ]
 
   deps = [
-    "//test/vm:hf_test_vm",
+    "//test/vm:hftest_vm",
   ]
 }
 
@@ -20,7 +20,7 @@
   ]
 
   deps = [
-    "//test/vm:hf_test_vm",
+    "//test/vm:other_vm",
   ]
 }
 
diff --git a/test/vm/primary_with_secondary/primary.c b/test/vm/primary_with_secondary/primary.c
index 5502904..58d8ba4 100644
--- a/test/vm/primary_with_secondary/primary.c
+++ b/test/vm/primary_with_secondary/primary.c
@@ -6,35 +6,49 @@
 
 #include "vmapi/hf/call.h"
 
-#include "../hf_test.h"
+#include "../hftest.h"
 
-alignas(4096) uint8_t kstack[4096];
-
-alignas(PAGE_SIZE) uint8_t send_page[PAGE_SIZE];
-alignas(PAGE_SIZE) uint8_t recv_page[PAGE_SIZE];
+static alignas(PAGE_SIZE) uint8_t send_page[PAGE_SIZE];
+static alignas(PAGE_SIZE) uint8_t recv_page[PAGE_SIZE];
 static_assert(sizeof(send_page) == PAGE_SIZE, "Send page is not a page.");
 static_assert(sizeof(recv_page) == PAGE_SIZE, "Recv page is not a page.");
 
-TEST(vm_get_count)
+static hf_ipaddr_t send_page_addr = (hf_ipaddr_t)send_page;
+static hf_ipaddr_t recv_page_addr = (hf_ipaddr_t)recv_page;
+
+/**
+ * Confirm there is 1 secondary VM.
+ */
+TEST(hf_vm_get_count, one_secondary_vm)
 {
 	EXPECT_EQ(hf_vm_get_count(), 1);
 }
 
-TEST(vcpu_get_count_when_no_secondary_vm)
+/**
+ * Confirm that the secondary VM has 1 VCPU.
+ */
+TEST(hf_vcpu_get_count, secondary_has_one_vcpu)
 {
 	EXPECT_EQ(hf_vcpu_get_count(0), 1);
 }
 
-TEST(vcpu_get_count_for_large_invalid_vm_index)
+/**
+ * Confirm it is an error to query how many VCPUs are assigned to a nonexistent
+ * secondary VM.
+ */
+TEST(hf_vcpu_get_count, large_invalid_vm_index)
 {
 	EXPECT_EQ(hf_vcpu_get_count(0xffffffff), -1);
 }
 
-TEST(vm_configure_fails_with_unaligned_pointer)
+/**
+ * The configured send/receive addresses can't be unaligned.
+ */
+TEST(hf_vm_configure, fails_with_unaligned_pointer)
 {
 	uint8_t maybe_aligned[2];
 	hf_ipaddr_t unaligned_addr = (hf_ipaddr_t)&maybe_aligned[1];
-	hf_ipaddr_t aligned_addr = (hf_ipaddr_t)&send_page;
+	hf_ipaddr_t aligned_addr = (hf_ipaddr_t)send_page;
 
 	/* Check the the address is unaligned. */
 	ASSERT_EQ(unaligned_addr & 1, 1);
@@ -44,40 +58,29 @@
 	EXPECT_EQ(hf_vm_configure(unaligned_addr, unaligned_addr), -1);
 }
 
-TEST(vm_configure_fails_with_same_page)
+/**
+ * The configured send/receive addresses can't be the same page.
+ */
+TEST(hf_vm_configure, fails_with_same_page)
 {
-	EXPECT_EQ(
-		hf_vm_configure((hf_ipaddr_t)send_page, (hf_ipaddr_t)send_page),
-		-1);
-	EXPECT_EQ(
-		hf_vm_configure((hf_ipaddr_t)recv_page, (hf_ipaddr_t)recv_page),
-		-1);
+	EXPECT_EQ(hf_vm_configure(send_page_addr, send_page_addr), -1);
+	EXPECT_EQ(hf_vm_configure(recv_page_addr, recv_page_addr), -1);
 }
 
-TEST(vm_configure)
+/**
+ * The configuration of the send/receive addresses can only happen once.
+ */
+TEST(hf_vm_configure, fails_if_already_succeeded)
 {
-	EXPECT_EQ(
-		hf_vm_configure((hf_ipaddr_t)send_page, (hf_ipaddr_t)recv_page),
-		0);
+	EXPECT_EQ(hf_vm_configure(send_page_addr, recv_page_addr), 0);
+	EXPECT_EQ(hf_vm_configure(send_page_addr, recv_page_addr), -1);
 }
 
-TEST(vm_configure_fails_if_already_succeeded)
+/**
+ * The configuration of the send/receive address is successful with valid
+ * arguments.
+ */
+TEST(hf_vm_configure, succeeds)
 {
-	EXPECT_EQ(
-		hf_vm_configure((hf_ipaddr_t)send_page, (hf_ipaddr_t)recv_page),
-		-1);
-}
-
-void kmain(void)
-{
-	RUN_TEST(vm_get_count);
-	RUN_TEST(vcpu_get_count_when_no_secondary_vm);
-	RUN_TEST(vcpu_get_count_for_large_invalid_vm_index);
-
-	/* TODO: the order matters as the configuration can only be set once.
-	 * We'll need to work out how to deal with this better in the tests. */
-	RUN_TEST(vm_configure_fails_with_unaligned_pointer);
-	RUN_TEST(vm_configure_fails_with_same_page);
-	RUN_TEST(vm_configure);
-	RUN_TEST(vm_configure_fails_if_already_succeeded);
+	EXPECT_EQ(hf_vm_configure(send_page_addr, recv_page_addr), 0);
 }
diff --git a/test/vm/primary_with_secondary/secondary.c b/test/vm/primary_with_secondary/secondary.c
index 19f6901..788e310 100644
--- a/test/vm/primary_with_secondary/secondary.c
+++ b/test/vm/primary_with_secondary/secondary.c
@@ -1,10 +1,11 @@
+#include <stdalign.h>
 #include <stdint.h>
 
 #include "vmapi/hf/call.h"
 
-#include "../hf_test.h"
+#include "../hftest.h"
 
-uint8_t kstack[4096] __attribute__((aligned(4096)));
+alignas(4096) uint8_t kstack[4096];
 
 void kmain(void)
 {