Merge "README: link to the design doc"
diff --git a/inc/cpio.h b/inc/cpio.h
index a563ae6..ad6f4b7 100644
--- a/inc/cpio.h
+++ b/inc/cpio.h
@@ -4,19 +4,9 @@
 #include <stdbool.h>
 #include <stddef.h>
 
-struct cpio {
-	const struct cpio_header *first;
-	size_t total_size;
-};
+#include "memiter.h"
 
-struct cpio_iter {
-	const struct cpio_header *cur;
-	size_t size_left;
-};
-
-void cpio_init(struct cpio *c, const void *buf, size_t size);
-void cpio_init_iter(struct cpio *c, struct cpio_iter *iter);
-bool cpio_next(struct cpio_iter *iter, const char **name, const void **contents,
+bool cpio_next(struct memiter *iter, const char **name, const void **contents,
 	       size_t *size);
 
 #endif /* _CPIO_H */
diff --git a/inc/load.h b/inc/load.h
index 93aa535..7a41b9a 100644
--- a/inc/load.h
+++ b/inc/load.h
@@ -7,7 +7,9 @@
 #include "cpio.h"
 #include "memiter.h"
 
-bool load_primary(struct cpio *c, size_t kernel_arg, struct memiter *initrd);
-bool load_secondary(struct cpio *c, uint64_t mem_begin, uint64_t *mem_end);
+bool load_primary(const struct memiter *cpio, size_t kernel_arg,
+		  struct memiter *initrd);
+bool load_secondary(const struct memiter *cpio, uint64_t mem_begin,
+		    uint64_t *mem_end);
 
 #endif /* _LOAD_H */
diff --git a/inc/memiter.h b/inc/memiter.h
index ffaa822..9e066da 100644
--- a/inc/memiter.h
+++ b/inc/memiter.h
@@ -14,5 +14,6 @@
 bool memiter_parse_uint(struct memiter *it, uint64_t *value);
 bool memiter_parse_str(struct memiter *it, struct memiter *str);
 bool memiter_iseq(const struct memiter *it, const char *str);
+bool memiter_advance(struct memiter *it, size_t v);
 
 #endif /* _MEMITER_H */
diff --git a/inc/mm.h b/inc/mm.h
index 3a5c691..7eb3169 100644
--- a/inc/mm.h
+++ b/inc/mm.h
@@ -2,12 +2,13 @@
 #define _MM_H
 
 #include <stdbool.h>
+#include <stdint.h>
 
 #include "arch_mm.h"
 
 struct mm_ptable {
-	struct arch_mm_ptable arch;
 	pte_t *table;
+	uint32_t id;
 };
 
 #define PAGE_SIZE (1 << PAGE_BITS)
@@ -32,13 +33,20 @@
  */
 #define MM_MODE_STAGE1 0x20
 
-bool mm_ptable_init(struct mm_ptable *t, int mode);
-void mm_ptable_dump(struct mm_ptable *t);
+/*
+ * This flag indicates that no TLB invalidations should be issued for the
+ * changes in the page table.
+ */
+#define MM_MODE_NOINVALIDATE 0x40
+
+bool mm_ptable_init(struct mm_ptable *t, uint32_t id, int mode);
+void mm_ptable_dump(struct mm_ptable *t, int mode);
 bool mm_ptable_map(struct mm_ptable *t, vaddr_t begin, vaddr_t end,
 		   paddr_t paddr, int mode);
 bool mm_ptable_map_page(struct mm_ptable *t, vaddr_t va, paddr_t pa, int mode);
 bool mm_ptable_unmap(struct mm_ptable *t, vaddr_t begin, vaddr_t end, int mode);
 void mm_ptable_defrag(struct mm_ptable *t, int mode);
+bool mm_ptable_unmap_hypervisor(struct mm_ptable *t, int mode);
 
 bool mm_init(void);
 bool mm_map(vaddr_t begin, vaddr_t end, paddr_t paddr, int mode);
diff --git a/inc/vm.h b/inc/vm.h
index eb6a386..847d5b7 100644
--- a/inc/vm.h
+++ b/inc/vm.h
@@ -2,15 +2,17 @@
 #define _VM_H
 
 #include "cpu.h"
+#include "mm.h"
 
 struct vm {
-	struct vcpu vcpus[MAX_CPUS];
+	struct mm_ptable ptable;
 	uint32_t vcpu_count;
-	struct arch_page_table page_table;
+	struct vcpu vcpus[MAX_CPUS];
 };
 
-void vm_init(struct vm *vm, uint32_t vcpu_count);
+bool vm_init(struct vm *vm, uint32_t id, uint32_t vcpu_count);
 void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg,
 		   bool is_primary);
+void vm_set_current(struct vm *vm);
 
 #endif /* _VM_H */
diff --git a/src/api.c b/src/api.c
index 31ff376..0030473 100644
--- a/src/api.c
+++ b/src/api.c
@@ -49,7 +49,7 @@
 		return HF_VCPU_WAIT_FOR_INTERRUPT;
 	}
 
-	arch_set_vm_mm(&vm->page_table);
+	vm_set_current(vm);
 	*next = vcpu;
 
 	return HF_VCPU_YIELD;
@@ -64,7 +64,7 @@
 	struct vcpu *vcpu = &primary_vm.vcpus[cpu_index(cpu())];
 
 	/* Switch back to primary VM. */
-	arch_set_vm_mm(&primary_vm.page_table);
+	vm_set_current(&primary_vm);
 
 	/*
 	 * Inidicate to primary VM that this vcpu blocked waiting for an
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index 94a9722..6c00067 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -92,7 +92,7 @@
 	 * VM. */
 
 	/* Switch back to primary VM, interrupts will be handled there. */
-	arch_set_vm_mm(&primary_vm.page_table);
+	vm_set_current(&primary_vm);
 	return &primary_vm.vcpus[cpu_index(cpu())];
 }
 
diff --git a/src/arch/aarch64/inc/arch_cpu.h b/src/arch/aarch64/inc/arch_cpu.h
index c886844..1a3055f 100644
--- a/src/arch/aarch64/inc/arch_cpu.h
+++ b/src/arch/aarch64/inc/arch_cpu.h
@@ -40,12 +40,6 @@
 	} lazy;
 };
 
-struct arch_page_table {
-	alignas(4096) uint64_t first[512];
-	alignas(4096) uint64_t entry0[512];
-	alignas(4096) uint64_t entry1[512];
-};
-
 static inline struct cpu *cpu(void)
 {
 	struct cpu *p;
@@ -123,11 +117,4 @@
 	smc(0xC4000002, 0, 0, 0);
 }
 
-static inline void arch_set_vm_mm(struct arch_page_table *table)
-{
-	__asm volatile("msr vttbr_el2, %0" : : "r"((size_t)table));
-}
-
-void arch_vptable_init(struct arch_page_table *table);
-
 #endif /* _ARCH_CPU_H */
diff --git a/src/arch/aarch64/inc/arch_mm.h b/src/arch/aarch64/inc/arch_mm.h
index 689665c..3473f37 100644
--- a/src/arch/aarch64/inc/arch_mm.h
+++ b/src/arch/aarch64/inc/arch_mm.h
@@ -5,7 +5,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
-/* A phypiscal address. */
+/* A physical address. */
 typedef size_t paddr_t;
 
 /* A virtual address. */
@@ -17,26 +17,6 @@
 #define PAGE_LEVEL_BITS 9
 #define PAGE_BITS 12
 
-struct arch_mm_ptable {
-	int max_level;
-};
-
-/**
- * Initialises the architecture-dependents aspects of the page table.
- */
-static inline void arch_mm_ptable_init(struct arch_mm_ptable *t)
-{
-	t->max_level = 2;
-}
-
-/**
- * Determines the maximum level supported by the given page table.
- */
-static inline int arch_mm_max_level(struct arch_mm_ptable *t)
-{
-	return t->max_level;
-}
-
 /**
  * Converts a physical address to a table PTE.
  *
@@ -173,6 +153,8 @@
 {
 	vaddr_t it;
 
+	/* TODO: This only applies to the current VMID. */
+
 	begin >>= 12;
 	end >>= 12;
 
@@ -188,7 +170,13 @@
 		"dsb ish\n");
 }
 
+static inline void arch_mm_set_vm(uint64_t vmid, paddr_t table)
+{
+	__asm__ volatile("msr vttbr_el2, %0" : : "r"(table | (vmid << 48)));
+}
+
 uint64_t arch_mm_mode_to_attrs(int mode);
-void arch_mm_init(paddr_t table);
+bool arch_mm_init(paddr_t table);
+int arch_mm_max_level(int mode);
 
 #endif /* _ARCH_MM_H */
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index f5dcdf1..c5c1b4f 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -1,5 +1,6 @@
 #include "mm.h"
 #include "arch_cpu.h"
+#include "dlog.h"
 #include "msr.h"
 
 /* Keep macro alignment */
@@ -43,43 +44,20 @@
 #define STAGE2_WRITETHROUGH 2ull
 #define STAGE2_WRITEBACK    3ull
 
-#define STAGE2_MEMATTR_NORMAL(outer, inner) (((outer) << 2) | (inner))
+#define STAGE2_MEMATTR_NORMAL(outer, inner) ((((outer) << 2) | (inner)) << 2)
 
 /* The following stage-2 memory attributes for device memory. */
-#define STAGE2_MEMATTR_DEVICE_nGnRnE 0ull
-#define STAGE2_MEMATTR_DEVICE_nGnRE  1ull
-#define STAGE2_MEMATTR_DEVICE_nGRE   2ull
-#define STAGE2_MEMATTR_DEVICE_GRE    3ull
+#define STAGE2_MEMATTR_DEVICE_nGnRnE (0ull << 2)
+#define STAGE2_MEMATTR_DEVICE_nGnRE  (1ull << 2)
+#define STAGE2_MEMATTR_DEVICE_nGRE   (2ull << 2)
+#define STAGE2_MEMATTR_DEVICE_GRE    (3ull << 2)
 
 #define STAGE2_ACCESS_READ  1ull
 #define STAGE2_ACCESS_WRITE 2ull
 
 /* clang-format on */
 
-void arch_vptable_init(struct arch_page_table *table)
-{
-	uint64_t i;
-
-	/* TODO: Check each bit. */
-	for (i = 0; i < 512; i++) {
-		table->entry0[i] =
-			1 | (i << 30) | /* Address */
-			(1 << 10) |     /* Access flag. */
-			(0 << 8) |  /* sh: non-shareable. this preserves EL1. */
-			(3 << 6) |  /* rw */
-			(0xf << 2); /* normal mem; preserves EL0/1. */
-		table->entry1[i] =
-			1 | ((i + 512) << 30) | /* Address */
-			(1 << 10) |		/* Access flag. */
-			(0 << 8) |  /* sh: non-shareable. this preserves EL1. */
-			(3 << 6) |  /* rw */
-			(0xf << 2); /* normal mem; preserves EL0/1. */
-		table->first[i] = 0;
-	}
-
-	table->first[0] = (uint64_t)&table->entry0[0] | 3;
-	table->first[1] = (uint64_t)&table->entry1[0] | 3;
-}
+static uint64_t mm_max_s2_level = 2;
 
 uint64_t arch_mm_mode_to_attrs(int mode)
 {
@@ -109,7 +87,12 @@
 	} else {
 		uint64_t access = 0;
 
-		attrs |= STAGE2_AF | STAGE2_SH(OUTER_SHAREABLE);
+		/*
+		 * Non-shareable is the "neutral" share mode, i.e., the
+		 * shareability attribute of stage 1 will determine the actual
+		 * attribute.
+		 */
+		attrs |= STAGE2_AF | STAGE2_SH(NON_SHAREABLE);
 
 		/* Define the read/write bits. */
 		if (mode & MM_MODE_R) {
@@ -129,9 +112,12 @@
 			attrs |= STAGE2_XN(STAGE2_EXECUTE_NONE);
 		}
 
-		/* Define the memory attribute bits. */
+		/*
+		 * Define the memory attribute bits, using the "neutral" values
+		 * for either device or normal memory.
+		 */
 		if (mode & MM_MODE_D) {
-			attrs |= STAGE2_MEMATTR_DEVICE_nGnRnE;
+			attrs |= STAGE2_MEMATTR_DEVICE_GRE;
 		} else {
 			attrs |= STAGE2_MEMATTR_NORMAL(STAGE2_WRITEBACK,
 						       STAGE2_WRITEBACK);
@@ -141,16 +127,68 @@
 	return attrs;
 }
 
-void arch_mm_init(paddr_t table)
+/**
+ * Determines the maximum level supported by the given mode.
+ */
+int arch_mm_max_level(int mode)
 {
-	uint64_t v = (1u << 31) | /* RES1. */
-		     (4 << 16) |  /* PS: 44 bits. */
-		     (0 << 14) |  /* TG0: 4 KB granule. */
-		     (3 << 12) |  /* SH0: inner shareable. */
-		     (1 << 10) |  /* ORGN0: normal, cacheable ... */
-		     (1 << 8) |   /* IRGN0: normal, cacheable ... */
-		     (2 << 6) |   /* SL0: Start at level 0. */
-		     (20 << 0);   /* T0SZ: 44-bit input address size. */
+	if (mode & MM_MODE_STAGE1) {
+		/*
+		 * For stage 1 we hard-code this to 2 for now so that we can
+		 * save one page table level at the expense of limiting the
+		 * physical memory to 512GB.
+		 */
+		return 2;
+	}
+
+	return mm_max_s2_level;
+}
+
+bool arch_mm_init(paddr_t table)
+{
+	static const int pa_bits_table[16] = {32, 36, 40, 42, 44, 48};
+	uint64_t features = read_msr(id_aa64mmfr0_el1);
+	uint64_t v;
+	int pa_bits = pa_bits_table[features & 0xf];
+	int sl0;
+
+	/* Check that 4KB granules are supported. */
+	if ((features >> 28) & 0xf) {
+		dlog("4KB granules are not supported\n");
+		return false;
+	}
+
+	/* Check the physical address range. */
+	if (!pa_bits) {
+		dlog("Unsupported value of id_aa64mmfr0_el1.PARange: %x\n",
+		     features & 0xf);
+		return false;
+	}
+
+	dlog("Supported bits in physical address: %d\n", pa_bits);
+
+	/*
+	 * Determine sl0 based on the number of bits. The maximum value is given
+	 * in D4-7 of the ARM arm.
+	 */
+	if (pa_bits >= 44) {
+		mm_max_s2_level = 3;
+		sl0 = 2;
+	} else {
+		mm_max_s2_level = 2;
+		sl0 = 1;
+	}
+
+	dlog("Number of page table levels: %d\n", mm_max_s2_level + 1);
+
+	v = (1u << 31) |	       /* RES1. */
+	    ((features & 0xf) << 16) | /* PS, matching features. */
+	    (0 << 14) |		       /* TG0: 4 KB granule. */
+	    (3 << 12) |		       /* SH0: inner shareable. */
+	    (1 << 10) |		       /* ORGN0: normal, cacheable ... */
+	    (1 << 8) |		       /* IRGN0: normal, cacheable ... */
+	    (sl0 << 6) |	       /* SL0. */
+	    ((64 - pa_bits) << 0);     /* T0SZ: dependent on PS. */
 	write_msr(vtcr_el2, v);
 
 	/*
@@ -166,10 +204,10 @@
 	/*
 	 * Configure tcr_el2.
 	 */
-	v = (1 << 20) | /* TBI, top byte ignored. */
-	    (2 << 16) | /* PS, Physical Address Size, 40 bits, 1TB. */
-	    (0 << 14) | /* TG0, granule size, 4KB. */
-	    (3 << 12) | /* SH0, inner shareable. */
+	v = (1 << 20) |		       /* TBI, top byte ignored. */
+	    ((features & 0xf) << 16) | /* PS. */
+	    (0 << 14) |		       /* TG0, granule size, 4KB. */
+	    (3 << 12) |		       /* SH0, inner shareable. */
 	    (1 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
 	    (1 << 8) |  /* IRGN0, normal mem, WB RA WA Cacheable. */
 	    (25 << 0) | /* T0SZ, input address is 2^39 bytes. */
@@ -185,7 +223,7 @@
 	    (1 << 12) | /* I, instruction cache enable. */
 	    (1 << 16) | /* RES1 bit. */
 	    (1 << 18) | /* RES1 bit. */
-	    (1 << 19) | /* WXN bit, writable execute never . */
+	    (1 << 19) | /* WXN bit, writable execute never. */
 	    (3 << 22) | /* RES1 bits. */
 	    (3 << 28) | /* RES1 bits. */
 	    0;
@@ -194,4 +232,6 @@
 	__asm volatile("isb");
 	write_msr(sctlr_el2, v);
 	__asm volatile("isb");
+
+	return true;
 }
diff --git a/src/cpio.c b/src/cpio.c
index 2a00837..c6db268 100644
--- a/src/cpio.c
+++ b/src/cpio.c
@@ -20,63 +20,47 @@
 };
 #pragma pack(pop)
 
-void cpio_init(struct cpio *c, const void *buf, size_t size)
-{
-	c->first = buf;
-	c->total_size = size;
-}
-
-void cpio_init_iter(struct cpio *c, struct cpio_iter *iter)
-{
-	iter->cur = c->first;
-	iter->size_left = c->total_size;
-}
-
-bool cpio_next(struct cpio_iter *iter, const char **name, const void **contents,
+/**
+ * Retrieves the next file stored in the cpio archive, and advances the
+ * iterator such that another call to this function would return the
+ * following file.
+ */
+bool cpio_next(struct memiter *iter, const char **name, const void **contents,
 	       size_t *size)
 {
-	const struct cpio_header *h = iter->cur;
-	size_t size_left;
-	size_t filelen;
-	size_t namelen;
+	size_t len;
+	struct memiter lit = *iter;
+	const struct cpio_header *h = (const struct cpio_header *)lit.next;
 
-	size_left = iter->size_left;
-	if (size_left < sizeof(struct cpio_header)) {
+	if (!memiter_advance(&lit, sizeof(struct cpio_header))) {
 		return false;
 	}
 
+	*name = lit.next;
+
 	/* TODO: Check magic. */
 
-	size_left -= sizeof(struct cpio_header);
-	namelen = (h->namesize + 1) & ~1;
-	if (size_left < namelen) {
+	len = (h->namesize + 1) & ~1;
+	if (!memiter_advance(&lit, len)) {
 		return false;
 	}
 
-	size_left -= namelen;
-	filelen = (size_t)h->filesize[0] << 16 | h->filesize[1];
-	if (size_left < filelen) {
+	*contents = lit.next;
+
+	len = (size_t)h->filesize[0] << 16 | h->filesize[1];
+	if (!memiter_advance(&lit, (len + 1) & ~1)) {
 		return false;
 	}
 
 	/* TODO: Check that string is null-terminated. */
-	/* TODO: Check that trailler is not returned. */
 
 	/* Stop enumerating files when we hit the end marker. */
-	if (!strcmp((const char *)(iter->cur + 1), "TRAILER!!!")) {
+	if (!strcmp(*name, "TRAILER!!!")) {
 		return false;
 	}
 
-	size_left -= filelen;
-
-	*name = (const char *)(iter->cur + 1);
-	*contents = *name + namelen;
-	*size = filelen;
-
-	iter->cur = (struct cpio_header *)((char *)*contents + filelen);
-	iter->cur =
-		(struct cpio_header *)(char *)(((size_t)iter->cur + 1) & ~1);
-	iter->size_left = size_left;
+	*size = len;
+	*iter = lit;
 
 	return true;
 }
diff --git a/src/load.c b/src/load.c
index e0949d6..1b75008 100644
--- a/src/load.c
+++ b/src/load.c
@@ -44,15 +44,14 @@
  * null-terminated, so we use a memory iterator to represent it. The file, if
  * found, is returned in the "it" argument.
  */
-static bool memiter_find_file(struct cpio *c, const struct memiter *filename,
+static bool memiter_find_file(const struct memiter *cpio,
+			      const struct memiter *filename,
 			      struct memiter *it)
 {
 	const char *fname;
 	const void *fcontents;
 	size_t fsize;
-	struct cpio_iter iter;
-
-	cpio_init_iter(c, &iter);
+	struct memiter iter = *cpio;
 
 	while (cpio_next(&iter, &fname, &fcontents, &fsize)) {
 		if (memiter_iseq(filename, fname)) {
@@ -68,14 +67,13 @@
  * Looks for a file in the given cpio archive. The file, if found, is returned
  * in the "it" argument.
  */
-static bool find_file(struct cpio *c, const char *name, struct memiter *it)
+static bool find_file(const struct memiter *cpio, const char *name,
+		      struct memiter *it)
 {
 	const char *fname;
 	const void *fcontents;
 	size_t fsize;
-	struct cpio_iter iter;
-
-	cpio_init_iter(c, &iter);
+	struct memiter iter = *cpio;
 
 	while (cpio_next(&iter, &fname, &fcontents, &fsize)) {
 		if (!strcmp(fname, name)) {
@@ -90,11 +88,12 @@
 /**
  * Loads the primary VM.
  */
-bool load_primary(struct cpio *c, size_t kernel_arg, struct memiter *initrd)
+bool load_primary(const struct memiter *cpio, size_t kernel_arg,
+		  struct memiter *initrd)
 {
 	struct memiter it;
 
-	if (!find_file(c, "vmlinuz", &it)) {
+	if (!find_file(cpio, "vmlinuz", &it)) {
 		dlog("Unable to find vmlinuz\n");
 		return false;
 	}
@@ -104,7 +103,7 @@
 		return false;
 	}
 
-	if (!find_file(c, "initrd.img", initrd)) {
+	if (!find_file(cpio, "initrd.img", initrd)) {
 		dlog("Unable to find initrd.img\n");
 		return false;
 	}
@@ -112,11 +111,31 @@
 	{
 		size_t tmp = (size_t)&load_primary;
 		tmp = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
-		vm_init(&primary_vm, MAX_CPUS);
+		if (!vm_init(&primary_vm, 0, MAX_CPUS)) {
+			dlog("Unable to initialise primary vm\n");
+			return false;
+		}
+
+		/* Map the 1TB of memory. */
+		/* TODO: We should do a whitelist rather than a blacklist. */
+		if (!mm_ptable_map(&primary_vm.ptable, 0,
+				   1024ull * 1024 * 1024 * 1024, 0,
+				   MM_MODE_R | MM_MODE_W | MM_MODE_X |
+					   MM_MODE_NOINVALIDATE)) {
+			dlog("Unable to initialise memory for primary vm\n");
+			return false;
+		}
+
+		if (!mm_ptable_unmap_hypervisor(&primary_vm.ptable,
+						MM_MODE_NOINVALIDATE)) {
+			dlog("Unable to unmap hypervisor from primary vm\n");
+			return false;
+		}
+
 		vm_start_vcpu(&primary_vm, 0, tmp, kernel_arg, true);
 	}
 
-	arch_set_vm_mm(&primary_vm.page_table);
+	vm_set_current(&primary_vm);
 
 	return true;
 }
@@ -126,7 +145,8 @@
  * reflect the fact that some of the memory isn't available to the primary VM
  * anymore.
  */
-bool load_secondary(struct cpio *c, uint64_t mem_begin, uint64_t *mem_end)
+bool load_secondary(const struct memiter *cpio, uint64_t mem_begin,
+		    uint64_t *mem_end)
 {
 	struct memiter it;
 	struct memiter str;
@@ -134,22 +154,27 @@
 	uint64_t cpu;
 	uint32_t count;
 
-	if (!find_file(c, "vms.txt", &it)) {
+	if (!find_file(cpio, "vms.txt", &it)) {
 		dlog("vms.txt is missing\n");
 		return true;
 	}
 
+	/* Round the last address down to the page size. */
+	*mem_end &= ~(PAGE_SIZE - 1);
+
 	for (count = 0;
 	     memiter_parse_uint(&it, &mem) && memiter_parse_uint(&it, &cpu) &&
 	     memiter_parse_str(&it, &str) && count < MAX_VMS;
 	     count++) {
 		struct memiter kernel;
 
-		if (!memiter_find_file(c, &str, &kernel)) {
+		if (!memiter_find_file(cpio, &str, &kernel)) {
 			dlog("Unable to load kernel for vm %u\n", count);
 			continue;
 		}
 
+		/* Round up to page size. */
+		mem = (mem + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
 		if (mem > *mem_end - mem_begin) {
 			dlog("Not enough memory for vm %u (%u bytes)\n", count,
 			     mem);
@@ -170,11 +195,37 @@
 			continue;
 		}
 
+		if (!vm_init(secondary_vm + count, count + 1, cpu)) {
+			dlog("Unable to initialise vm %u\n", count);
+			continue;
+		}
+
+		/* TODO: Remove this. */
+		/* Grant VM access to uart. */
+		mm_ptable_map_page(&secondary_vm[count].ptable, PL011_BASE,
+				   PL011_BASE,
+				   MM_MODE_R | MM_MODE_W | MM_MODE_D |
+					   MM_MODE_NOINVALIDATE);
+
+		/* Grant the VM access to the memory. */
+		if (!mm_ptable_map(&secondary_vm[count].ptable, *mem_end,
+				   *mem_end + mem, *mem_end,
+				   MM_MODE_R | MM_MODE_W | MM_MODE_X |
+					   MM_MODE_NOINVALIDATE)) {
+			dlog("Unable to initialise memory for vm %u\n", count);
+			continue;
+		}
+
+		/* Deny the primary VM access to this memory. */
+		if (!mm_ptable_unmap(&primary_vm.ptable, *mem_end,
+				     *mem_end + mem, MM_MODE_NOINVALIDATE)) {
+			dlog("Unable to unmap secondary VM from primary VM\n");
+			return false;
+		}
+
 		dlog("Loaded VM%u with %u vcpus, entry at 0x%x\n", count, cpu,
 		     *mem_end);
 
-		/* TODO: Update page table to reflect the memory range. */
-		vm_init(secondary_vm + count, cpu);
 		vm_start_vcpu(secondary_vm + count, 0, *mem_end, 0, false);
 	}
 
diff --git a/src/main.c b/src/main.c
index 7792c8a..8862ba8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -13,7 +13,7 @@
 #include "std.h"
 #include "vm.h"
 
-char ptable_buf[PAGE_SIZE * 20];
+char ptable_buf[PAGE_SIZE * 40];
 
 /**
  * Blocks the hypervisor.
@@ -47,7 +47,7 @@
 	struct boot_params_update update;
 	uint64_t new_mem_end;
 	struct memiter primary_initrd;
-	struct cpio c;
+	struct memiter cpio;
 
 	dlog("Initialising hafnium\n");
 
@@ -73,16 +73,19 @@
 		panic("unable to map initrd in");
 	}
 
-	cpio_init(&c, (void *)params.initrd_begin,
-		  params.initrd_end - params.initrd_begin);
+	memiter_init(&cpio, (void *)params.initrd_begin,
+		     params.initrd_end - params.initrd_begin);
 
 	/* Load all VMs. */
 	new_mem_end = params.mem_end;
-	load_secondary(&c, params.mem_begin, &new_mem_end);
-	if (!load_primary(&c, params.kernel_arg, &primary_initrd)) {
+	if (!load_primary(&cpio, params.kernel_arg, &primary_initrd)) {
 		panic("unable to load primary VM");
 	}
 
+	if (!load_secondary(&cpio, params.mem_begin, &new_mem_end)) {
+		panic("unable to load secondary VMs");
+	}
+
 	/* Prepare to run by updating bootparams as seens by primary VM. */
 	update.initrd_begin = (paddr_t)primary_initrd.next;
 	update.initrd_end = (paddr_t)primary_initrd.limit;
diff --git a/src/memiter.c b/src/memiter.c
index d3d37d2..6dd9d8c 100644
--- a/src/memiter.c
+++ b/src/memiter.c
@@ -101,3 +101,18 @@
 
 	return true;
 }
+
+/**
+ * Advances the iterator by the given number of bytes. Returns true if the
+ * iterator was advanced without going over its limit; returns false and leaves
+ * the iterator unmodified otherwise.
+ */
+bool memiter_advance(struct memiter *it, size_t v)
+{
+	const char *p = it->next + v;
+	if (p < it->next || p > it->limit) {
+		return false;
+	}
+	it->next = p;
+	return true;
+}
diff --git a/src/mm.c b/src/mm.c
index 236e763..afdbf50 100644
--- a/src/mm.c
+++ b/src/mm.c
@@ -14,6 +14,12 @@
 
 /* clang-format on */
 
+extern char text_begin[];
+extern char text_end[];
+extern char rodata_begin[];
+extern char rodata_end[];
+extern char data_begin[];
+extern char data_end[];
 static struct mm_ptable ptable;
 
 /**
@@ -128,7 +134,7 @@
 static bool mm_map_level(vaddr_t va, vaddr_t va_end, paddr_t pa, uint64_t attrs,
 			 pte_t *table, int level, int flags)
 {
-	size_t i = mm_index(va, level);
+	pte_t *pte = table + mm_index(va, level);
 	vaddr_t va_level_end = mm_level_end(va, level);
 	size_t entry_size = mm_entry_size(level);
 	bool commit = flags & MAP_FLAG_COMMIT;
@@ -143,21 +149,20 @@
 	while (va < va_end) {
 		if (level == 0) {
 			if (commit) {
-				table[i] = arch_mm_pa_to_page_pte(pa, attrs);
+				*pte = arch_mm_pa_to_page_pte(pa, attrs);
 			}
 		} else if ((va_end - va) >= entry_size &&
 			   arch_mm_is_block_allowed(level) &&
 			   (va & (entry_size - 1)) == 0) {
 			if (commit) {
-				pte_t pte = table[i];
-				table[i] = arch_mm_pa_to_block_pte(pa, attrs);
+				pte_t v = *pte;
+				*pte = arch_mm_pa_to_block_pte(pa, attrs);
 				/* TODO: Add barrier. How do we ensure this
 				 * isn't in use by another CPU? Send IPI? */
-				mm_free_page_pte(pte, level, sync);
+				mm_free_page_pte(v, level, sync);
 			}
 		} else {
-			pte_t *nt =
-				mm_populate_table_pte(table + i, level, sync);
+			pte_t *nt = mm_populate_table_pte(pte, level, sync);
 			if (!nt) {
 				return false;
 			}
@@ -170,7 +175,7 @@
 
 		va = (va + entry_size) & ~(entry_size - 1);
 		pa = (pa + entry_size) & ~(entry_size - 1);
-		i++;
+		pte++;
 	}
 
 	return true;
@@ -198,7 +203,7 @@
 {
 	uint64_t attrs = arch_mm_mode_to_attrs(mode);
 	int flags = (mode & MM_MODE_NOSYNC) ? 0 : MAP_FLAG_SYNC;
-	int level = arch_mm_max_level(&t->arch);
+	int level = arch_mm_max_level(mode);
 
 	begin = arch_mm_clear_va(begin);
 	end = arch_mm_clear_va(end + PAGE_SIZE - 1);
@@ -217,7 +222,9 @@
 		     flags | MAP_FLAG_COMMIT);
 
 	/* Invalidate the tlb. */
-	mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+	if (!(mode & MM_MODE_NOINVALIDATE)) {
+		mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+	}
 
 	return true;
 }
@@ -229,7 +236,7 @@
 bool mm_ptable_unmap(struct mm_ptable *t, vaddr_t begin, vaddr_t end, int mode)
 {
 	int flags = (mode & MM_MODE_NOSYNC) ? 0 : MAP_FLAG_SYNC;
-	int level = arch_mm_max_level(&t->arch);
+	int level = arch_mm_max_level(mode);
 
 	begin = arch_mm_clear_va(begin);
 	end = arch_mm_clear_va(end + PAGE_SIZE - 1);
@@ -243,7 +250,9 @@
 		     flags | MAP_FLAG_COMMIT);
 
 	/* Invalidate the tlb. */
-	mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+	if (!(mode & MM_MODE_NOINVALIDATE)) {
+		mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+	}
 
 	return true;
 }
@@ -262,7 +271,7 @@
 	va = arch_mm_clear_va(va);
 	pa = arch_mm_clear_pa(pa);
 
-	for (i = arch_mm_max_level(&t->arch); i > 0; i--) {
+	for (i = arch_mm_max_level(mode); i > 0; i--) {
 		table = mm_populate_table_pte(table + mm_index(va, i), i, sync);
 		if (!table) {
 			return false;
@@ -301,9 +310,9 @@
 /**
  * Write the given table to the debug log.
  */
-void mm_ptable_dump(struct mm_ptable *t)
+void mm_ptable_dump(struct mm_ptable *t, int mode)
 {
-	int max_level = arch_mm_max_level(&t->arch);
+	int max_level = arch_mm_max_level(mode);
 	mm_dump_table_recursive(t->table, max_level, max_level);
 }
 
@@ -319,9 +328,22 @@
 }
 
 /**
+ * Unmaps the hypervisor pages from the given page table.
+ */
+bool mm_ptable_unmap_hypervisor(struct mm_ptable *t, int mode)
+{
+	/* TODO: If we add pages dynamically, they must be included here too. */
+	return mm_ptable_unmap(t, (vaddr_t)text_begin, (vaddr_t)text_end,
+			       mode) &&
+	       mm_ptable_unmap(t, (vaddr_t)rodata_begin, (vaddr_t)rodata_end,
+			       mode) &&
+	       mm_ptable_unmap(t, (vaddr_t)data_begin, (vaddr_t)data_end, mode);
+}
+
+/**
  * Initialises the given page table.
  */
-bool mm_ptable_init(struct mm_ptable *t, int mode)
+bool mm_ptable_init(struct mm_ptable *t, uint32_t id, int mode)
 {
 	size_t i;
 	pte_t *table;
@@ -341,7 +363,7 @@
 	}
 
 	t->table = table;
-	arch_mm_ptable_init(&t->arch);
+	t->id = id;
 
 	return true;
 }
@@ -370,18 +392,11 @@
  */
 bool mm_init(void)
 {
-	extern char text_begin[];
-	extern char text_end[];
-	extern char rodata_begin[];
-	extern char rodata_end[];
-	extern char data_begin[];
-	extern char data_end[];
-
 	dlog("text: 0x%x - 0x%x\n", text_begin, text_end);
 	dlog("rodata: 0x%x - 0x%x\n", rodata_begin, rodata_end);
 	dlog("data: 0x%x - 0x%x\n", data_begin, data_end);
 
-	if (!mm_ptable_init(&ptable, MM_MODE_NOSYNC | MM_MODE_STAGE1)) {
+	if (!mm_ptable_init(&ptable, 0, MM_MODE_NOSYNC | MM_MODE_STAGE1)) {
 		dlog("Unable to allocate memory for page table.\n");
 		return false;
 	}
@@ -402,9 +417,7 @@
 	mm_map((vaddr_t)data_begin, (vaddr_t)data_end, (paddr_t)data_begin,
 	       MM_MODE_R | MM_MODE_W | MM_MODE_NOSYNC);
 
-	arch_mm_init((paddr_t)ptable.table);
-
-	return true;
+	return arch_mm_init((paddr_t)ptable.table);
 }
 
 /**
diff --git a/src/vm.c b/src/vm.c
index 0b5101c..b1d26c9 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -2,7 +2,7 @@
 
 #include "cpu.h"
 
-void vm_init(struct vm *vm, uint32_t vcpu_count)
+bool vm_init(struct vm *vm, uint32_t id, uint32_t vcpu_count)
 {
 	uint32_t i;
 
@@ -13,7 +13,7 @@
 		vcpu_init(vm->vcpus + i, vm);
 	}
 
-	arch_vptable_init(&vm->page_table);
+	return mm_ptable_init(&vm->ptable, id, 0);
 }
 
 /* TODO: Shall we use index or id here? */
@@ -26,3 +26,8 @@
 		vcpu_on(vcpu);
 	}
 }
+
+void vm_set_current(struct vm *vm)
+{
+	arch_mm_set_vm(vm->ptable.id, (paddr_t)vm->ptable.table);
+}