Enable MMU in EL2.
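
Add a generic page table management module (inc/mm.h, src/mm.c) backed by
aarch64-specific helpers (src/arch/aarch64/inc/arch_mm.h), and use it to
build a stage-1 table for the hypervisor itself: the image's text, rodata
and data sections, the UART, the FDT and the ramdisk are identity-mapped,
after which arch_mm_init() points ttbr0_el2 at the table, programs
mair_el2 and enables the stage-1 MMU, alignment checks and the data cache
in EL2 via sctlr_el2. A new halloc_aligned_nosync() entry point lets page
tables be allocated before interlocked operations (and hence the
allocator's spinlock) are usable, and sync_current_exception() now decodes
data aborts to help debug faulting mappings.

For reference, a minimal sketch of how the new stage-1 API fits together,
mirroring one_time_init() in src/main.c (ptable_buf, text_begin and
text_end are the buffer and linker symbols declared there; error handling
is reduced to an infinite loop, as in that function):

    struct mm_ptable ptable;

    /* Locks are not usable yet, so the allocator must not synchronise. */
    halloc_init((size_t)ptable_buf, sizeof(ptable_buf));
    if (!mm_ptable_init(&ptable, MM_MODE_NOSYNC | MM_MODE_STAGE1))
        for (;;);

    /* Identity-map the UART and the image's text section. */
    mm_ptable_map_page(&ptable, PL011_BASE, PL011_BASE,
                       MM_MODE_R | MM_MODE_W | MM_MODE_D | MM_MODE_NOSYNC |
                       MM_MODE_STAGE1);
    mm_ptable_map(&ptable, (vaddr_t)text_begin, (vaddr_t)text_end,
                  (paddr_t)text_begin,
                  MM_MODE_X | MM_MODE_NOSYNC | MM_MODE_STAGE1);

    /* Point ttbr0_el2 at the table and turn the stage-1 MMU on. */
    arch_mm_init((paddr_t)ptable.table);
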
diff --git a/inc/alloc.h b/inc/alloc.h
index b3fc110..4277495 100644
--- a/inc/alloc.h
+++ b/inc/alloc.h
@@ -7,5 +7,6 @@
 void *halloc(size_t size);
 void hfree(void *ptr);
 void *halloc_aligned(size_t size, size_t align);
+void *halloc_aligned_nosync(size_t size, size_t align);
 
 #endif  /* _ALLOC_H */
diff --git a/inc/fdt.h b/inc/fdt.h
index 740d345..fcc1716 100644
--- a/inc/fdt.h
+++ b/inc/fdt.h
@@ -2,6 +2,7 @@
 #define _FDT_H
 
 #include <stdbool.h>
+#include <stddef.h>
 #include <stdint.h>
 
 struct fdt_node {
@@ -12,6 +13,8 @@
 	const char *strs;
 };
 
+size_t fdt_header_size(void);
+size_t fdt_total_size(struct fdt_header *hdr);
 void fdt_dump(struct fdt_header *hdr);
 void fdt_root_node(struct fdt_node *node, const struct fdt_header *hdr);
 bool fdt_find_child(struct fdt_node *node, const char *child);
diff --git a/inc/mm.h b/inc/mm.h
new file mode 100644
index 0000000..1af7496
--- /dev/null
+++ b/inc/mm.h
@@ -0,0 +1,43 @@
+#ifndef _MM_H
+#define _MM_H
+
+#include <stdbool.h>
+
+#include "arch_mm.h"
+
+struct mm_ptable {
+	struct arch_mm_ptable arch;
+	pte_t *table;
+};
+
+#define PAGE_SIZE (1 << PAGE_BITS)
+
+/* The following are arch-independent page mapping modes. */
+#define MM_MODE_R 0x01 /* read */
+#define MM_MODE_W 0x02 /* write */
+#define MM_MODE_X 0x04 /* execute */
+#define MM_MODE_D 0x08 /* device */
+
+/*
+ * This flag indicates that memory allocation must not use locks. This is
+ * relevant in systems where interlocked operations are only available after
+ * virtual memory is enabled.
+ */
+#define MM_MODE_NOSYNC 0x10
+
+/*
+ * This flag indicates that the mapping is intended to be used in a first
+ * stage translation table, which might have different encodings for the
+ * attribute bits than the second stage table.
+ */
+#define MM_MODE_STAGE1 0x20
+
+bool mm_ptable_init(struct mm_ptable *t, int mode);
+void mm_ptable_dump(struct mm_ptable *t);
+bool mm_ptable_map(struct mm_ptable *t, vaddr_t vaddr_begin, vaddr_t vaddr_end,
+		   paddr_t paddr, int mode);
+bool mm_ptable_map_page(struct mm_ptable *t, vaddr_t va, paddr_t pa, int mode);
+bool mm_ptable_unmap(struct mm_ptable *t, vaddr_t begin, vaddr_t end, int mode);
+void mm_ptable_defrag(struct mm_ptable *t);
+
+#endif  /* _MM_H */
diff --git a/src/alloc.c b/src/alloc.c
index cd7c614..b9dc585 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -7,7 +7,7 @@
 static size_t alloc_limit;
 static struct spinlock alloc_lock = SPINLOCK_INIT;
 
-/*
+/**
  * Initializes the allocator.
  */
 void halloc_init(size_t base, size_t size)
@@ -16,16 +16,16 @@
 	alloc_limit = base + size;
 }
 
-/*
- * Allocates the requested amount of memory. Return NULL when there isn't enough
- * free memory.
+/**
+ * Allocates the requested amount of memory. Returns NULL when there isn't
+ * enough free memory.
  */
 void *halloc(size_t size)
 {
 	return halloc_aligned(size, 2 * sizeof(size_t));
 }
 
-/*
+/**
  * Frees the provided memory.
  *
  * Currently unimplemented.
@@ -35,7 +35,7 @@
 	dlog("Attempted to free pointer %p\n", ptr);
 }
 
-/*
+/**
  * Allocates the requested amount of memory, with the requested alignment.
  *
  * Alignment must be a power of two. Returns NULL when there isn't enough free
@@ -43,10 +43,27 @@
  */
 void *halloc_aligned(size_t size, size_t align)
 {
-	size_t begin;
-	size_t end;
+	void *ret;
 
 	sl_lock(&alloc_lock);
+	ret = halloc_aligned_nosync(size, align);
+	sl_unlock(&alloc_lock);
+
+	return ret;
+}
+
+/**
+ * Allocates the requested amount of memory, with the requested alignment, but
+ * without synchronising with other CPUs. The caller is responsible for
+ * serialising all such calls.
+ *
+ * Alignment must be a power of two. Returns NULL when there isn't enough free
+ * memory.
+ */
+void *halloc_aligned_nosync(size_t size, size_t align)
+{
+	size_t begin;
+	size_t end;
 
 	begin = (alloc_base + align - 1) & ~(align - 1);
 	end = begin + size;
@@ -57,7 +74,5 @@
 	else
 		begin = 0;
 
-	sl_unlock(&alloc_lock);
-
 	return (void *)begin;
 }
diff --git a/src/arch/aarch64/entry.S b/src/arch/aarch64/entry.S
index 3c8f860..7f8a4e1 100644
--- a/src/arch/aarch64/entry.S
+++ b/src/arch/aarch64/entry.S
@@ -1,14 +1,11 @@
 #include "offsets.h"
 
-#define PECOFF_FILE_ALIGNMENT 0x200
-
 .section .init.entry, "ax"
 
 .global entry
 entry:
-
-	add x13, x18, #0x16
 	b 0f
+	.word 0
 	.quad 4096        /* text_offset */
 	.quad file_size   /* image_size */
 	.quad 0           /* flags */
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index 48de85d..27ceacf 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -19,7 +19,21 @@
 
 void sync_current_exception(uint64_t esr, uint64_t elr)
 {
-	dlog("Exception: esr=%#x, elr=%#x\n", esr, elr);
+	switch (esr >> 26) {
+	case 0x25: /* EC = 100101, Data abort. */
+		dlog("Data abort: pc=0x%x, esr=0x%x, ec=0x%x", elr, esr, esr >> 26);
+		if (!(esr & (1u << 10))) /* Check FnV bit. */
+			dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2), read_msr(hpfar_el2) << 8);
+		else
+			dlog(", far=invalid");
+
+		dlog("\n");
+		for (;;);
+
+	default:
+		dlog("Unknown sync exception pc=0x%x, esr=0x%x, ec=0x%x\n", elr, esr, esr >> 26);
+		for (;;);
+	}
 	for (;;);
 }
 
diff --git a/src/arch/aarch64/inc/arch_mm.h b/src/arch/aarch64/inc/arch_mm.h
new file mode 100644
index 0000000..c65c488
--- /dev/null
+++ b/src/arch/aarch64/inc/arch_mm.h
@@ -0,0 +1,191 @@
+#ifndef _ARCH_MM_H
+#define _ARCH_MM_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* A physical address. */
+typedef size_t paddr_t;
+
+/* A virtual address. */
+typedef size_t vaddr_t;
+
+/* A page table entry. */
+typedef size_t pte_t;
+
+#define PAGE_LEVEL_BITS 9
+#define PAGE_BITS 12
+
+struct arch_mm_ptable {
+	int max_level;
+};
+
+/**
+ * Initialises the architecture-dependent aspects of the page table.
+ */
+static inline void arch_mm_ptable_init(struct arch_mm_ptable *t)
+{
+	t->max_level = 2;
+}
+
+/**
+ * Determines the maximum level supported by the given page table.
+ */
+static inline int arch_mm_max_level(struct arch_mm_ptable *t)
+{
+	return t->max_level;
+}
+
+/**
+ * Converts a physical address to a table PTE.
+ *
+ * The spec says that 'Table descriptors for stage 2 translations do not
+ * include any attribute field', so we don't take any attributes as arguments.
+ */
+static inline pte_t arch_mm_pa_to_table_pte(paddr_t pa)
+{
+	return pa | 0x3;
+}
+
+/**
+ * Converts a physical address to a block PTE.
+ */
+static inline pte_t arch_mm_pa_to_block_pte(paddr_t pa, uint64_t attrs)
+{
+	return pa | attrs;
+}
+
+/**
+ * Converts a physical address to a page PTE.
+ */
+static inline pte_t arch_mm_pa_to_page_pte(paddr_t pa, uint64_t attrs)
+{
+	return pa | attrs | ((attrs & 1) << 1);
+}
+
+/**
+ * Converts a block PTE to a page PTE.
+ */
+static inline pte_t arch_mm_block_to_page_pte(pte_t pte)
+{
+	return pte | 2;
+}
+
+/**
+ * Specifies whether block mappings are acceptable at the given level.
+ */
+static inline bool arch_mm_is_block_allowed(int level)
+{
+	return level == 1 || level == 2;
+}
+
+/**
+ * Returns the encoding of a page table entry that isn't present.
+ */
+static inline pte_t arch_mm_absent_pte(void)
+{
+	return 0;
+}
+
+/**
+ * Determines if the given pte is present, i.e., if it points to another table,
+ * to a page, or a block of pages.
+ */
+static inline bool arch_mm_pte_is_present(pte_t pte)
+{
+	return (pte & 1) != 0;
+}
+
+/**
+ * Determines if the given pte references another table.
+ */
+static inline bool arch_mm_pte_is_table(pte_t pte)
+{
+	return (pte & 3) == 3;
+}
+
+/**
+ * Determines if the given pte references a block of pages.
+ */
+static inline bool arch_mm_pte_is_block(pte_t pte)
+{
+	return (pte & 3) == 1;
+}
+
+/**
+ * Clears the given virtual address, i.e., sets the ignored bits (from a page
+ * table perspective) to zero.
+ */
+static inline vaddr_t arch_mm_clear_va(vaddr_t addr)
+{
+	return addr & ~((1ull << PAGE_BITS) - 1) & ((1ull << 48) - 1);
+}
+
+/**
+ * Clears the given physical address, i.e., sets the ignored bits (from a page
+ * table perspective) to zero.
+ */
+static inline paddr_t arch_mm_clear_pa(paddr_t addr)
+{
+	return addr & ~((1ull << PAGE_BITS) - 1) & ((1ull << 48) - 1);
+}
+
+/**
+ * Extracts the physical address from a page table entry.
+ */
+static inline paddr_t arch_mm_pte_to_paddr(pte_t pte)
+{
+	return arch_mm_clear_pa(pte);
+}
+
+/**
+ * Extracts a page table pointer from the given page table entry.
+ */
+static inline pte_t *arch_mm_pte_to_table(pte_t pte)
+{
+	return (pte_t *)arch_mm_pte_to_paddr(pte);
+}
+
+/**
+ * Invalidates stage-1 TLB entries referring to the given virtual address range.
+ */
+static inline void arch_mm_invalidate_stage1_range(vaddr_t begin, vaddr_t end)
+{
+	vaddr_t it;
+
+	begin >>= 12;
+	end >>= 12;
+
+	__asm__ volatile("dsb ishst");
+
+	for (it = begin; it < end; it += (1ull << (PAGE_BITS - 12)))
+		__asm__("tlbi vae2is, %0" : : "r"(it));
+
+	__asm__ volatile("dsb ish");
+}
+
+/**
+ * Invalidates stage-2 TLB entries referring to the given virtual address range.
+ */
+static inline void arch_mm_invalidate_stage2_range(vaddr_t begin, vaddr_t end)
+{
+	vaddr_t it;
+
+	begin >>= 12;
+	end >>= 12;
+
+	__asm__ volatile("dsb ishst");
+
+	for (it = begin; it < end; it += (1ull << (PAGE_BITS - 12)))
+		__asm__("tlbi ipas2e1, %0" : : "r"(it));
+
+	__asm__ volatile("dsb ish\n"
+			 "tlbi vmalle1is\n"
+			 "dsb ish\n");
+}
+
+uint64_t arch_mm_mode_to_attrs(int mode);
+void arch_mm_init(paddr_t table);
+
+#endif  /* _ARCH_MM_H */
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index ef446f3..225f1ae 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -1,10 +1,59 @@
 #include "arch_cpu.h"
+#include "mm.h"
 #include "msr.h"
 
+#define NON_SHAREABLE   0ull
+#define OUTER_SHAREABLE 2ull
+#define INNER_SHAREABLE 3ull
+
+#define STAGE1_XN          (1ull << 54)
+#define STAGE1_CONTIGUOUS  (1ull << 52)
+#define STAGE1_DBM         (1ull << 51)
+#define STAGE1_NG          (1ull << 11)
+#define STAGE1_AF          (1ull << 10)
+#define STAGE1_SH(x)       ((x) << 8)
+#define STAGE1_AP(x)       ((x) << 6)
+#define STAGE1_NS          (1ull << 5)
+#define STAGE1_ATTRINDX(x) ((x) << 2)
+
+#define STAGE1_READONLY  2ull
+#define STAGE1_READWRITE 0ull
+
+#define STAGE1_DEVICEINDX 0ull
+#define STAGE1_NORMALINDX 1ull
+
+#define STAGE2_XN(x)      ((x) << 53)
+#define STAGE2_CONTIGUOUS (1ull << 52)
+#define STAGE2_DBM        (1ull << 51)
+#define STAGE2_AF         (1ull << 10)
+#define STAGE2_SH(x)      ((x) << 8)
+#define STAGE2_S2AP(x)    ((x) << 6)
+#define STAGE2_MEMATTR(x) ((x) << 2)
+
+#define STAGE2_EXECUTE_ALL  0ull
+#define STAGE2_EXECUTE_EL0  1ull
+#define STAGE2_EXECUTE_NONE 2ull
+#define STAGE2_EXECUTE_EL1  3ull
+
+/* The following are stage-2 memory attributes for normal memory. */
+#define STAGE2_NONCACHEABLE 1ull
+#define STAGE2_WRITETHROUGH 2ull
+#define STAGE2_WRITEBACK    3ull
+
+#define STAGE2_MEMATTR_NORMAL(outer, inner) (((outer) << 2) | (inner))
+
+/* The following are stage-2 memory attributes for device memory. */
+#define STAGE2_MEMATTR_DEVICE_nGnRnE 0ull
+#define STAGE2_MEMATTR_DEVICE_nGnRE  1ull
+#define STAGE2_MEMATTR_DEVICE_nGRE   2ull
+#define STAGE2_MEMATTR_DEVICE_GRE    3ull
+
+#define STAGE2_ACCESS_READ  1ull
+#define STAGE2_ACCESS_WRITE 2ull
+
 void arch_vptable_init(struct arch_page_table *table)
 {
 	uint64_t i;
-	uint64_t v;
 
 	/* TODO: Check each bit. */
 	for (i = 0; i < 512; i++) {
@@ -25,9 +74,64 @@
 
 	table->first[0] = (uint64_t)&table->entry0[0] | 3;
 	table->first[1] = (uint64_t)&table->entry1[0] | 3;
+}
 
-	/* TODO: Where should this go? */
-	v =
+uint64_t arch_mm_mode_to_attrs(int mode)
+{
+	uint64_t attrs = 1; /* Present bit. */
+
+	if (mode & MM_MODE_STAGE1) {
+		attrs |= STAGE1_AF | STAGE1_SH(OUTER_SHAREABLE);
+
+		/* Define the execute bits. */
+		if (!(mode & MM_MODE_X))
+			attrs |= STAGE1_XN;
+
+		/* Define the read/write bits. */
+		if (mode & MM_MODE_W)
+			attrs |= STAGE1_AP(STAGE1_READWRITE);
+		else
+			attrs |= STAGE1_AP(STAGE1_READONLY);
+
+		/* Define the memory attribute bits. */
+		if (mode & MM_MODE_D)
+			attrs |= STAGE1_ATTRINDX(STAGE1_DEVICEINDX);
+		else
+			attrs |= STAGE1_ATTRINDX(STAGE1_NORMALINDX);
+	} else {
+		uint64_t access = 0;
+
+		attrs |= STAGE2_AF | STAGE2_SH(OUTER_SHAREABLE);
+
+		/* Define the read/write bits. */
+		if (mode & MM_MODE_R)
+			access |= STAGE2_ACCESS_READ;
+
+		if (mode & MM_MODE_W)
+			access |= STAGE2_ACCESS_WRITE;
+
+		attrs |= STAGE2_S2AP(access);
+
+		/* Define the execute bits. */
+		if (mode & MM_MODE_X)
+			attrs |= STAGE2_XN(STAGE2_EXECUTE_ALL);
+		else
+			attrs |= STAGE2_XN(STAGE2_EXECUTE_NONE);
+
+		/* Define the memory attribute bits. */
+		if (mode & MM_MODE_D)
+			attrs |= STAGE2_MEMATTR(STAGE2_MEMATTR_DEVICE_nGnRnE);
+		else
+			attrs |= STAGE2_MEMATTR(STAGE2_MEMATTR_NORMAL(
+				STAGE2_WRITEBACK, STAGE2_WRITEBACK));
+	}
+
+	return attrs;
+}
+
+void arch_mm_init(paddr_t table)
+{
+	uint64_t v =
 		(1u << 31) | /* RES1. */
 		(4 << 16) | /* PS: 44 bits. */
 		(0 << 14) | /* TG0: 4 KB granule. */
@@ -37,258 +141,16 @@
 		(2 << 6) | /* SL0: Start at level 0. */
 		(20 << 0); /* T0SZ: 44-bit input address size. */
 	write_msr(vtcr_el2, v);
-}
 
-#if 0
-#include "arch.h"
-
-#include <stdint.h>
-
-#include "alloc.h"
-#include "log.h"
-#include "msr.h"
-
-#define PAGE_BITS 12
-#define PAGE_SIZE (1 << PAGE_BITS)
-#define ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(uint64_t))
-#define INITIAL_LEVEL 1
-
-extern char text_begin[];
-extern char text_end[];
-extern char rodata_begin[];
-extern char rodata_end[];
-extern char data_begin[];
-extern char data_end[];
-extern char bin_end[];
-
-static uint64_t *ttbr;
-
-static inline size_t mm_entry_size(int level)
-{
-	return 1ull << (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
-}
-
-static inline size_t mm_level_end(size_t va, int level)
-{
-	size_t offset = (PAGE_BITS + (4 - level) * (PAGE_BITS - 3));
-	return ((va >> offset) + 1) << offset;
-}
-
-static inline size_t mm_index(size_t va, int level)
-{
-	size_t v = va >> (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
-	return v & ((1 << (PAGE_BITS - 3)) - 1);
-}
-
-static inline uint64_t mm_clear_attrs(uint64_t v)
-{
-	/* Clean bottom bits. */
-	v &= ~((1 << PAGE_BITS) - 1);
-
-	/* Clean top bits. */
-	v &= ((1ull << 59) - 1);
-
-	return v;
-}
-
-static inline uint64_t *mm_table_ptr(uint64_t pa)
-{
-	return (uint64_t *)mm_clear_attrs(pa);
-}
-
-static inline uint64_t mm_mode_to_attrs(uint64_t mode)
-{
-	uint64_t attrs =
-		(1 << 10) | /* Access flag. */
-		(2 << 8); /* sh -> outer shareable. */
-
-	/* TODO: This is different in s2. */
-	if (!(mode & MM_X)) {
-		attrs |= (1ull << 54); /* XN or UXN, [user] execute never. */
-
-		/* TODO: This is only ok in EL1, it is RES0 in EL2. */
-		attrs |= (1ull << 53); /* PXN, privileged execute never. */
-	}
-
-	/* TODO: This is different in s2. */
-	if (mode & MM_W)
-		attrs |= (0 << 6); /* rw, no EL0 access. */
-	else
-		attrs |= (2 << 6); /* read-only, no EL0 access. */
-
-	if (mode & MM_D)
-		attrs |= (0 << 2); /* device memory in MAIR_ELx. */
-	else
-		attrs |= (1 << 2); /* normal memory in MAIR_ELx. */
-
-	return attrs;
-}
-
-static uint64_t *mm_populate_table(uint64_t *table, uint64_t index)
-{
-	uint64_t *ntable;
-	uint64_t v = table[index];
-	uint64_t i;
-
-	/* Check if table entry already exists. */
-	if (v & 1) {
-		/* Fail if it's a block one. */
-		if (!(v & 2))
-			return NULL;
-		return mm_table_ptr(v);
-	}
-
-	/* Allocate a new table entry and initialize it. */
-	ntable = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
-	if (!ntable)
-		return NULL;
-
-	for (i = 0; i < ENTRIES_PER_LEVEL; i++)
-		ntable[i] = 0;
-
-	/* Fill in the new entry. */
-	table[index] = (size_t)ntable | 0x3;
-
-	return ntable;
-}
-
-static bool mm_map_level(size_t va, size_t va_end, size_t pa,
-			 uint64_t attrs, uint64_t *table, int level)
-{
-	size_t i = mm_index(va, level);
-	size_t va_level_end = mm_level_end(va, level);
-	size_t entry_size = mm_entry_size(level);
-
-	/* Cap va_end so that we don't go over of the current level max. */
-	if (va_end > va_level_end)
-		va_end = va_level_end;
-
-	/* Fill each entry in the table. */
-	while (va < va_end) {
-		if (level == 3) {
-			table[i] = pa | 0x3 | attrs;
-		} else {
-			uint64_t *nt = mm_populate_table(table, i);
-			if (!nt) {
-				/* TODO: Undo all the work so far? */
-				return false;
-			}
-
-			if (!mm_map_level(va, va_end, pa, attrs, nt, level+1)) {
-				/* TODO: Undo all the work so far? */
-				return false;
-			}
-		}
-
-		va += entry_size;
-		pa += entry_size;
-		i++;
-	}
-
-	return true;
-}
-
-bool mm_map_range(size_t va, size_t size, uint64_t pa, uint64_t mode)
-{
-	uint64_t attrs = mm_mode_to_attrs(mode);
-	uint64_t end = mm_clear_attrs(va + size + PAGE_SIZE - 1);
-
-	va = mm_clear_attrs(va);
-	pa = mm_clear_attrs(pa);
-
-	return mm_map_level(va, end, pa, attrs, ttbr, INITIAL_LEVEL);
-}
-
-bool mm_map_page(size_t va, size_t pa, uint64_t mode)
-{
-	size_t i;
-	uint64_t attrs = mm_mode_to_attrs(mode);
-	uint64_t *table = ttbr;
-
-	va = mm_clear_attrs(va);
-	pa = mm_clear_attrs(pa);
-	for (i = INITIAL_LEVEL; i < 3; i++) {
-		table = mm_populate_table(table, mm_index(va, i));
-		if (!table)
-			return false;
-	}
-
-	/* We reached level 3. */
-	i = mm_index(va, 3);
-	table[i] = pa | 0x3 | attrs;
-	return true;
-}
-
-bool arch_init_mm(void)
-{
-#if 0
-	size_t i;
-
-	/* Allocate the first level, then zero it out. */
-	ttbr = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
-	if (!ttbr)
-		return false;
-
-	for (i = 0; i < ENTRIES_PER_LEVEL; i++)
-		ttbr[i] = 0;
-
-	/* Map page for uart. */
-	mm_map_page(PL011_BASE, PL011_BASE, MM_R | MM_W | MM_D);
-
-	/* Map page for gic. */
-	mm_map_page(GICD_BASE, GICD_BASE, MM_R | MM_W | MM_D);
-	mm_map_page(GICC_BASE, GICC_BASE, MM_R | MM_W | MM_D);
-
-	/* Map each section. */
-	mm_map_range((size_t)text_begin, text_end - text_begin,
-		     (size_t)text_begin,  MM_X);
-
-	mm_map_range((size_t)rodata_begin, rodata_end - rodata_begin,
-		     (size_t)rodata_begin, MM_R);
-
-	mm_map_range((size_t)data_begin, data_end - data_begin,
-		     (size_t)data_begin, MM_R | MM_W);
-
-	mm_map_range((size_t)bin_end, 20 * 1024 * 1024, (size_t)bin_end,
-		     MM_R | MM_W);
-#endif
-	log(INFO, "About to enable mmu.\n");
-	enable_mmu(ttbr);
-	log(INFO, "mmu is on.\n");
-
-	return true;
-}
-
-static void arch_mm_dump_table(uint64_t *table, int level)
-{
-	uint64_t i, j;
-	for (i = 0; i < ENTRIES_PER_LEVEL; i++) {
-		if ((table[i] & 1) == 0)
-			continue;
-
-		for (j = 1 * (level - INITIAL_LEVEL + 1); j; j--)
-			log(INFO, "\t");
-		log(INFO, "%x: %x\n", i, table[i]);
-		if (level >= 3)
-			continue;
-
-		if ((table[i] & 3) == 3)
-			arch_mm_dump_table(mm_table_ptr(table[i]), level + 1);
-	}
-}
-
-void enable_mmu(uint64_t *table)
-{
-	//uint32_t v;
-
-	enable_s2();
-#if 0
 	/*
-	 * 0 -> Device-nGnRnE memory
-	 * 1 -> Normal memory, Inner/Outer Write-Back Non-transient,
-	 *      Write-Alloc, Read-Alloc.
+	 * 0    -> Device-nGnRnE memory
+	 * 0xff -> Normal memory, Inner/Outer Write-Back Non-transient,
+	 *         Write-Alloc, Read-Alloc.
 	 */
-	write_msr(mair_el2, 0xff00);
+	write_msr(mair_el2,
+		  (0 << (8 * STAGE1_DEVICEINDX)) |
+		  (0xff << (8 * STAGE1_NORMALINDX)));
+
 	write_msr(ttbr0_el2, table);
 
 	/*
@@ -308,8 +170,7 @@
 	v =
 		(1 << 0) | /* M, enable stage 1 EL2 MMU. */
 		(1 << 1) | /* A, enable alignment check faults. */
-		// TODO: Enable this.
-//		(1 << 2) | /* C, data cache enable. */
+		(1 << 2) | /* C, data cache enable. */
 		(1 << 3) | /* SA, enable stack alignment check. */
 		(3 << 4) | /* RES1 bits. */
 		(1 << 11) | /* RES1 bit. */
@@ -325,6 +186,4 @@
 	__asm volatile("isb");
 	write_msr(sctlr_el2, v);
 	__asm volatile("isb");
-#endif
 }
-#endif
diff --git a/src/fdt.c b/src/fdt.c
index d5b1e16..d20b82e 100644
--- a/src/fdt.c
+++ b/src/fdt.c
@@ -358,3 +358,13 @@
 	e->address = htobe64(addr);
 	e->size = htobe64(len);
 }
+
+size_t fdt_header_size(void)
+{
+	return sizeof(struct fdt_header);
+}
+
+size_t fdt_total_size(struct fdt_header *hdr)
+{
+	return be32toh(hdr->totalsize);
+}
diff --git a/src/main.c b/src/main.c
index d5c2c22..0c18d79 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,15 +1,19 @@
 #include <stdalign.h>
-#include <stdatomic.h>
 #include <stddef.h>
 
+#include "alloc.h"
+#include "api.h"
 #include "cpio.h"
 #include "cpu.h"
 #include "dlog.h"
 #include "fdt.h"
+#include "mm.h"
 #include "std.h"
 #include "vm.h"
 
 void *fdt;
+char ptable_buf[PAGE_SIZE * 20];
+struct mm_ptable ptable;
 
 bool fdt_find_node(struct fdt_node *node, const char *path)
 {
@@ -91,19 +95,32 @@
 	return true;
 }
 
-static void relocate(const char *from, size_t size)
+/**
+ * Copies data to an unmapped location by mapping it for write, copying the
+ * data, then unmapping it.
+ */
+static bool copy_to_unmapped(paddr_t to, const void *from, size_t size)
 {
-	extern char bin_end[];
-	size_t tmp = (size_t)&bin_end[0];
-	char *dest = (char *)((tmp + 0x80000 - 1) & ~(0x80000 - 1));
-	dlog("bin_end is at %p, copying to %p\n", &bin_end[0], dest);
-	memcpy(dest, from, size);
+	if (!mm_ptable_map(&ptable, (vaddr_t)to, (vaddr_t)to + size, to,
+			   MM_MODE_W | MM_MODE_STAGE1))
+		return false;
+
+	memcpy((void *)to, from, size);
+
+	mm_ptable_unmap(&ptable, to, to + size, MM_MODE_STAGE1);
+
+	return true;
 }
 
-/* TODO: Remove this. */
-struct vm primary_vm;
-struct vm secondary_vm[MAX_VMS];
-uint32_t secondary_vm_count = 0;
+static bool relocate(const char *from, size_t size)
+{
+	/* TODO: This is a hack. We must read the alignment from the binary. */
+	extern char bin_end[];
+	size_t tmp = (size_t)&bin_end[0];
+	paddr_t dest = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
+	dlog("bin_end is at %p, copying to %p\n", &bin_end[0], dest);
+	return copy_to_unmapped(dest, from, size);
+}
 
 static void find_memory_range(const struct fdt_node *root,
 			      uint64_t *block_start, uint64_t *block_size)
@@ -341,8 +358,11 @@
 		}
 
 		*mem_size -= mem;
-		memcpy((void *)(mem_start + *mem_size), kernel.next,
-		       kernel.limit - kernel.next);
+		if (!copy_to_unmapped(mem_start + *mem_size, kernel.next,
+				      kernel.limit - kernel.next)) {
+			dlog("Unable to copy kernel for vm %u\n", count);
+			continue;
+		}
 
 		dlog("Loaded VM%u with %u vcpus, entry at 0x%x\n", count, cpu,
 		     mem_start + *mem_size);
@@ -365,7 +385,10 @@
 		return false;
 	}
 
-	relocate(it.next, it.limit - it.next);
+	if (!relocate(it.next, it.limit - it.next)) {
+		dlog("Unable to relocate kernel for primary vm.\n");
+		return false;
+	}
 
 	if (!find_file(c, "initrd.img", &it)) {
 		dlog("Unable to find initrd.img\n");
@@ -389,7 +412,6 @@
 	{
 		size_t tmp = (size_t)&relocate;
 		tmp = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
-
 		fdt_add_mem_reservation(fdt, tmp & ~0xfffff, 0x80000);
 		vm_init(&primary_vm, MAX_CPUS);
 		vm_start_vcpu(&primary_vm, 0, tmp, (size_t)fdt, true);
@@ -398,24 +420,83 @@
 	return true;
 }
 
+/**
+ * Performs one-time initialisation of the hypervisor.
+ */
 static void one_time_init(void)
 {
+	extern char text_begin[];
+	extern char text_end[];
+	extern char rodata_begin[];
+	extern char rodata_end[];
+	extern char data_begin[];
+	extern char data_end[];
+
 	dlog("Initializing hafnium\n");
 
 	cpu_module_init();
+	halloc_init((size_t)ptable_buf, sizeof(ptable_buf));
+
+	if (!mm_ptable_init(&ptable, MM_MODE_NOSYNC | MM_MODE_STAGE1)) {
+		dlog("Unable to allocate memory for page table.\n");
+		for (;;);
+	}
+
+	dlog("text: 0x%x - 0x%x\n", text_begin, text_end);
+	dlog("rodata: 0x%x - 0x%x\n", rodata_begin, rodata_end);
+	dlog("data: 0x%x - 0x%x\n", data_begin, data_end);
+
+	/* Map page for uart. */
+	mm_ptable_map_page(&ptable, PL011_BASE, PL011_BASE,
+			   MM_MODE_R | MM_MODE_W | MM_MODE_D | MM_MODE_NOSYNC |
+			   MM_MODE_STAGE1);
+
+	/* Map each section. */
+	mm_ptable_map(&ptable, (vaddr_t)text_begin, (vaddr_t)text_end,
+		      (paddr_t)text_begin,
+		      MM_MODE_X | MM_MODE_NOSYNC | MM_MODE_STAGE1);
+
+	mm_ptable_map(&ptable, (vaddr_t)rodata_begin, (vaddr_t)rodata_end,
+		      (paddr_t)rodata_begin,
+		      MM_MODE_R | MM_MODE_NOSYNC | MM_MODE_STAGE1);
+
+	mm_ptable_map(&ptable, (vaddr_t)data_begin, (vaddr_t)data_end,
+		      (paddr_t)data_begin,
+		      MM_MODE_R | MM_MODE_W | MM_MODE_NOSYNC | MM_MODE_STAGE1);
+
+	arch_mm_init((paddr_t)ptable.table);
 
 	/* TODO: Code below this point should be removed from this function. */
-	/* TODO: Remove this. */
-
 	do {
 		struct fdt_node n;
 		uint64_t mem_start = 0;
 		uint64_t mem_size = 0;
+		uint64_t new_mem_size;
+
+		/* Map in the fdt header. */
+		if (!mm_ptable_map(&ptable, (vaddr_t)fdt,
+				   (vaddr_t)fdt + fdt_header_size(),
+				   (paddr_t)fdt,
+				   MM_MODE_R | MM_MODE_STAGE1)) {
+			dlog("Unable to map FDT header.\n");
+			break;
+		}
+
+		/*
+		 * Map the rest of the fdt in read-only mode; it is remapped
+		 * read/write with an extra page when reservations are added.
+		 */
+		if (!mm_ptable_map(&ptable, (vaddr_t)fdt,
+				   (vaddr_t)fdt + fdt_total_size(fdt),
+				   (paddr_t)fdt,
+				   MM_MODE_R | MM_MODE_STAGE1)) {
+			dlog("Unable to map FDT.\n");
+			break;
+		}
 
 		fdt_root_node(&n, fdt);
 		fdt_find_child(&n, "");
 
-		/* TODO: Use this. */
 		find_memory_range(&n, &mem_start, &mem_size);
 		dlog("Memory range: 0x%x - 0x%x\n", mem_start,
 		     mem_start + mem_size - 1);
@@ -427,18 +508,44 @@
 			break;
 
 		dlog("Ramdisk range: 0x%x - 0x%x\n", begin, end - 1);
+		mm_ptable_map(&ptable, begin, end, begin,
+			      MM_MODE_R | MM_MODE_STAGE1);
 
 		struct cpio c;
 		cpio_init(&c, (void *)begin, end - begin);
 
-		load_secondary(&c, mem_start, &mem_size);
+		/* Map the fdt in r/w mode in preparation for extending it. */
+		if (!mm_ptable_map(&ptable, (vaddr_t)fdt,
+				   (vaddr_t)fdt + fdt_total_size(fdt) +
+				   PAGE_SIZE,
+				   (paddr_t)fdt,
+				   MM_MODE_R | MM_MODE_W | MM_MODE_STAGE1)) {
+			dlog("Unable to map FDT in r/w mode.\n");
+			break;
+		}
+		new_mem_size = mem_size;
+		load_secondary(&c, mem_start, &new_mem_size);
 		load_primary(&c, &n);
+
+		/* Patch fdt to reserve memory for secondary VMs. */
+		fdt_add_mem_reservation(fdt, mem_start + new_mem_size,
+					mem_size - new_mem_size);
+
+		/* Unmap FDT. */
+		if (!mm_ptable_unmap(&ptable, (vaddr_t)fdt,
+				     (vaddr_t)fdt + fdt_total_size(fdt) +
+				     PAGE_SIZE, MM_MODE_STAGE1)) {
+			dlog("Unable to unmap the FDT.\n");
+			break;
+		}
 	} while (0);
 
+	mm_ptable_defrag(&ptable);
+
 	arch_set_vm_mm(&primary_vm.page_table);
 }
 
-/*
+/**
  * The entry point of CPUs when they are turned on. It is supposed to initialise
  * all state and return the first vCPU to run.
  */
@@ -446,10 +553,13 @@
 {
 	struct cpu *c = cpu();
 
-	/* Do global one-time initialization just once. */
-	static atomic_flag inited = ATOMIC_FLAG_INIT;
-	if (!atomic_flag_test_and_set_explicit(&inited, memory_order_acq_rel))
+	/* Do global one-time initialization just once. We avoid using atomics
+	 * by only touching the variable from cpu 0. */
+	static volatile bool inited = false;
+	if (cpu_index(c) == 0 && !inited) {
+		inited = true;
 		one_time_init();
+	}
 
 	dlog("Starting up cpu %d\n", cpu_index(c));
 
diff --git a/src/mm.c b/src/mm.c
new file mode 100644
index 0000000..4a72d56
--- /dev/null
+++ b/src/mm.c
@@ -0,0 +1,320 @@
+#include "mm.h"
+
+#include <stdatomic.h>
+#include <stdint.h>
+
+#include "alloc.h"
+#include "dlog.h"
+
+#define MAP_FLAG_SYNC   0x01
+#define MAP_FLAG_COMMIT 0x02
+
+/**
+ * Calculates the size of the address space represented by a page table entry at
+ * the given level.
+ */
+static inline size_t mm_entry_size(int level)
+{
+	return 1ull << (PAGE_BITS + level * PAGE_LEVEL_BITS);
+}
+
+/**
+ * For a given virtual address, calculates the maximum (plus one) address that
+ * can be represented by the same table at the given level.
+ */
+static inline vaddr_t mm_level_end(vaddr_t va, int level)
+{
+	size_t offset = PAGE_BITS + (level + 1) * PAGE_LEVEL_BITS;
+	return ((va >> offset) + 1) << offset;
+}
+
+/**
+ * For a given virtual address, calculates the index at which its entry is
+ * stored in a table at the given level.
+ */
+static inline size_t mm_index(vaddr_t va, int level)
+{
+	vaddr_t v = va >> (PAGE_BITS + level * PAGE_LEVEL_BITS);
+	return v & ((1ull << PAGE_LEVEL_BITS) - 1);
+}
+
+/**
+ * Populates the provided page table entry with a reference to another table if
+ * needed, that is, if it does not yet point to another table.
+ *
+ * Returns a pointer to the table the entry now points to.
+ */
+static pte_t *mm_populate_table_pte(pte_t *pte, int level, bool sync_alloc)
+{
+	pte_t *ntable;
+	pte_t v = *pte;
+	pte_t new_pte;
+	size_t i;
+	size_t inc;
+
+	/* Just return pointer to table if it's already populated. */
+	if (arch_mm_pte_is_table(v))
+		return arch_mm_pte_to_table(v);
+
+	/* Allocate a new table. */
+	ntable = (sync_alloc ? halloc_aligned : halloc_aligned_nosync)(
+			PAGE_SIZE, PAGE_SIZE);
+	if (!ntable) {
+		dlog("Failed to allocate memory for page table\n");
+		return NULL;
+	}
+
+	/* Determine template for new pte and its increment. */
+	if (!arch_mm_pte_is_block(v)) {
+		inc = 0;
+		new_pte = arch_mm_absent_pte();
+	} else {
+		inc = mm_entry_size(level - 1);
+		if (level == 1)
+			new_pte = arch_mm_block_to_page_pte(v);
+		else
+			new_pte = v;
+	}
+
+	/* Initialise entries in the new table. */
+	for (i = 0; i < PAGE_SIZE / sizeof(pte_t); i++) {
+		ntable[i] = new_pte;
+		new_pte += inc;
+	}
+
+	/*
+	 * Ensure initialisation is visible before updating the actual pte, then
+	 * update it.
+	 */
+	atomic_thread_fence(memory_order_release);
+	*pte = arch_mm_pa_to_table_pte((paddr_t)ntable);
+
+	return ntable;
+}
+
+/**
+ * Frees all page-table-related memory associated with the given pte at the
+ * given level.
+ */
+static void mm_free_page_pte(pte_t pte, int level, bool sync)
+{
+	/* TODO: Implement.
+	if (!arch_mm_pte_is_present(pte) || level < 1)
+		return;
+	*/
+}
+
+/**
+ * Updates the page table at the given level to map the given virtual address
+ * range to a physical range using the provided (architecture-specific)
+ * attributes.
+ *
+ * This function calls itself recursively if it needs to update additional
+ * levels, but the recursion is bound by the maximum number of levels in a page
+ * table.
+ */
+static bool mm_map_level(vaddr_t va, vaddr_t va_end, paddr_t pa, uint64_t attrs,
+			 pte_t *table, int level, int flags)
+{
+	size_t i = mm_index(va, level);
+	vaddr_t va_level_end = mm_level_end(va, level);
+	size_t entry_size = mm_entry_size(level);
+	bool commit = flags & MAP_FLAG_COMMIT;
+	bool sync = flags & MAP_FLAG_SYNC;
+
+	/* Cap va_end so that we don't go over the current level max. */
+	if (va_end > va_level_end)
+		va_end = va_level_end;
+
+	/* Fill each entry in the table. */
+	while (va < va_end) {
+		if (level == 0) {
+			if (commit)
+				table[i] = arch_mm_pa_to_page_pte(pa, attrs);
+		} else if ((va_end - va) >= entry_size &&
+			   arch_mm_is_block_allowed(level) &&
+			   (va & (entry_size - 1)) == 0) {
+			if (commit) {
+				pte_t pte = table[i];
+				table[i] = arch_mm_pa_to_block_pte(pa, attrs);
+				/* TODO: Add barrier. How do we ensure this
+				 * isn't in use by another CPU? Send IPI? */
+				mm_free_page_pte(pte, level, sync);
+			}
+		} else {
+			pte_t *nt = mm_populate_table_pte(table + i, level,
+							  sync);
+			if (!nt)
+				return false;
+
+			if (!mm_map_level(va, va_end, pa, attrs, nt, level-1,
+					  flags))
+				return false;
+		}
+
+		va = (va + entry_size) & ~(entry_size - 1);
+		pa = (pa + entry_size) & ~(entry_size - 1);
+		i++;
+	}
+
+	return true;
+}
+
+/**
+ * Invalidates the TLB for the given virtual address range.
+ */
+static void mm_invalidate_tlb(vaddr_t begin, vaddr_t end, bool stage1)
+{
+	if (stage1)
+		arch_mm_invalidate_stage1_range(begin, end);
+	else
+		arch_mm_invalidate_stage2_range(begin, end);
+}
+
+/**
+ * Updates the given table such that the given virtual address range is mapped
+ * to the given physical address range in the architecture-agnostic mode
+ * provided.
+ */
+bool mm_ptable_map(struct mm_ptable *t, vaddr_t begin, vaddr_t end,
+		   paddr_t paddr, int mode)
+{
+	uint64_t attrs = arch_mm_mode_to_attrs(mode);
+	int flags = (mode & MM_MODE_NOSYNC) ? 0 : MAP_FLAG_SYNC;
+	int level = arch_mm_max_level(&t->arch);
+
+	begin = arch_mm_clear_va(begin);
+	end = arch_mm_clear_va(end + PAGE_SIZE - 1);
+	paddr = arch_mm_clear_pa(paddr);
+
+	/*
+	 * Do it in two steps to prevent leaving the table in a halfway updated
+	 * state. On failure the table may be left with extra internal tables,
+	 * but the mappings themselves are unchanged.
+	 */
+	if (!mm_map_level(begin, end, paddr, attrs, t->table, level, flags))
+		return false;
+
+	mm_map_level(begin, end, paddr, attrs, t->table, level,
+		     flags | MAP_FLAG_COMMIT);
+
+	/* Invalidate the tlb. */
+	mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+
+	return true;
+}
+
+/**
+ * Updates the given table such that the given virtual address range is not
+ * mapped to any physical address.
+ */
+bool mm_ptable_unmap(struct mm_ptable *t, vaddr_t begin, vaddr_t end, int mode)
+{
+	int flags = (mode & MM_MODE_NOSYNC) ? 0 : MAP_FLAG_SYNC;
+	int level = arch_mm_max_level(&t->arch);
+
+	begin = arch_mm_clear_va(begin);
+	end = arch_mm_clear_va(end + PAGE_SIZE - 1);
+
+	/* Also do updates in two steps, similarly to mm_ptable_map. */
+	if (!mm_map_level(begin, end, begin, 0, t->table, level, flags))
+		return false;
+
+	mm_map_level(begin, end, begin, 0, t->table, level,
+		     flags | MAP_FLAG_COMMIT);
+
+	/* Invalidate the tlb. */
+	mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+
+	return true;
+}
+
+/**
+ * Updates the given table such that a single virtual address page is mapped
+ * to a single physical address page in the provided architecture-agnostic mode.
+ */
+bool mm_ptable_map_page(struct mm_ptable *t, vaddr_t va, paddr_t pa, int mode)
+{
+	size_t i;
+	uint64_t attrs = arch_mm_mode_to_attrs(mode);
+	pte_t *table = t->table;
+	bool sync = !(mode & MM_MODE_NOSYNC);
+
+	va = arch_mm_clear_va(va);
+	pa = arch_mm_clear_pa(pa);
+
+	for (i = arch_mm_max_level(&t->arch); i > 0; i--) {
+		table = mm_populate_table_pte(table + mm_index(va, i), i, sync);
+		if (!table)
+			return false;
+	}
+
+	i = mm_index(va, 0);
+	table[i] = arch_mm_pa_to_page_pte(pa, attrs);
+	return true;
+}
+
+/**
+ * Writes the given table to the debug log, calling itself recursively to
+ * write sub-tables.
+ */
+static void mm_dump_table_recursive(pte_t *table, int level, int max_level)
+{
+	uint64_t i;
+	for (i = 0; i < PAGE_SIZE / sizeof(pte_t); i++) {
+		if (!arch_mm_pte_is_present(table[i]))
+			continue;
+
+		dlog("%*s%x: %x\n", 4 * (max_level - level), "", i, table[i]);
+		if (!level)
+			continue;
+
+		if (arch_mm_pte_is_table(table[i])) {
+			mm_dump_table_recursive(arch_mm_pte_to_table(table[i]),
+						level - 1, max_level);
+		}
+	}
+}
+
+/**
+ * Writes the given table to the debug log.
+ */
+void mm_ptable_dump(struct mm_ptable *t)
+{
+	int max_level = arch_mm_max_level(&t->arch);
+	mm_dump_table_recursive(t->table, max_level, max_level);
+}
+
+/**
+ * Defragments the given page table by converting page table references to
+ * blocks whenever possible.
+ */
+void mm_ptable_defrag(struct mm_ptable *t)
+{
+	/* TODO: Implement. */
+}
+
+/**
+ * Initialises the given page table.
+ */
+bool mm_ptable_init(struct mm_ptable *t, int mode)
+{
+	size_t i;
+	pte_t *table;
+
+	if (mode & MM_MODE_NOSYNC)
+		table = halloc_aligned_nosync(PAGE_SIZE, PAGE_SIZE);
+	else
+		table = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
+
+	if (!table)
+		return false;
+
+	for (i = 0; i < PAGE_SIZE / sizeof(pte_t); i++)
+		table[i] = arch_mm_absent_pte();
+
+	t->table = table;
+	arch_mm_ptable_init(&t->arch);
+
+	return true;
+}
diff --git a/src/rules.mk b/src/rules.mk
index 313748d..0426fba 100644
--- a/src/rules.mk
+++ b/src/rules.mk
@@ -4,6 +4,7 @@
 SRCS += cpu.c
 SRCS += fdt.c
 SRCS += main.c
+SRCS += mm.c
 SRCS += std.c
 SRCS += vm.c