Load multiple VMs from ramdisk and run vcpus on request.

Remove gic and timer code for now.
diff --git a/Makefile b/Makefile
index f2e590c..a094afd 100644
--- a/Makefile
+++ b/Makefile
@@ -49,6 +49,7 @@
 COPTS += -DDEBUG=$(DEBUG)
 COPTS += -MMD -MP -MF $$(patsubst %,%.d,$$@)
 COPTS += -DMAX_CPUS=8
+COPTS += -DMAX_VMS=16
 COPTS += -DSTACK_SIZE=4096
 COPTS += -I$(ROOT_DIR)inc
 COPTS += -I$(ROOT_DIR)src/arch/$(ARCH)/inc
diff --git a/inc/arch.h b/inc/arch.h
index e12a8cf..84624fb 100644
--- a/inc/arch.h
+++ b/inc/arch.h
@@ -2,11 +2,7 @@
 #define _ARCH_H
 
 #include "cpu.h"
-#include "irq.h"
 
-void arch_init(struct cpu *cpu);
-void arch_irq_init_percpu(void);
-void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p);
 void arch_putchar(char c);
 
 #endif  /* _ARCH_H */
diff --git a/inc/cpu.h b/inc/cpu.h
index 6eb90ab..2eae33b 100644
--- a/inc/cpu.h
+++ b/inc/cpu.h
@@ -6,15 +6,12 @@
 #include <stdint.h>
 
 #include "arch_cpu.h"
-#include "list.h"
 #include "spinlock.h"
 
 struct vcpu {
-	struct list_entry links;
-	bool is_runnable;
-	bool interrupt;
+	struct spinlock lock;
+	bool is_on;
 	struct arch_regs regs;
-	struct cpu *cpu;
 	struct vm *vm;
 };
 
@@ -24,22 +21,14 @@
 
 	struct vcpu *current;
 
-	struct list_entry ready_queue;
-
 	/*
 	 * Enabling/disabling irqs are counted per-cpu. They are enabled when
 	 * the count is zero, and disabled when it's non-zero.
 	 */
 	uint32_t irq_disable_count;
 
-	/*
-	 * The number of VMs that have turned this CPU on. CPUs are off when
-	 * this count is zero, and on when this count is ono-zero.
-	 */
-	uint32_t cpu_on_count;
-
-	bool (*timer_cb)(void *context);
-	void *timer_context;
+	/* Determines whether or not the cpu is currently on. */
+	bool is_on;
 
 	/* CPU identifier. Doesn't have to be contiguous. */
 	size_t id;
@@ -51,11 +40,11 @@
 void cpu_init(struct cpu *c);
 void cpu_irq_enable(struct cpu *c);
 void cpu_irq_disable(struct cpu *c);
-void cpu_on(struct cpu *c);
+bool cpu_on(struct cpu *c);
 void cpu_off(struct cpu *c);
 
-void vcpu_init(struct vcpu *vcpu, struct cpu *cpu, struct vm *vm);
-void vcpu_ready(struct vcpu *v);
-void vcpu_unready(struct vcpu *v);
+void vcpu_init(struct vcpu *vcpu, struct vm *vm);
+void vcpu_on(struct vcpu *v);
+void vcpu_off(struct vcpu *v);
 
 #endif  /* _CPU_H */
diff --git a/inc/fdt.h b/inc/fdt.h
index fb23c8e..740d345 100644
--- a/inc/fdt.h
+++ b/inc/fdt.h
@@ -15,9 +15,12 @@
 void fdt_dump(struct fdt_header *hdr);
 void fdt_root_node(struct fdt_node *node, const struct fdt_header *hdr);
 bool fdt_find_child(struct fdt_node *node, const char *child);
+bool fdt_first_child(struct fdt_node *node, const char **child_name);
+bool fdt_next_sibling(struct fdt_node *node, const char **sibling_name);
 bool fdt_read_property(const struct fdt_node *node, const char *name,
 		       const char **buf, uint32_t *size);
 
-void fdt_add_mem_reservation(struct fdt_header *hdr, uint64_t addr, uint64_t len);
+void fdt_add_mem_reservation(struct fdt_header *hdr,
+			     uint64_t addr, uint64_t len);
 
 #endif  /* _FDT_H */
diff --git a/inc/irq.h b/inc/irq.h
deleted file mode 100644
index af5faba..0000000
--- a/inc/irq.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _IRQ_H
-#define _IRQ_H
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-struct irq_handle;
-
-enum irq_trigger {
-	irq_trigger_level,
-	irq_trigger_edge,
-};
-
-enum irq_polarity {
-	irq_polarity_active_high,
-	irq_polarity_active_low,
-};
-
-/* TODO: Add target CPUs here. */
-void irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p,
-		bool (*cb)(void *, struct irq_handle *), void *context);
-void irq_enable(uint32_t num);
-
-void irq_dismiss(struct irq_handle *h);
-
-/* TODO: These don't really belong here, do they?. */
-bool irq_handle(uint32_t num, struct irq_handle *h);
-void irq_init(void);
-void irq_init_percpu(void);
-
-#endif
diff --git a/inc/list.h b/inc/list.h
deleted file mode 100644
index 3001394..0000000
--- a/inc/list.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _LIST_H
-#define _LIST_H
-
-#include <stdbool.h>
-
-struct list_entry {
-	struct list_entry *next;
-	struct list_entry *prev;
-};
-
-#define LIST_INIT(l) {.next = &l, .prev = &l}
-#define LIST_ELEM(ptr, type, field) \
-	((type*)(char*)ptr - offsetof(type, field))
-
-static inline void list_init(struct list_entry *e)
-{
-	e->next = e;
-	e->prev = e;
-}
-
-static inline void list_append(struct list_entry *l, struct list_entry *e)
-{
-	e->next = l;
-	e->prev = l->prev;
-
-	e->next->prev = e;
-	e->prev->next = e;
-}
-
-static inline void list_prepend(struct list_entry *l, struct list_entry *e)
-{
-	e->next = l->next;
-	e->prev = l;
-
-	e->next->prev = e;
-	e->prev->next = e;
-}
-
-static inline bool list_empty(struct list_entry *l)
-{
-	return l->next == l;
-}
-
-static inline void list_remove(struct list_entry *e)
-{
-	e->prev->next = e->next;
-	e->next->prev = e->prev;
-}
-
-#endif  /* _LIST_H */
diff --git a/inc/timer.h b/inc/timer.h
deleted file mode 100644
index aac58c4..0000000
--- a/inc/timer.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _TIMER_H
-#define _TIMER_H
-
-#include <stdbool.h>
-
-void timer_init(void);
-void timer_init_percpu(void);
-void timer_set(uint64_t time, bool (*cb)(void *), void *context);
-
-#endif  /* _TIMER_H */
diff --git a/inc/vm.h b/inc/vm.h
index aa9133c..88ae8ac 100644
--- a/inc/vm.h
+++ b/inc/vm.h
@@ -5,10 +5,12 @@
 
 struct vm {
 	struct vcpu vcpus[MAX_CPUS];
+	uint32_t vcpu_count;
 	struct arch_page_table page_table;
 };
 
-void vm_init(struct vm *vm, struct cpu *cpus);
-void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg);
+void vm_init(struct vm *vm, uint32_t vcpu_count);
+void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg,
+		   bool is_primary);
 
 #endif  /* _VM_H */
diff --git a/src/arch/aarch64/entry.S b/src/arch/aarch64/entry.S
index 560aaf8..3c8f860 100644
--- a/src/arch/aarch64/entry.S
+++ b/src/arch/aarch64/entry.S
@@ -97,9 +97,8 @@
 	/* Call into C code. */
 	bl cpu_main
 
-	/* Run the first vcpu. */
-	bl cpu_next_vcpu
-	b vcpu_enter_restore_all
+	/* Run the vcpu returned by cpu_main. */
+	b vcpu_restore_all_and_run
 
 	/* Loop forever waiting for interrupts. */
 5:	wfi
diff --git a/src/arch/aarch64/exceptions.S b/src/arch/aarch64/exceptions.S
index 97bd2fd..efb7ed5 100644
--- a/src/arch/aarch64/exceptions.S
+++ b/src/arch/aarch64/exceptions.S
@@ -52,7 +52,10 @@
 	subs x18, x18, #0x16
 	b.ne slow_sync_lower_64
 
-	/* Save x29 and x30, then jump to HVC handler. */
+	/*
+	 * Save x29 and x30, which are not saved by the callee, then jump to
+	 * HVC handler.
+	 */
 	stp x29, x30, [sp, #-16]!
 	bl hvc_handler
 	ldp x29, x30, [sp], #16
@@ -103,13 +106,15 @@
 	stp x1, x2, [x0, #8 * 31]
 
 	/* Call C handler. */
-	bl irq_handle_lower
-	cbz x0, vcpu_return
+	bl irq_lower
 
-	/* Find a new vcpu to run. */
-	bl cpu_next_vcpu
-	cbz x0, vcpu_return
-	b vcpu_switch
+	mrs x1, tpidr_el2
+	ldr x1, [x1, #CPU_CURRENT]
+	cbnz x0, vcpu_switch
+
+	/* vcpu is not changing. */
+	mov x0, x1
+	b vcpu_restore_volatile_and_run
 
 .balign 0x80
 	/* fiq_lower_64 */
@@ -137,6 +142,16 @@
 
 .balign 0x80
 
+/**
+ * Switch to a new vcpu.
+ *
+ * All volatile registers from the old vcpu have already been saved. We need
+ * to save only non-volatile ones from the old vcpu, and restore all from the
+ * new one.
+ *
+ * x0 is a pointer to the new vcpu.
+ * x1 is a pointer to the old vcpu.
+ */
 vcpu_switch:
 	/* Save non-volatile registers. */
 	add x1, x1, #VCPU_REGS
@@ -196,8 +211,14 @@
 	mrs x22, par_el1
 	str x22, [x1, #16 * 11]
 
-.globl vcpu_enter_restore_all
-vcpu_enter_restore_all:
+	/* Intentional fall through. */
+
+.globl vcpu_restore_all_and_run
+vcpu_restore_all_and_run:
+	/* Update cpu()->current. */
+	mrs x2, tpidr_el2
+	str x0, [x2, #CPU_CURRENT]
+
 	/* Get a pointer to the lazy registers. */
 	add x0, x0, #VCPU_LAZY
 
@@ -261,7 +282,12 @@
 	/* Restore volatile registers and return. */
 	sub x0, x0, #VCPU_REGS
 
-vcpu_return:
+/**
+ * x0 is a pointer to the vcpu.
+ *
+ * Restore volatile registers and run the given vcpu.
+ */
+vcpu_restore_volatile_and_run:
 	/* Restore volatile registers. */
 	add x0, x0, #VCPU_REGS
 
@@ -288,7 +314,6 @@
 slow_sync_lower_64:
 	/* Get the current vcpu. */
 	mrs x18, tpidr_el2
-	/* TODO: tpidr_el2 should store the vcpu pointer directly. */
 	ldr x18, [x18, #CPU_CURRENT]
 
 	/* Save volatile registers. */
@@ -315,15 +340,18 @@
 	/* Read syndrome register and call C handler. */
 	mrs x0, esr_el2
 	bl sync_lower_exception
-	cbz x0, vcpu_return
 
-	/* Find a new vcpu to run. */
-	bl cpu_next_vcpu
-	cbz x0, vcpu_return
-	b vcpu_switch
+	/* Switch to the vcpu returned by sync_lower_exception. */
+	mrs x1, tpidr_el2
+	ldr x1, [x1, #CPU_CURRENT]
+	cbnz x0, vcpu_switch
+
+	/* vcpu is not changing. */
+	mov x0, x1
+	b vcpu_restore_volatile_and_run
 
 sync_lower_64_switch:
-	/* We'll have to reschedule, so store state before doing so. */
+	/* We'll have to switch, so save volatile state before doing so. */
 	mrs x18, tpidr_el2
 	ldr x18, [x18, #CPU_CURRENT]
 
@@ -343,11 +371,11 @@
 	str xzr, [x18, #8 * 18]
 
 	/* Save return address & mode. */
-	mrs x1, elr_el2
-	mrs x2, spsr_el2
-	stp x1, x2, [x18, #8 * 31]
+	mrs x2, elr_el2
+	mrs x3, spsr_el2
+	stp x2, x3, [x18, #8 * 31]
 
-	/* Find a new vcpu to run. */
-	bl cpu_next_vcpu
-	cbz x0, vcpu_return
+	/* Save lazy state, then switch to new vcpu. */
+	mov x0, x1
+	sub x1, x18, #VCPU_REGS
 	b vcpu_switch
diff --git a/src/arch/aarch64/gicv2.c b/src/arch/aarch64/gicv2.c
deleted file mode 100644
index c160007..0000000
--- a/src/arch/aarch64/gicv2.c
+++ /dev/null
@@ -1,102 +0,0 @@
-#include "dlog.h"
-#include "io.h"
-#include "irq.h"
-
-#define GICD_CTLR       (0x00)
-#define GICD_TYPER      (0x04)
-#define GICD_ISENABLER  (0x100)
-#define GICD_ICENABLER  (0x180)
-#define GICD_ICPENDR    (0x280)
-#define GICD_ICACTIVER  (0x380)
-#define GICD_IPRIORITYR (0x400)
-#define GICD_ITARGETSR  (0x800)
-#define GICD_ICFGR      (0xc00)
-
-#define GICC_CTLR (0x000)
-#define GICC_PMR  (0x004)
-#define GICC_IAR  (0x00c)
-#define GICC_EOIR (0x010)
-
-struct irq_handle {
-	uint32_t iar;
-};
-
-/*
- * Dismisses an irq that was signaled and is being processed.
- */
-void irq_dismiss(struct irq_handle *h)
-{
-	io_write(GICC_BASE + GICC_EOIR, h->iar);
-}
-
-/*
- * Configures the given irq number before it can be enabled.
- */
-void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p)
-{
-	uint32_t v = io_read(GICD_BASE + GICD_ICFGR + (num / 16) * 4);
-
-	if (t == irq_trigger_level)
-		v &= ~(2u << ((num % 16) * 2));
-	else
-		v |= 2u << ((num % 16) * 2);
-
-	io_write(GICD_BASE + GICD_ICFGR + (num / 16) * 4, v);
-}
-
-/*
- * Enables the given irq number such that interrupts will be signaled when its
- * interrupt line is asserted. A caller must first configure the irq before
- * enabling it.
- */
-void irq_enable(uint32_t num)
-{
-	io_write(GICD_BASE + GICD_ISENABLER + (num / 32) * 4,
-		 (1u << (num % 32)));
-}
-
-/*
- * Handles an interrupt signaled when the CPU was in a lower level (EL0 or EL1),
- * it is called directly from the exception handler.
- *
- * The return value indicates whether a new vcpu should be scheduled.
- */
-bool irq_handle_lower(void)
-{
-	struct irq_handle h = {
-		.iar = io_read(GICC_BASE + GICC_IAR),
-	};
-
-	dlog("irq: %u\n", h.iar & 0x3ff);
-
-	return irq_handle(h.iar & 0x3ff, &h);
-}
-
-/*
- * Initializes the GICv2 for use as the interrupt controller.
- */
-void arch_irq_init_percpu(void)
-{
-	uint32_t i;
-	uint32_t max = 32 * (1 + (io_read(GICD_BASE + GICD_TYPER) & 0x1f));
-
-	/* Disable all irqs, clear pending & active states. */
-	for (i = 0; i < (max + 31) / 32; i++) {
-		io_write(GICD_BASE + GICD_ICENABLER + i * 4, 0xffffffff);
-		io_write(GICD_BASE + GICD_ICACTIVER + i * 4, 0xffffffff);
-		io_write(GICD_BASE + GICD_ICPENDR + i * 4, 0xffffffff);
-	}
-
-	/* Set the priority to zero, and cpu target to cpu 0 by default. */
-	for (i = 0; i < (max + 3) / 4; i++) {
-		io_write(GICD_BASE + GICD_IPRIORITYR + i * 4, 0);
-		io_write(GICD_BASE + GICD_ITARGETSR + i * 4, 0x01010101);
-	}
-
-	/* Allow all irq levels to interrupt the current CPU. */
-	io_write(GICC_BASE + GICC_PMR, 0xff);
-
-	/* Enable distributor and CPU interfaces. */
-	io_write(GICD_BASE + GICD_CTLR, 1);
-	io_write(GICC_BASE + GICC_CTLR, 1);
-}
diff --git a/src/arch/aarch64/gicv3.c b/src/arch/aarch64/gicv3.c
deleted file mode 100644
index c160007..0000000
--- a/src/arch/aarch64/gicv3.c
+++ /dev/null
@@ -1,102 +0,0 @@
-#include "dlog.h"
-#include "io.h"
-#include "irq.h"
-
-#define GICD_CTLR       (0x00)
-#define GICD_TYPER      (0x04)
-#define GICD_ISENABLER  (0x100)
-#define GICD_ICENABLER  (0x180)
-#define GICD_ICPENDR    (0x280)
-#define GICD_ICACTIVER  (0x380)
-#define GICD_IPRIORITYR (0x400)
-#define GICD_ITARGETSR  (0x800)
-#define GICD_ICFGR      (0xc00)
-
-#define GICC_CTLR (0x000)
-#define GICC_PMR  (0x004)
-#define GICC_IAR  (0x00c)
-#define GICC_EOIR (0x010)
-
-struct irq_handle {
-	uint32_t iar;
-};
-
-/*
- * Dismisses an irq that was signaled and is being processed.
- */
-void irq_dismiss(struct irq_handle *h)
-{
-	io_write(GICC_BASE + GICC_EOIR, h->iar);
-}
-
-/*
- * Configures the given irq number before it can be enabled.
- */
-void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p)
-{
-	uint32_t v = io_read(GICD_BASE + GICD_ICFGR + (num / 16) * 4);
-
-	if (t == irq_trigger_level)
-		v &= ~(2u << ((num % 16) * 2));
-	else
-		v |= 2u << ((num % 16) * 2);
-
-	io_write(GICD_BASE + GICD_ICFGR + (num / 16) * 4, v);
-}
-
-/*
- * Enables the given irq number such that interrupts will be signaled when its
- * interrupt line is asserted. A caller must first configure the irq before
- * enabling it.
- */
-void irq_enable(uint32_t num)
-{
-	io_write(GICD_BASE + GICD_ISENABLER + (num / 32) * 4,
-		 (1u << (num % 32)));
-}
-
-/*
- * Handles an interrupt signaled when the CPU was in a lower level (EL0 or EL1),
- * it is called directly from the exception handler.
- *
- * The return value indicates whether a new vcpu should be scheduled.
- */
-bool irq_handle_lower(void)
-{
-	struct irq_handle h = {
-		.iar = io_read(GICC_BASE + GICC_IAR),
-	};
-
-	dlog("irq: %u\n", h.iar & 0x3ff);
-
-	return irq_handle(h.iar & 0x3ff, &h);
-}
-
-/*
- * Initializes the GICv2 for use as the interrupt controller.
- */
-void arch_irq_init_percpu(void)
-{
-	uint32_t i;
-	uint32_t max = 32 * (1 + (io_read(GICD_BASE + GICD_TYPER) & 0x1f));
-
-	/* Disable all irqs, clear pending & active states. */
-	for (i = 0; i < (max + 31) / 32; i++) {
-		io_write(GICD_BASE + GICD_ICENABLER + i * 4, 0xffffffff);
-		io_write(GICD_BASE + GICD_ICACTIVER + i * 4, 0xffffffff);
-		io_write(GICD_BASE + GICD_ICPENDR + i * 4, 0xffffffff);
-	}
-
-	/* Set the priority to zero, and cpu target to cpu 0 by default. */
-	for (i = 0; i < (max + 3) / 4; i++) {
-		io_write(GICD_BASE + GICD_IPRIORITYR + i * 4, 0);
-		io_write(GICD_BASE + GICD_ITARGETSR + i * 4, 0x01010101);
-	}
-
-	/* Allow all irq levels to interrupt the current CPU. */
-	io_write(GICC_BASE + GICC_PMR, 0xff);
-
-	/* Enable distributor and CPU interfaces. */
-	io_write(GICD_BASE + GICD_CTLR, 1);
-	io_write(GICC_BASE + GICC_CTLR, 1);
-}
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index 07b1291..1844e37 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -1,13 +1,12 @@
 #include "cpu.h"
 #include "dlog.h"
-#include "irq.h"
 #include "vm.h"
 
 #include "msr.h"
 
 struct hvc_handler_return {
-	size_t user_ret;
-	bool schedule;
+	long user_ret;
+	struct vcpu *new;
 };
 
 void irq_current(void)
@@ -22,13 +21,25 @@
 	for (;;);
 }
 
-struct hvc_handler_return hvc_handler(size_t arg1)
+/* TODO: Define constants below according to spec. */
+#define HF_VCPU_RUN       0xff00
+#define HF_VM_GET_COUNT   0xff01
+#define HF_VCPU_GET_COUNT 0xff02
+
+/* TODO: Move these decl elsewhere. */
+extern struct vm secondary_vm[MAX_VMS];
+extern uint32_t secondary_vm_count;
+extern struct vm primary_vm;
+extern struct cpu cpus[];
+
+struct hvc_handler_return hvc_handler(size_t arg0, size_t arg1, size_t arg2,
+				      size_t arg3)
 {
 	struct hvc_handler_return ret;
 
-	ret.schedule = true;
+	ret.new = NULL;
 
-	switch (arg1) {
+	switch (arg0) {
 	case 0x84000000: /* PSCI_VERSION */
 		ret.user_ret = 2;
 		break;
@@ -37,19 +48,30 @@
 		ret.user_ret = 2;
 		break;
 
-#if 0
-	TODO: Remove this.
-	case 1: /* TODO: Fix. */
-		{
-			extern struct vm vm0;
-			struct vcpu *vcpu = vm0.vcpus;
-			vcpu->interrupt = true;
-			vcpu_ready(vcpu);
-			dlog("Readying VCPU0 again\n");
-		}
-		ret.user_ret = 0;
+	case HF_VM_GET_COUNT:
+		ret.user_ret = secondary_vm_count;
 		break;
-#endif
+
+	case HF_VCPU_GET_COUNT:
+		if (arg1 >= secondary_vm_count)
+			ret.user_ret = -1;
+		else
+			ret.user_ret = secondary_vm[arg1].vcpu_count;
+		break;
+
+	case HF_VCPU_RUN:
+		/* TODO: Make sure we don't allow secondary VMs to make this
+		 * hvc call. */
+		ret.user_ret = 1; /* WFI */
+		if (arg1 < secondary_vm_count &&
+		    arg2 < secondary_vm[arg1].vcpu_count &&
+		    secondary_vm[arg1].vcpus[arg2].is_on) {
+			arch_set_vm_mm(&secondary_vm[arg1].page_table);
+			/* TODO: Update the virtual memory. */
+			ret.new = secondary_vm[arg1].vcpus + arg2;
+			ret.user_ret = 0;
+		}
+		break;
 
 	default:
 		ret.user_ret = -1;
@@ -58,18 +80,38 @@
 	return ret;
 }
 
-bool sync_lower_exception(uint64_t esr)
+struct vcpu *irq_lower(void)
+{
+	/* TODO: Only switch if we know the interrupt was not for the secondary
+	 * VM. */
+
+	/* Switch back to primary VM, interrupts will be handled there. */
+	arch_set_vm_mm(&primary_vm.page_table);
+	return &primary_vm.vcpus[cpus - cpu()];
+}
+
+struct vcpu *sync_lower_exception(uint64_t esr)
 {
 	struct cpu *c = cpu();
 	struct vcpu *vcpu = c->current;
 
 	switch (esr >> 26) {
 	case 0x01: /* EC = 000001, WFI or WFE. */
-		/* Check TI bit of ISS. */
+		/* Check TI bit of ISS, 0 = WFI, 1 = WFE. */
 		if (esr & 1)
-			return true;
-		//vcpu_unready(vcpu);
-		return true;
+			return NULL;
+
+		/* Switch back to primary VM. */
+		arch_set_vm_mm(&primary_vm.page_table);
+		vcpu = &primary_vm.vcpus[cpus - cpu()];
+
+		dlog("Returning due to WFI\n");
+
+		/* TODO: Use constant. */
+		/* Set return value to 1, indicating to primary VM that this
+		 * vcpu blocked on a WFI. */
+		arch_regs_set_retval(&vcpu->regs, 1);
+		return vcpu;
 
 	case 0x24: /* EC = 100100, Data abort. */
 		dlog("Data abort: pc=0x%x, esr=0x%x, ec=0x%x", vcpu->regs.pc, esr, esr >> 26);
@@ -86,6 +128,5 @@
 		for (;;);
 	}
 
-	/* TODO: For now we always reschedule. But we shoudln't. */
-	return true;
+	return NULL;
 }
diff --git a/src/arch/aarch64/inc/arch_cpu.h b/src/arch/aarch64/inc/arch_cpu.h
index e86983e..9612078 100644
--- a/src/arch/aarch64/inc/arch_cpu.h
+++ b/src/arch/aarch64/inc/arch_cpu.h
@@ -2,6 +2,7 @@
 #define _ARCH_CPU_H
 
 #include <stdalign.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -63,7 +64,7 @@
 }
 
 static inline
-void arch_regs_init(struct arch_regs *r, size_t pc, size_t arg)
+void arch_regs_init(struct arch_regs *r, size_t pc, size_t arg, bool is_primary)
 {
 	/* TODO: Use constant here. */
 	r->spsr = 5 |         /* M bits, set to EL1h. */
@@ -71,10 +72,17 @@
 	r->pc = pc;
 	r->r[0] = arg;
 	r->lazy.hcr_el2 = (1u << 31) |  /* RW bit. */
-//			  (7u << 3) |   /* AMO, IMO, FMO bits. */
-			  (3u << 13) |  /* TWI, TWE bits. */
 			  (1u << 2) |   /* PTW, Protected Table Walk. */
 			  (1u << 0);    /* VM: enable stage-2 translation. */
+
+	if (!is_primary)
+		r->lazy.hcr_el2 |= (7u << 3) |   /* AMO, IMO, FMO bits. */
+				   (3u << 13);   /* TWI, TWE bits. */
+}
+
+static inline void arch_regs_set_retval(struct arch_regs *r, size_t v)
+{
+	r->r[0] = v;
 }
 
 static inline void arch_regs_set_irq(struct arch_regs *r)
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index b062f85..ef446f3 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -1,5 +1,4 @@
 #include "arch_cpu.h"
-#include "dlog.h" /* TODO: Remove? */
 #include "msr.h"
 
 void arch_vptable_init(struct arch_page_table *table)
@@ -7,8 +6,6 @@
 	uint64_t i;
 	uint64_t v;
 
-	dlog("ID_AA64MMFR0_EL1=0x%x\n", read_msr(ID_AA64MMFR0_EL1));
-
 	/* TODO: Check each bit. */
 	for (i = 0; i < 512; i++) {
 		table->entry0[i] = 1 |
diff --git a/src/arch/aarch64/rules.mk b/src/arch/aarch64/rules.mk
index ea0d53d..fd6b737 100644
--- a/src/arch/aarch64/rules.mk
+++ b/src/arch/aarch64/rules.mk
@@ -2,18 +2,9 @@
 SRCS += exceptions.S
 SRCS += handler.c
 SRCS += mm.c
-SRCS += timer.c
 
 OFFSET_SRCS += offsets.c
 
-ifeq ($(GICV2),1)
-  SRCS += gicv2.c
-endif
-
-ifeq ($(GICV3),1)
-  SRCS += gicv3.c
-endif
-
 ifeq ($(PL011),1)
   SRCS += pl011.c
 endif
diff --git a/src/arch/aarch64/timer.c b/src/arch/aarch64/timer.c
deleted file mode 100644
index c4eaecd..0000000
--- a/src/arch/aarch64/timer.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include <stdint.h>
-
-#include "cpu.h"
-#include "irq.h"
-#include "msr.h"
-
-static bool timer_irq_handler(void *context, struct irq_handle *h)
-{
-	struct cpu *c = cpu();
-
-	/* Mask timer interrupt and dismiss current interrupt. */
-	write_msr(cnthp_ctl_el2, read_msr(cnthp_ctl_el2) | 0x2);
-	irq_dismiss(h);
-
-	/* Execute user-supplied callback. */
-	if (c->timer_cb)
-		return c->timer_cb(c->timer_context);
-
-	return false;
-}
-
-void timer_set(uint64_t time, bool (*cb)(void *), void *context)
-{
-	uint64_t v;
-	struct cpu *c = cpu();
-
-	/* Save callback. */
-	c->timer_cb = cb;
-	c->timer_context = context;
-
-	/* TODO: There's a better way to advance this. */
-	v = read_msr(cntpct_el0);
-	write_msr(CNTHP_CVAL_EL2, v + time);
-	write_msr(cnthp_ctl_el2, 1); /* enable. */
-}
-
-void timer_init(void)
-{
-	irq_config(TIMER_IRQ, irq_trigger_level, irq_polarity_active_high,
-		   timer_irq_handler, NULL);
-}
-
-void timer_init_percpu(void)
-{
-	/* Mask timer interrupt for now. */
-	write_msr(cnthp_ctl_el2, read_msr(cnthp_ctl_el2) | 0x2);
-
-	irq_enable(TIMER_IRQ);
-}
diff --git a/src/cpu.c b/src/cpu.c
index 85646c6..73a2d1c 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -3,19 +3,12 @@
 #include "arch_cpu.h"
 #include "dlog.h"
 #include "std.h"
-#include "timer.h"
 #include "vm.h"
 
-struct new_old_vcpu {
-	struct vcpu *new;
-	struct vcpu *old;
-};
-
 void cpu_init(struct cpu *c)
 {
 	/* TODO: Assumes that c is zeroed out already. */
 	sl_init(&c->lock);
-	list_init(&c->ready_queue);
 	c->irq_disable_count = 1;
 }
 
@@ -33,15 +26,24 @@
 	c->irq_disable_count++;
 }
 
-void cpu_on(struct cpu *c)
+/**
+ * Turns CPU on and returns the previous state.
+ */
+bool cpu_on(struct cpu *c)
 {
+	bool prev;
+
 	sl_lock(&c->lock);
-	if (!c->cpu_on_count) {
+	prev = c->is_on;
+	c->is_on = true;
+	sl_unlock(&c->lock);
+
+	if (!prev) {
 		/* The CPU is currently off, we need to turn it on. */
 		arch_cpu_on(c->id, c);
 	}
-	c->cpu_on_count++;
-	sl_unlock(&c->lock);
+
+	return prev;
 }
 
 /*
@@ -49,113 +51,31 @@
  */
 void cpu_off(struct cpu *c)
 {
-	bool on;
-
 	sl_lock(&c->lock);
-	c->cpu_on_count--;
-	on = c->cpu_on_count > 0;
+	c->is_on = false;
 	sl_unlock(&c->lock);
 
-	if (!on)
-		arch_cpu_off();
+	arch_cpu_off();
 }
 
-void vcpu_ready(struct vcpu *v)
-{
-	struct cpu *c = v->cpu;
-
-	sl_lock(&c->lock);
-	if (!v->is_runnable) {
-		v->is_runnable = true;
-		list_append(&c->ready_queue, &v->links);
-		/* TODO: Send IPI to cpu if needed. */
-	}
-	sl_unlock(&c->lock);
-}
-
-void vcpu_unready(struct vcpu *v)
-{
-	struct cpu *c = v->cpu;
-
-	sl_lock(&c->lock);
-	if (v->is_runnable) {
-		v->is_runnable = false;
-		list_remove(&v->links);
-	}
-	sl_unlock(&c->lock);
-}
-
-#if 0
-static bool cpu_schedule_next(void *ctx)
-{
-	/* Indicate that a new vcpu should be chosen. */
-	return true;
-}
-#endif
-
-struct new_old_vcpu cpu_next_vcpu(void)
-{
-	struct cpu *c = cpu();
-	struct new_old_vcpu ret;
-	struct vcpu *next;
-	bool switch_mm;
-
-	/* TODO: Check if too soon. */
-
-	sl_lock(&c->lock);
-
-	ret.old = c->current;
-	if (list_empty(&c->ready_queue)) {
-		bool first = true;
-		c->current = NULL;
-		do {
-			sl_unlock(&c->lock);
-			/* TODO: Implement this. Enable irqs. */
-			if (first) {
-				dlog("CPU%d waiting for work...\n", c->id);
-				first = false;
-			}
-			sl_lock(&c->lock);
-		} while (list_empty(&c->ready_queue));
-		dlog("CPU%d found work!\n", c->id);
-	}
-
-	next = LIST_ELEM(c->ready_queue.next, struct vcpu, links);
-	if (next->links.next != &c->ready_queue) {
-		/* Move new vcpu to the end of ready queue. */
-		list_remove(&next->links);
-		list_append(&c->ready_queue, &next->links);
-	}
-
-	c->current = next;
-
-	if (next->interrupt) {
-		arch_regs_set_irq(&next->regs);
-		next->interrupt = false;
-	} else {
-		arch_regs_clear_irq(&next->regs);
-	}
-
-	switch_mm = !ret.old || ret.old->vm != next->vm;
-
-	sl_unlock(&c->lock);
-
-	ret.new = next;
-
-	if (switch_mm)
-		arch_set_vm_mm(&next->vm->page_table);
-
-	/* TODO: Only set this when there is a next thing to run. */
-	/* Set timer again. */
-	//timer_set(5 * 1000000, cpu_schedule_next, NULL);
-
-	return ret;
-}
-
-void vcpu_init(struct vcpu *vcpu, struct cpu *cpu, struct vm *vm)
+void vcpu_init(struct vcpu *vcpu, struct vm *vm)
 {
 	memset(vcpu, 0, sizeof(*vcpu));
-	vcpu->cpu = cpu;
+	sl_init(&vcpu->lock);
 	vcpu->vm = vm;
 	/* TODO: Initialize vmid register. */
 }
+
+void vcpu_on(struct vcpu *vcpu)
+{
+	sl_lock(&vcpu->lock);
+	vcpu->is_on = true;
+	sl_unlock(&vcpu->lock);
+}
+
+void vcpu_off(struct vcpu *vcpu)
+{
+	sl_lock(&vcpu->lock);
+	vcpu->is_on = false;
+	sl_unlock(&vcpu->lock);
+}
diff --git a/src/fdt.c b/src/fdt.c
index 035ec64..da26178 100644
--- a/src/fdt.c
+++ b/src/fdt.c
@@ -244,6 +244,39 @@
 	return false;
 }
 
+bool fdt_first_child(struct fdt_node *node, const char **child_name)
+{
+	struct fdt_tokenizer t;
+
+	fdt_tokenizer_init(&t, node->strs, node->begin, node->end);
+
+	fdt_skip_properties(&t);
+
+	if (!fdt_next_subnode(&t, child_name))
+		return false;
+
+	node->begin = t.cur;
+
+	return true;
+}
+
+bool fdt_next_sibling(struct fdt_node *node, const char **sibling_name)
+{
+	struct fdt_tokenizer t;
+
+	fdt_tokenizer_init(&t, node->strs, node->begin, node->end);
+
+	if (!fdt_skip_node(&t))
+		return false;
+
+	if (!fdt_next_subnode(&t, sibling_name))
+		return false;
+
+	node->begin = t.cur;
+
+	return true;
+}
+
 bool fdt_find_child(struct fdt_node *node, const char *child)
 {
 	struct fdt_tokenizer t;
diff --git a/src/irq.c b/src/irq.c
deleted file mode 100644
index d751c56..0000000
--- a/src/irq.c
+++ /dev/null
@@ -1,41 +0,0 @@
-#include "irq.h"
-
-#include "arch.h"
-
-struct irq_config {
-	void *cb_context;
-	bool (*cb)(void *context, struct irq_handle *);
-};
-
-/*
- * TODO: Move this to write-once page so that we know it won't change in the
- * future.
- */
-static struct irq_config irq_configs[300];
-
-void irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p,
-		bool (*cb)(void *, struct irq_handle *), void *context)
-{
-	struct irq_config *cfg = irq_configs + num;
-
-	cfg->cb = cb;
-	cfg->cb_context = context;
-
-	arch_irq_config(num, t, p);
-}
-
-bool irq_handle(uint32_t num, struct irq_handle *h)
-{
-	struct irq_config *cfg = irq_configs + num;
-
-	return cfg->cb(cfg->cb_context, h);
-}
-
-void irq_init(void)
-{
-}
-
-void irq_init_percpu(void)
-{
-	arch_irq_init_percpu();
-}
diff --git a/src/main.c b/src/main.c
index 346c798..885f577 100644
--- a/src/main.c
+++ b/src/main.c
@@ -6,9 +6,7 @@
 #include "cpu.h"
 #include "dlog.h"
 #include "fdt.h"
-#include "irq.h"
 #include "std.h"
-#include "timer.h"
 #include "vm.h"
 
 void *fdt;
@@ -19,16 +17,13 @@
 /* State of all supported CPUs. The stack of the first one is initialized. */
 struct cpu cpus[MAX_CPUS] = {
 	{
-		.cpu_on_count = 1,
+		.is_on = 1,
 		.stack_bottom = callstacks + STACK_SIZE,
 	},
 };
 
 bool fdt_find_node(struct fdt_node *node, const char *path)
 {
-	if (!fdt_find_child(node, ""))
-		return false;
-
 	while (*path) {
 		if (!fdt_find_child(node, path))
 			return false;
@@ -38,26 +33,37 @@
 	return true;
 }
 
-bool fdt_read_number(struct fdt_node *node, const char *name, uint64_t *value)
+static uint64_t convert_number(const char *data, uint32_t size)
 {
-	const char *data;
-	uint32_t size;
 	union {
 		volatile uint64_t v;
 		char a[8];
 	} t;
 
+	switch (size) {
+	case sizeof(uint32_t):
+		return ntohl(*(uint32_t *)data);
+	case sizeof(uint64_t):
+		memcpy(t.a, data, sizeof(uint64_t));
+		return ntohll(t.v);
+	default:
+		return 0;
+	}
+}
+
+static bool fdt_read_number(const struct fdt_node *node, const char *name,
+		     uint64_t *value)
+{
+	const char *data;
+	uint32_t size;
+
 	if (!fdt_read_property(node, name, &data, &size))
 		return false;
 
 	switch (size) {
 	case sizeof(uint32_t):
-		*value = ntohl(*(uint32_t *)data);
-		break;
-
 	case sizeof(uint64_t):
-		memcpy(t.a, data, sizeof(uint64_t));
-		*value = ntohll(t.v);
+		*value = convert_number(data, size);
 		break;
 
 	default:
@@ -106,7 +112,302 @@
 }
 
 /* TODO: Remove this. */
-struct vm vm0;
+struct vm primary_vm;
+struct vm secondary_vm[MAX_VMS];
+uint32_t secondary_vm_count = 0;
+
+static void find_memory_range(const struct fdt_node *root,
+			      uint64_t *block_start, uint64_t *block_size)
+{
+	struct fdt_node n = *root;
+	const char *name;
+	uint64_t address_size;
+	uint64_t size_size;
+	uint64_t entry_size;
+
+	/* Get the sizes of memory range addresses and sizes. */
+	if (fdt_read_number(&n, "#address-cells", &address_size))
+		address_size *= sizeof(uint32_t);
+	else
+		address_size = sizeof(uint32_t);
+
+	if (fdt_read_number(&n, "#size-cells", &size_size))
+		size_size *= sizeof(uint32_t);
+	else
+		size_size = sizeof(uint32_t);
+
+	entry_size = address_size + size_size;
+
+	/* Look for nodes with the device_type set to "memory". */
+	if (!fdt_first_child(&n, &name))
+		return;
+
+	do {
+		const char *data;
+		uint32_t size;
+		if (!fdt_read_property(&n, "device_type", &data, &size) ||
+		    size != sizeof("memory") ||
+		    memcmp(data, "memory", sizeof("memory")) != 0 ||
+		    !fdt_read_property(&n, "reg", &data, &size)) {
+			continue;
+		}
+
+		/* Traverse all memory ranges within this node. */
+		while (size >= entry_size) {
+			uint64_t addr = convert_number(data, address_size);
+			uint64_t len = convert_number(data + address_size,
+						      size_size);
+
+			if (len > *block_size) {
+				/* Remember the largest range we've found. */
+				*block_start = addr;
+				*block_size = len;
+			}
+
+			size -= entry_size;
+			data += entry_size;
+		}
+	} while (fdt_next_sibling(&n, &name));
+
+	/* TODO: Check for "reserved-memory" nodes. */
+}
+
+/**
+ * Finds the memory region where initrd is stored, and updates the fdt node
+ * cursor to the node called "chosen".
+ */
+static bool find_initrd(struct fdt_node *n, uint64_t *begin, uint64_t *end)
+{
+	if (!fdt_find_node(n, "chosen\0")) {
+		dlog("Unable to find 'chosen'\n");
+		return false;
+	}
+
+	if (!fdt_read_number(n, "linux,initrd-start", begin)) {
+		dlog("Unable to read linux,initrd-start\n");
+		return false;
+	}
+
+	if (!fdt_read_number(n, "linux,initrd-end", end)) {
+		dlog("Unable to read linux,initrd-end\n");
+		return false;
+	}
+
+	return true;
+}
+
+struct memiter {
+	const char *next;
+	const char *limit;
+};
+
+static void memiter_init(struct memiter *it, const void *data, size_t size)
+{
+	it->next = data;
+	it->limit = it->next + size;
+}
+
+static bool memiter_isspace(struct memiter *it)
+{
+	switch (*it->next) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\r':
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void memiter_skip_space(struct memiter *it)
+{
+	while (it->next < it->limit && memiter_isspace(it))
+		it->next++;
+}
+
+static bool memiter_iseq(const struct memiter *it, const char *str)
+{
+	size_t len = strlen(str);
+	if (len != it->limit - it->next)
+		return false;
+	return memcmp(it->next, str, len) == 0;
+}
+
+static bool memiter_parse_str(struct memiter *it, struct memiter *str)
+{
+	/* Skip all white space and fail if we reach the end of the buffer. */
+	memiter_skip_space(it);
+	if (it->next >= it->limit)
+		return false;
+
+	str->next = it->next;
+
+	/* Find the end of the string. */
+	while (it->next < it->limit && !memiter_isspace(it))
+		it->next++;
+
+	str->limit = it->next;
+
+	return true;
+}
+
+static bool memiter_parse_uint(struct memiter *it, uint64_t *value)
+{
+	uint64_t v = 0;
+
+	/* Skip all white space and fail if we reach the end of the buffer. */
+	memiter_skip_space(it);
+	if (it->next >= it->limit)
+		return false;
+
+	/* Fail if the next character is not a decimal digit. */
+	if (*it->next < '0' || *it->next > '9')
+		return false;
+
+	/* Parse the number. */
+	do {
+		v = v * 10 + *it->next - '0';
+		it->next++;
+	} while (it->next < it->limit && *it->next >= '0' && *it->next <= '9');
+
+	*value = v;
+
+	return true;
+}
+
+static bool memiter_find_file(struct cpio *c, const struct memiter *filename,
+			      struct memiter *it)
+{
+	const char *fname;
+	const void *fcontents;
+	size_t fsize;
+	struct cpio_iter iter;
+
+	cpio_init_iter(c, &iter);
+
+	while (cpio_next(&iter, &fname, &fcontents, &fsize)) {
+		if (memiter_iseq(filename, fname)) {
+			memiter_init(it, fcontents, fsize);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool find_file(struct cpio *c, const char *name, struct memiter *it)
+{
+	const char *fname;
+	const void *fcontents;
+	size_t fsize;
+	struct cpio_iter iter;
+
+	cpio_init_iter(c, &iter);
+
+	while (cpio_next(&iter, &fname, &fcontents, &fsize)) {
+		if (!strcmp(fname, name)) {
+			memiter_init(it, fcontents, fsize);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool load_secondary(struct cpio *c,
+			   uint64_t mem_start, uint64_t *mem_size)
+{
+	struct memiter it;
+	struct memiter str;
+	uint64_t mem;
+	uint64_t cpu;
+	uint32_t count;
+
+	if (!find_file(c, "vms.txt", &it)) {
+		dlog("Unable to find vms.txt\n");
+		return false;
+	}
+
+	/* Check the VM limit first so parsing stops consuming entries. */
+	for (count = 0; count < MAX_VMS &&
+	     memiter_parse_uint(&it, &mem) &&
+	     memiter_parse_uint(&it, &cpu) &&
+	     memiter_parse_str(&it, &str); count++) {
+		struct memiter kernel;
+
+		if (!memiter_find_file(c, &str, &kernel)) {
+			dlog("Unable to load kernel for vm %u\n", count);
+			continue;
+		}
+
+		if (mem > *mem_size) {
+			dlog("Not enough memory for vm %u (%u bytes)\n", count,
+			     mem);
+			continue;
+		}
+
+		if (mem < kernel.limit - kernel.next) {
+			dlog("Kernel is larger than available memory for vm %u\n", count);
+			continue;
+		}
+
+		*mem_size -= mem;
+		memcpy((void *)(mem_start + *mem_size), kernel.next,
+		       kernel.limit - kernel.next);
+
+		dlog("Loaded VM%u with %u vcpus, entry at 0x%x\n", count, cpu,
+		     mem_start + *mem_size);
+		vm_init(secondary_vm + count, cpu);
+		vm_start_vcpu(secondary_vm + count, 0,
+			      mem_start + *mem_size, 0, false);
+	}
+
+	secondary_vm_count = count;
+
+	return true;
+}
+
+static bool load_primary(struct cpio *c, struct fdt_node *chosen)
+{
+	struct memiter it;
+
+	if (!find_file(c, "vmlinuz", &it)) {
+		dlog("Unable to find vmlinuz\n");
+		return false;
+	}
+
+	relocate(it.next, it.limit - it.next);
+
+	if (!find_file(c, "initrd.img", &it)) {
+		dlog("Unable to find initrd.img\n");
+		return false;
+	}
+
+	/* Patch FDT to point to new ramdisk. */
+	if (!fdt_write_number(chosen, "linux,initrd-start", (size_t)it.next)) {
+		dlog("Unable to write linux,initrd-start\n");
+		return false;
+	}
+
+	if (!fdt_write_number(chosen, "linux,initrd-end", (size_t)it.limit)) {
+		dlog("Unable to write linux,initrd-end\n");
+		return false;
+	}
+
+	/*
+	 * Patch fdt to reserve memory.
+	 */
+	{
+		size_t tmp = (size_t)&relocate;
+		tmp = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
+
+		fdt_add_mem_reservation(fdt, tmp & ~0xfffff, 0x80000);
+		vm_init(&primary_vm, MAX_CPUS);
+		vm_start_vcpu(&primary_vm, 0, tmp, (size_t)fdt, true);
+	}
+
+	return true;
+}
 
 static void one_time_init(void)
 {
@@ -114,12 +415,6 @@
 
 	dlog("Initializing hafnium\n");
 
-	/*
-	 * TODO: Re-enable this.
-	irq_init();
-	timer_init();
-	*/
-
 	/* Initialize all CPUs. */
 	for (i = 0; i < MAX_CPUS; i++) {
 		struct cpu *c = cpus + i;
@@ -133,98 +428,49 @@
 
 	do {
 		struct fdt_node n;
+		uint64_t mem_start = 0;
+		uint64_t mem_size = 0;
 
 		fdt_root_node(&n, fdt);
-		if (!fdt_find_node(&n, "chosen\0")) {
-			dlog("Unable to find 'chosen'\n");
-			break;
-		}
+		fdt_find_child(&n, "");
+
+		/* TODO: Use this. */
+		find_memory_range(&n, &mem_start, &mem_size);
+		dlog("Memory range: 0x%x - 0x%x\n", mem_start,
+		     mem_start + mem_size - 1);
 
 		uint64_t begin;
 		uint64_t end;
 
-		if (!fdt_read_number(&n, "linux,initrd-start", &begin)) {
-			dlog("Unable to read linux,initrd-start\n");
+		if (!find_initrd(&n, &begin, &end))
 			break;
-		}
 
-		if (!fdt_read_number(&n, "linux,initrd-end", &end)) {
-			dlog("Unable to read linux,initrd-end\n");
-			break;
-		}
-
-		dlog("Ramdisk: from %x to %x\n", begin, end);
+		dlog("Ramdisk range: 0x%x - 0x%x\n", begin, end - 1);
 
 		struct cpio c;
-		struct cpio_iter iter;
 		cpio_init(&c, (void *)begin, end - begin);
-		cpio_init_iter(&c, &iter);
 
-		const char *name;
-		const void *fcontents;
-		size_t ramdisk = 0;
-		size_t ramdisk_end = 0;
-		size_t fsize;
-		while (cpio_next(&iter, &name, &fcontents, &fsize)) {
-			dlog("File: %s, size=%u\n", name, fsize);
-			if (!strcmp(name, "vm/vmlinuz")) {
-				relocate(fcontents, fsize);
-				continue;
-			}
-
-			if (!strcmp(name, "vm/initrd.img")) {
-				dlog("Found vm/ramdisk @ %p, %u bytes\n", fcontents, fsize);
-				ramdisk = (size_t)fcontents;
-				ramdisk_end = ramdisk + fsize;
-				continue;
-			}
-		}
-
-		dlog("Ramdisk; %p\n", ramdisk);
-
-		/* Patch FDT to point to new ramdisk. */
-		if (!fdt_write_number(&n, "linux,initrd-start", ramdisk)) {
-			dlog("Unable to write linux,initrd-start\n");
-			break;
-		}
-
-		if (!fdt_write_number(&n, "linux,initrd-end", ramdisk_end)) {
-			dlog("Unable to write linux,initrd-end\n");
-			break;
-		}
-
-		/*
-		 * Patch fdt to point remove memory.
-		 */
-		{
-			size_t tmp = (size_t)&relocate;
-			tmp = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
-
-
-			fdt_add_mem_reservation(fdt, tmp & ~0xfffff, 0x80000);
-			vm_init(&vm0, cpus);
-			vm_start_vcpu(&vm0, 0, tmp, (size_t)fdt);
-		}
+		load_secondary(&c, mem_start, &mem_size);
+		load_primary(&c, &n);
 	} while (0);
+
+	arch_set_vm_mm(&primary_vm.page_table);
 }
 
 /*
  * The entry point of CPUs when they are turned on. It is supposed to initialise
- * all state and return; the caller will ensure that the next vcpu runs.
+ * all state and return the first vCPU to run.
  */
-void cpu_main(void)
+struct vcpu *cpu_main(void)
 {
+	struct cpu *c = cpu();
+
 	/* Do global one-time initialization just once. */
 	static atomic_flag inited = ATOMIC_FLAG_INIT;
 	if (!atomic_flag_test_and_set_explicit(&inited, memory_order_acq_rel))
 		one_time_init();
 
-	dlog("Starting up cpu %d\n", cpu() - cpus);
+	dlog("Starting up cpu %d\n", c - cpus);
 
-	/* Do per-cpu initialization. */
-	/* TODO: What to do here? */
-	/*
-	irq_init_percpu();
-	timer_init_percpu();
-	*/
+	return primary_vm.vcpus + (c - cpus);
 }
diff --git a/src/rules.mk b/src/rules.mk
index 1bed7fa..e8022f2 100644
--- a/src/rules.mk
+++ b/src/rules.mk
@@ -2,7 +2,6 @@
 SRCS += cpio.c
 SRCS += cpu.c
 SRCS += fdt.c
-SRCS += irq.c
 SRCS += main.c
 SRCS += std.c
 SRCS += vm.c
diff --git a/src/vm.c b/src/vm.c
index ee6a5cd..05dd2e3 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -2,23 +2,26 @@
 
 #include "cpu.h"
 
-void vm_init(struct vm *vm, struct cpu *cpus)
+void vm_init(struct vm *vm, uint32_t vcpu_count)
 {
-	size_t i;
+	uint32_t i;
+
+	vm->vcpu_count = vcpu_count;
 
 	/* Do basic initialization of vcpus. */
-	for (i = 0; i < MAX_CPUS; i++) {
-		vcpu_init(vm->vcpus + i, cpus + i, vm);
-	}
+	for (i = 0; i < vcpu_count; i++)
+		vcpu_init(vm->vcpus + i, vm);
 
 	arch_vptable_init(&vm->page_table);
 }
 
 /* TODO: Shall we use index or id here? */
-void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg)
+void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg,
+		   bool is_primary)
 {
 	struct vcpu *vcpu = vm->vcpus + index;
-	arch_regs_init(&vcpu->regs, entry, arg);
-	vcpu_ready(vcpu);
-	cpu_on(vcpu->cpu);
+	if (index < vm->vcpu_count) {
+		arch_regs_init(&vcpu->regs, entry, arg, is_primary);
+		vcpu_on(vcpu);
+	}
 }