Initial commit.
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..89f9ac0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+out/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f2e590c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,144 @@
+ROOT_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
+ifeq ($(ROOT_DIR),./)
+ ROOT_DIR :=
+endif
+
+# Defaults.
+ARCH ?= aarch64
+PLAT ?= qemu
+DEBUG ?= 1
+NAME := hafnium
+
+OUT := $(ROOT_DIR)out/$(ARCH)/$(PLAT)
+
+all: $(OUT)/$(NAME).bin
+
+# Include platform-specific constants.
+include $(ROOT_DIR)src/arch/$(ARCH)/$(PLAT).mk
+
+define include_module
+ SRCS :=
+ OFFSET_SRCS :=
+ include $(ROOT_DIR)$(1)/rules.mk
+ GLOBAL_SRCS += $$(addprefix $(1)/,$$(SRCS))
+ GLOBAL_OFFSET_SRCS += $$(addprefix $(1)/,$$(OFFSET_SRCS))
+endef
+
+#
+# Include each module.
+#
+MODULES := src
+MODULES += src/arch/$(ARCH)
+GLOBAL_SRCS :=
+GLOBAL_OFFSET_SRCS :=
+$(foreach mod,$(MODULES),$(eval $(call include_module,$(mod))))
+
+CROSS_PREFIX := aarch64-linux-gnu-
+
+#
+# Rules to build C files.
+#
+COPTS = -mcpu=cortex-a57+nofp
+COPTS += -fno-stack-protector
+COPTS += -fno-builtin -ffreestanding
+COPTS += -g
+COPTS += -O2
+COPTS += -fpic
+COPTS += -std=c11
+COPTS += -Wall -Wpedantic -Werror
+COPTS += -DDEBUG=$(DEBUG)
+COPTS += -MMD -MP -MF $$(patsubst %,%.d,$$@)
+COPTS += -DMAX_CPUS=8
+COPTS += -DSTACK_SIZE=4096
+COPTS += -I$(ROOT_DIR)inc
+COPTS += -I$(ROOT_DIR)src/arch/$(ARCH)/inc
+COPTS += -I$(OUT)/arch/$(ARCH)/inc
+COPTS += -DGICD_BASE=$(GICD_BASE)
+COPTS += -DGICC_BASE=$(GICC_BASE)
+COPTS += -DGICR_BASE=$(GICR_BASE)
+COPTS += -DTIMER_IRQ=$(TIMER_IRQ)
+
+ifeq ($(PL011),1)
+ COPTS += -DPL011_BASE=$(PL011_BASE)
+endif
+
+ifeq ($(DEBUG),1)
+ COPTS += -DDEBUG
+endif
+
+define build_c
+ TGT := $(patsubst %.c,%.o,$(OUT)/$(patsubst src/%,%,$(1)))
+ GLOBAL_OBJS += $$(TGT)
+ REMAIN_SRCS := $$(filter-out $(1),$$(REMAIN_SRCS))
+$$(TGT): $(ROOT_DIR)$(1) | $$(dir $$(TGT))
+ $$(info CC $(ROOT_DIR)$1)
+ @$(CROSS_PREFIX)gcc $(COPTS) -c $(ROOT_DIR)$(1) -o $$@
+endef
+
+#
+# Rules to generate offsets.
+#
+define gen_offsets
+ TMP := $(patsubst src/%,%,$(1))
+ TMP := $$(dir $$(TMP))inc/$$(notdir $$(TMP))
+ TGT := $$(patsubst %.c,%.h,$(OUT)/$$(TMP))
+ GLOBAL_OFFSETS += $$(TGT)
+$$(TGT): $(ROOT_DIR)$(1) | $$(dir $$(TGT))
+ $$(info GENOFFSET $(ROOT_DIR)$1)
+ @$(CROSS_PREFIX)gcc $(COPTS) -MT $$@ -S -c $(ROOT_DIR)$(1) -o - | grep DEFINE_OFFSET | sed 's/\tDEFINE_OFFSET/#define/g' > $$@
+endef
+
+#
+# Rules to build S files.
+#
+define build_S
+ TGT := $(patsubst %.S,%.o,$(OUT)/$(patsubst src/%,%,$(1)))
+ GLOBAL_OBJS += $$(TGT)
+ REMAIN_SRCS := $$(filter-out $(1),$$(REMAIN_SRCS))
+$$(TGT): $(ROOT_DIR)$(1) $(GLOBAL_OFFSETS) | $$(dir $$(TGT))
+ $$(info AS $(ROOT_DIR)$1)
+ @$(CROSS_PREFIX)gcc $(COPTS) -c $(ROOT_DIR)$(1) -o $$@
+endef
+
+#
+# Generate the build rules for all .c and .S files.
+#
+GLOBAL_OBJS :=
+GLOBAL_OFFSETS :=
+REMAIN_SRCS := $(GLOBAL_SRCS)
+$(foreach file,$(filter %.c,$(GLOBAL_OFFSET_SRCS)),$(eval $(call gen_offsets,$(file))))
+$(foreach file,$(filter %.c,$(GLOBAL_SRCS)),$(eval $(call build_c,$(file))))
+$(foreach file,$(filter %.S,$(GLOBAL_SRCS)),$(eval $(call build_S,$(file))))
+
+#
+# Check if there are any source files which we don't know to handle.
+#
+ifneq ($(REMAIN_SRCS),)
+ $(error Don't know how to handle $(REMAIN_SRCS))
+endif
+
+#
+# Rule to create all output directories.
+#
+define create_dir
+$1:
+ @mkdir -p $1
+endef
+$(foreach name,$(sort $(dir $(GLOBAL_OBJS))),$(eval $(call create_dir,$(name))))
+$(foreach name,$(sort $(dir $(GLOBAL_OFFSETS))),$(eval $(call create_dir,$(name))))
+
+#
+# Rules to build the hypervisor.
+#
+$(OUT)/$(NAME): $(GLOBAL_OBJS) $(ROOT_DIR)src/$(NAME).ld
+ $(info LD $(ROOT_DIR)src/$(NAME).ld)
+ @$(CROSS_PREFIX)ld -g -pie $(GLOBAL_OBJS) -T$(ROOT_DIR)src/$(NAME).ld --defsym PREFERRED_LOAD_ADDRESS=$(LOAD_ADDRESS) -o $@
+
+$(OUT)/$(NAME).bin: $(OUT)/$(NAME)
+ $(info OBJCOPY $@)
+ @$(CROSS_PREFIX)objcopy -O binary $< $@
+
+clean:
+ rm -rf $(ROOT_DIR)out
+
+-include $(patsubst %,%.d,$(GLOBAL_OBJS) $(GLOBAL_OFFSETS))
diff --git a/inc/alloc.h b/inc/alloc.h
new file mode 100644
index 0000000..b3fc110
--- /dev/null
+++ b/inc/alloc.h
@@ -0,0 +1,11 @@
+#ifndef _ALLOC_H
+#define _ALLOC_H
+
+#include <stddef.h>
+
+void halloc_init(size_t base, size_t size);
+void *halloc(size_t size);
+void hfree(void *ptr);
+void *halloc_aligned(size_t size, size_t align);
+
+#endif /* _ALLOC_H */
diff --git a/inc/arch.h b/inc/arch.h
new file mode 100644
index 0000000..e12a8cf
--- /dev/null
+++ b/inc/arch.h
@@ -0,0 +1,12 @@
+#ifndef _ARCH_H
+#define _ARCH_H
+
+#include "cpu.h"
+#include "irq.h"
+
+void arch_init(struct cpu *cpu);
+void arch_irq_init_percpu(void);
+void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p);
+void arch_putchar(char c);
+
+#endif /* _ARCH_H */
diff --git a/inc/cpio.h b/inc/cpio.h
new file mode 100644
index 0000000..cc244ce
--- /dev/null
+++ b/inc/cpio.h
@@ -0,0 +1,22 @@
+#ifndef _CPIO_H
+#define _CPIO_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct cpio {
+ const struct cpio_header *first;
+ size_t total_size;
+};
+
+struct cpio_iter {
+ const struct cpio_header *cur;
+ size_t size_left;
+};
+
+void cpio_init(struct cpio *c, const void *buf, size_t size);
+void cpio_init_iter(struct cpio *c, struct cpio_iter *iter);
+bool cpio_next(struct cpio_iter *iter, const char **name,
+ const void **contents, size_t *size);
+
+#endif /* _CPIO_H */
diff --git a/inc/cpu.h b/inc/cpu.h
new file mode 100644
index 0000000..6eb90ab
--- /dev/null
+++ b/inc/cpu.h
@@ -0,0 +1,61 @@
+#ifndef _CPU_H
+#define _CPU_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arch_cpu.h"
+#include "list.h"
+#include "spinlock.h"
+
+struct vcpu {
+ struct list_entry links;
+ bool is_runnable;
+ bool interrupt;
+ struct arch_regs regs;
+ struct cpu *cpu;
+ struct vm *vm;
+};
+
+/* TODO: Update alignment such that cpus are in different cache lines. */
+struct cpu {
+ struct spinlock lock;
+
+ struct vcpu *current;
+
+ struct list_entry ready_queue;
+
+ /*
+ * Enabling/disabling irqs are counted per-cpu. They are enabled when
+ * the count is zero, and disabled when it's non-zero.
+ */
+ uint32_t irq_disable_count;
+
+ /*
+ * The number of VMs that have turned this CPU on. CPUs are off when
+ * this count is zero, and on when this count is non-zero.
+ */
+ uint32_t cpu_on_count;
+
+ bool (*timer_cb)(void *context);
+ void *timer_context;
+
+ /* CPU identifier. Doesn't have to be contiguous. */
+ size_t id;
+
+ /* Pointer to bottom of the stack. */
+ void *stack_bottom;
+};
+
+void cpu_init(struct cpu *c);
+void cpu_irq_enable(struct cpu *c);
+void cpu_irq_disable(struct cpu *c);
+void cpu_on(struct cpu *c);
+void cpu_off(struct cpu *c);
+
+void vcpu_init(struct vcpu *vcpu, struct cpu *cpu, struct vm *vm);
+void vcpu_ready(struct vcpu *v);
+void vcpu_unready(struct vcpu *v);
+
+#endif /* _CPU_H */
diff --git a/inc/decl_offsets.h b/inc/decl_offsets.h
new file mode 100644
index 0000000..01f182f
--- /dev/null
+++ b/inc/decl_offsets.h
@@ -0,0 +1,10 @@
+#ifndef _DECL_OFFSETS_H
+#define _DECL_OFFSETS_H
+
+#define DECL(name, type, field) \
+ __asm("DEFINE_OFFSET " #name " %0" : : "n" (offsetof(type, field)))
+
+#define DECL_SIZE(name, type) \
+ __asm("DEFINE_OFFSET " #name " %0" : : "n" (sizeof(type)))
+
+#endif /* _DECL_OFFSETS_H */
diff --git a/inc/dlog.h b/inc/dlog.h
new file mode 100644
index 0000000..d4c08c0
--- /dev/null
+++ b/inc/dlog.h
@@ -0,0 +1,12 @@
+#ifndef _DLOG_H
+#define _DLOG_H
+
+#if DEBUG
+void dlog(const char *fmt, ...);
+#else
+#define dlog(...)
+#endif
+
+void dlog_init(void (*pchar)(char));
+
+#endif /* _DLOG_H */
diff --git a/inc/fdt.h b/inc/fdt.h
new file mode 100644
index 0000000..fb23c8e
--- /dev/null
+++ b/inc/fdt.h
@@ -0,0 +1,23 @@
+#ifndef _FDT_H
+#define _FDT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct fdt_node {
+ /* TODO: What do we need here? */
+ const struct fdt_header *hdr;
+ const char *begin;
+ const char *end;
+ const char *strs;
+};
+
+void fdt_dump(struct fdt_header *hdr);
+void fdt_root_node(struct fdt_node *node, const struct fdt_header *hdr);
+bool fdt_find_child(struct fdt_node *node, const char *child);
+bool fdt_read_property(const struct fdt_node *node, const char *name,
+ const char **buf, uint32_t *size);
+
+void fdt_add_mem_reservation(struct fdt_header *hdr, uint64_t addr, uint64_t len);
+
+#endif /* _FDT_H */
diff --git a/inc/irq.h b/inc/irq.h
new file mode 100644
index 0000000..af5faba
--- /dev/null
+++ b/inc/irq.h
@@ -0,0 +1,32 @@
+#ifndef _IRQ_H
+#define _IRQ_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+struct irq_handle;
+
+enum irq_trigger {
+ irq_trigger_level,
+ irq_trigger_edge,
+};
+
+enum irq_polarity {
+ irq_polarity_active_high,
+ irq_polarity_active_low,
+};
+
+/* TODO: Add target CPUs here. */
+void irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p,
+ bool (*cb)(void *, struct irq_handle *), void *context);
+void irq_enable(uint32_t num);
+
+void irq_dismiss(struct irq_handle *h);
+
+/* TODO: These don't really belong here, do they? */
+bool irq_handle(uint32_t num, struct irq_handle *h);
+void irq_init(void);
+void irq_init_percpu(void);
+
+#endif
diff --git a/inc/list.h b/inc/list.h
new file mode 100644
index 0000000..3001394
--- /dev/null
+++ b/inc/list.h
@@ -0,0 +1,50 @@
+#ifndef _LIST_H
+#define _LIST_H
+
+#include <stdbool.h>
+
+struct list_entry {
+ struct list_entry *next;
+ struct list_entry *prev;
+};
+
+#define LIST_INIT(l) {.next = &(l), .prev = &(l)}
+#define LIST_ELEM(ptr, type, field) \
+	((type*)((char*)ptr - offsetof(type, field)))
+
+static inline void list_init(struct list_entry *e)
+{
+ e->next = e;
+ e->prev = e;
+}
+
+static inline void list_append(struct list_entry *l, struct list_entry *e)
+{
+ e->next = l;
+ e->prev = l->prev;
+
+ e->next->prev = e;
+ e->prev->next = e;
+}
+
+static inline void list_prepend(struct list_entry *l, struct list_entry *e)
+{
+ e->next = l->next;
+ e->prev = l;
+
+ e->next->prev = e;
+ e->prev->next = e;
+}
+
+static inline bool list_empty(struct list_entry *l)
+{
+ return l->next == l;
+}
+
+static inline void list_remove(struct list_entry *e)
+{
+ e->prev->next = e->next;
+ e->next->prev = e->prev;
+}
+
+#endif /* _LIST_H */
diff --git a/inc/spinlock.h b/inc/spinlock.h
new file mode 100644
index 0000000..7761980
--- /dev/null
+++ b/inc/spinlock.h
@@ -0,0 +1,27 @@
+#ifndef _SPINLOCK_H
+#define _SPINLOCK_H
+
+#include <stdatomic.h>
+
+struct spinlock {
+ atomic_flag v;
+};
+
+#define SPINLOCK_INIT {.v = ATOMIC_FLAG_INIT}
+
+static inline void sl_init(struct spinlock *l)
+{
+ *l = (struct spinlock)SPINLOCK_INIT;
+}
+
+static inline void sl_lock(struct spinlock *l)
+{
+ while (atomic_flag_test_and_set_explicit(&l->v, memory_order_acquire));
+}
+
+static inline void sl_unlock(struct spinlock *l)
+{
+ atomic_flag_clear_explicit(&l->v, memory_order_release);
+}
+
+#endif /* _SPINLOCK_H */
diff --git a/inc/std.h b/inc/std.h
new file mode 100644
index 0000000..d2438a2
--- /dev/null
+++ b/inc/std.h
@@ -0,0 +1,52 @@
+#ifndef _STD_H
+#define _STD_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dst, const void *src, size_t n);
+void *memmove(void *dst, const void *src, size_t n);
+int memcmp(const void *a, const void *b, size_t n);
+
+size_t strlen(const char *str);
+int strcmp(const char *a, const char *b);
+
+static inline uint16_t ntohs(uint16_t v)
+{
+ return v << 8 | v >> 8;
+}
+
+static inline uint32_t ntohl(uint32_t v)
+{
+ /* TODO: no conversion needed if native is big endian. */
+ return (v << 24) |
+ (v >> 24) |
+ ((v & 0xff00) << 8) |
+ ((v & 0xff0000) >> 8);
+}
+
+static inline uint64_t ntohll(uint64_t v)
+{
+ /* TODO: no conversion needed if native is big endian. */
+ return (v << 56) |
+ (v >> 56) |
+ ((v & 0xff00) << 40) |
+ ((v & 0xff000000000000) >> 40) |
+ ((v & 0xff0000) << 24) |
+ ((v & 0xff0000000000) >> 24) |
+ ((v & 0xff000000) << 8) |
+ ((v & 0xff00000000) >> 8);
+}
+
+static inline uint32_t htonl(uint32_t v)
+{
+ return ntohl(v);
+}
+
+static inline uint64_t htonll(uint64_t v)
+{
+ return ntohll(v);
+}
+
+#endif /* _STD_H */
diff --git a/inc/timer.h b/inc/timer.h
new file mode 100644
index 0000000..aac58c4
--- /dev/null
+++ b/inc/timer.h
@@ -0,0 +1,10 @@
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#include <stdbool.h>
+
+void timer_init(void);
+void timer_init_percpu(void);
+void timer_set(uint64_t time, bool (*cb)(void *), void *context);
+
+#endif /* _TIMER_H */
diff --git a/inc/vm.h b/inc/vm.h
new file mode 100644
index 0000000..aa9133c
--- /dev/null
+++ b/inc/vm.h
@@ -0,0 +1,14 @@
+#ifndef _VM_H
+#define _VM_H
+
+#include "cpu.h"
+
+struct vm {
+ struct vcpu vcpus[MAX_CPUS];
+ struct arch_page_table page_table;
+};
+
+void vm_init(struct vm *vm, struct cpu *cpus);
+void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg);
+
+#endif /* _VM_H */
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..cd7c614
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,63 @@
+#include "alloc.h"
+
+#include "dlog.h"
+#include "spinlock.h"
+
+static size_t alloc_base;
+static size_t alloc_limit;
+static struct spinlock alloc_lock = SPINLOCK_INIT;
+
+/*
+ * Initializes the allocator.
+ */
+void halloc_init(size_t base, size_t size)
+{
+ alloc_base = base;
+ alloc_limit = base + size;
+}
+
+/*
+ * Allocates the requested amount of memory. Return NULL when there isn't enough
+ * free memory.
+ */
+void *halloc(size_t size)
+{
+ return halloc_aligned(size, 2 * sizeof(size_t));
+}
+
+/*
+ * Frees the provided memory.
+ *
+ * Currently unimplemented.
+ */
+void hfree(void *ptr)
+{
+ dlog("Attempted to free pointer %p\n", ptr);
+}
+
+/*
+ * Allocates the requested amount of memory, with the requested alignment.
+ *
+ * Alignment must be a power of two. Returns NULL when there isn't enough free
+ * memory.
+ */
+void *halloc_aligned(size_t size, size_t align)
+{
+ size_t begin;
+ size_t end;
+
+ sl_lock(&alloc_lock);
+
+ begin = (alloc_base + align - 1) & ~(align - 1);
+ end = begin + size;
+
+ /* Check for overflows, and that there is enough free mem. */
+ if (end > begin && begin >= alloc_base && end <= alloc_limit)
+ alloc_base = end;
+ else
+ begin = 0;
+
+ sl_unlock(&alloc_lock);
+
+ return (void *)begin;
+}
diff --git a/src/arch/aarch64/entry.S b/src/arch/aarch64/entry.S
new file mode 100644
index 0000000..560aaf8
--- /dev/null
+++ b/src/arch/aarch64/entry.S
@@ -0,0 +1,112 @@
+#include "offsets.h"
+
+#define PECOFF_FILE_ALIGNMENT 0x200
+
+.section .init.entry, "ax"
+
+.global entry
+entry:
+
+ add x13, x18, #0x16
+ b 0f
+ .quad 4096 /* text_offset */
+ .quad file_size /* image_size */
+ .quad 0 /* flags */
+ .quad 0 /* res2 */
+ .quad 0 /* res3 */
+ .quad 0 /* res4 */
+ .word 0x644d5241 /* magic */
+ .word 0
+
+ /*
+ * Calculate the difference between the actual load address and the
+ * preferred one. We'll use this to relocate.
+ */
+0: adrp x25, entry
+ add x25, x25, :lo12:entry
+
+ ldr w29, =_orig_base
+
+ sub x25, x25, x29
+
+ /* Find where the relocations begin and end. */
+ adrp x29, rela_begin
+ add x29, x29, :lo12:rela_begin
+
+ adrp x30, rela_end
+ add x30, x30, :lo12:rela_end
+
+ /* Iterate over all relocations. */
+1: cmp x29, x30
+ b.eq 2f
+
+ ldp x26, x27, [x29], #16
+ ldr x28, [x29], #8
+
+ cmp w27, #1027 /* R_AARCH64_RELATIVE */
+# b.ne 1b
+ b.ne .
+
+ add x28, x28, x25
+ str x28, [x26, x25]
+ b 1b
+
+ /* Zero out the bss section. */
+2: adrp x29, bss_begin
+ add x29, x29, :lo12:bss_begin
+
+ adrp x30, bss_end
+ add x30, x30, :lo12:bss_end
+
+3: cmp x29, x30
+ b.hs 4f
+
+ stp xzr, xzr, [x29], #16
+ b 3b
+
+4: /* Save the FDT to a global variable. */
+ adrp x30, fdt
+ add x30, x30, :lo12:fdt
+ str x0, [x30]
+
+ /* Get pointer to first cpu. */
+ adrp x0, cpus
+ add x0, x0, :lo12:cpus
+
+
+.globl cpu_entry
+cpu_entry:
+ /* Disable interrupts. */
+ msr DAIFSet, #0xf
+
+ /* Save pointer to CPU struct for later reference. */
+ msr tpidr_el2, x0
+
+ /* Use SPx (instead of SP0). */
+ msr spsel, #1
+
+ /* Prepare the stack. */
+ ldr x30, [x0, #CPU_STACK_BOTTOM]
+ mov sp, x30
+
+ /* Configure exception handlers. */
+ adrp x30, vector_table_el2
+ add x30, x30, :lo12:vector_table_el2
+ msr vbar_el2, x30
+
+ /* Call into C code. */
+ bl cpu_main
+
+ /* Run the first vcpu. */
+ bl cpu_next_vcpu
+ b vcpu_enter_restore_all
+
+ /* Loop forever waiting for interrupts. */
+5: wfi
+ b 5b
+
+/* TODO: Move this elsewhere. */
+.globl smc
+smc:
+ SMC #0
+ ret
diff --git a/src/arch/aarch64/exceptions.S b/src/arch/aarch64/exceptions.S
new file mode 100644
index 0000000..97bd2fd
--- /dev/null
+++ b/src/arch/aarch64/exceptions.S
@@ -0,0 +1,353 @@
+#include "offsets.h"
+
+.text
+
+.balign 0x800
+.global vector_table_el2
+vector_table_el2:
+ /* sync_cur_sp0 */
+ b .
+
+.balign 0x80
+ /* irq_cur_sp0 */
+ b irq_current
+
+.balign 0x80
+ /* fiq_cur_sp0 */
+ b .
+
+.balign 0x80
+ /* serr_cur_sp0 */
+ b .
+
+.balign 0x80
+ /* sync_cur_spx */
+ mrs x0, esr_el2
+ mrs x1, elr_el2
+ b sync_current_exception
+
+.balign 0x80
+ /* irq_cur_spx */
+ b irq_current
+
+.balign 0x80
+ /* fiq_cur_spx */
+ b .
+
+.balign 0x80
+ /* serr_cur_spx */
+ b .
+
+.balign 0x80
+ /* sync_lower_64 */
+
+ /* Save x18 since we're about to clobber it. */
+ str x18, [sp, #-8]
+
+ /* Extract the exception class (EC) from exception syndrome register. */
+ mrs x18, esr_el2
+ lsr x18, x18, #26
+
+ /* Take the slow path if exception is not due to an HVC instruction. */
+ subs x18, x18, #0x16
+ b.ne slow_sync_lower_64
+
+ /* Save x29 and x30, then jump to HVC handler. */
+ stp x29, x30, [sp, #-16]!
+ bl hvc_handler
+ ldp x29, x30, [sp], #16
+ cbnz x1, sync_lower_64_switch
+
+ /* Zero out all volatile registers (except x0) and return. */
+ stp xzr, xzr, [sp, #-16]
+ ldp x1, x2, [sp, #-16]
+ ldp x3, x4, [sp, #-16]
+ ldp x5, x6, [sp, #-16]
+ ldp x7, x8, [sp, #-16]
+ ldp x9, x10, [sp, #-16]
+ ldp x11, x12, [sp, #-16]
+ ldp x13, x14, [sp, #-16]
+ ldp x15, x16, [sp, #-16]
+ ldp x17, x18, [sp, #-16]
+ eret
+
+.balign 0x80
+ /* irq_lower_64 */
+
+ /* Save x0 since we're about to clobber it. */
+ str x0, [sp, #-8]
+
+ /* Get the current vcpu. */
+ mrs x0, tpidr_el2
+ ldr x0, [x0, #CPU_CURRENT]
+
+ /* Save volatile registers. */
+ add x0, x0, #VCPU_REGS
+ stp x2, x3, [x0, #8 * 2]
+ stp x4, x5, [x0, #8 * 4]
+ stp x6, x7, [x0, #8 * 6]
+ stp x8, x9, [x0, #8 * 8]
+ stp x10, x11, [x0, #8 * 10]
+ stp x12, x13, [x0, #8 * 12]
+ stp x14, x15, [x0, #8 * 14]
+ stp x16, x17, [x0, #8 * 16]
+ str x18, [x0, #8 * 18]
+ stp x29, x30, [x0, #8 * 29]
+
+ ldr x2, [sp, #-8]
+ stp x2, x1, [x0, #8 * 0]
+
+ /* Save return address & mode. */
+ mrs x1, elr_el2
+ mrs x2, spsr_el2
+ stp x1, x2, [x0, #8 * 31]
+
+ /* Call C handler. */
+ bl irq_handle_lower
+ cbz x0, vcpu_return
+
+ /* Find a new vcpu to run. */
+ bl cpu_next_vcpu
+ cbz x0, vcpu_return
+ b vcpu_switch
+
+.balign 0x80
+ /* fiq_lower_64 */
+ b .
+
+.balign 0x80
+ /* serr_lower_64 */
+ b .
+
+.balign 0x80
+ /* sync_lower_32 */
+ b .
+
+.balign 0x80
+ /* irq_lower_32 */
+ b .
+
+.balign 0x80
+ /* fiq_lower_32 */
+ b .
+
+.balign 0x80
+ /* serr_lower_32 */
+ b .
+
+.balign 0x80
+
+vcpu_switch:
+ /* Save non-volatile registers. */
+ add x1, x1, #VCPU_REGS
+ stp x19, x20, [x1, #8 * 19]
+ stp x21, x22, [x1, #8 * 21]
+ stp x23, x24, [x1, #8 * 23]
+ stp x25, x26, [x1, #8 * 25]
+ stp x27, x28, [x1, #8 * 27]
+
+ /* Save lazy state. */
+ add x1, x1, #(VCPU_LAZY - VCPU_REGS)
+
+ mrs x24, vmpidr_el2
+ mrs x25, csselr_el1
+ stp x24, x25, [x1, #16 * 0]
+
+ mrs x2, sctlr_el1
+ mrs x3, actlr_el1
+ stp x2, x3, [x1, #16 * 1]
+
+ mrs x4, cpacr_el1
+ mrs x5, ttbr0_el1
+ stp x4, x5, [x1, #16 * 2]
+
+ mrs x6, ttbr1_el1
+ mrs x7, tcr_el1
+ stp x6, x7, [x1, #16 * 3]
+
+ mrs x8, esr_el1
+ mrs x9, afsr0_el1
+ stp x8, x9, [x1, #16 * 4]
+
+ mrs x10, afsr1_el1
+ mrs x11, far_el1
+ stp x10, x11, [x1, #16 * 5]
+
+ mrs x12, mair_el1
+ mrs x13, vbar_el1
+ stp x12, x13, [x1, #16 * 6]
+
+ mrs x14, contextidr_el1
+ mrs x15, tpidr_el0
+ stp x14, x15, [x1, #16 * 7]
+
+ mrs x16, tpidrro_el0
+ mrs x17, tpidr_el1
+ stp x16, x17, [x1, #16 * 8]
+
+ mrs x18, amair_el1
+ mrs x19, cntkctl_el1
+ stp x18, x19, [x1, #16 * 9]
+
+ mrs x20, sp_el0
+ mrs x21, sp_el1
+ stp x20, x21, [x1, #16 * 10]
+
+ mrs x22, par_el1
+ str x22, [x1, #16 * 11]
+
+.globl vcpu_enter_restore_all
+vcpu_enter_restore_all:
+ /* Get a pointer to the lazy registers. */
+ add x0, x0, #VCPU_LAZY
+
+ ldp x24, x25, [x0, #16 * 0]
+ msr vmpidr_el2, x24
+ msr csselr_el1, x25
+
+ ldp x2, x3, [x0, #16 * 1]
+ msr sctlr_el1, x2
+ msr actlr_el1, x3
+
+ ldp x4, x5, [x0, #16 * 2]
+ msr cpacr_el1, x4
+ msr ttbr0_el1, x5
+
+ ldp x6, x7, [x0, #16 * 3]
+ msr ttbr1_el1, x6
+ msr tcr_el1, x7
+
+ ldp x8, x9, [x0, #16 * 4]
+ msr esr_el1, x8
+ msr afsr0_el1, x9
+
+ ldp x10, x11, [x0, #16 * 5]
+ msr afsr1_el1, x10
+ msr far_el1, x11
+
+ ldp x12, x13, [x0, #16 * 6]
+ msr mair_el1, x12
+ msr vbar_el1, x13
+
+ ldp x14, x15, [x0, #16 * 7]
+ msr contextidr_el1, x14
+ msr tpidr_el0, x15
+
+ ldp x16, x17, [x0, #16 * 8]
+ msr tpidrro_el0, x16
+ msr tpidr_el1, x17
+
+ ldp x18, x19, [x0, #16 * 9]
+ msr amair_el1, x18
+ msr cntkctl_el1, x19
+
+ ldp x20, x21, [x0, #16 * 10]
+ msr sp_el0, x20
+ msr sp_el1, x21
+
+ ldp x22, x23, [x0, #16 * 11]
+ msr par_el1, x22
+ msr hcr_el2, x23
+
+ /* Restore non-volatile registers. */
+ add x0, x0, #(VCPU_REGS - VCPU_LAZY)
+
+ ldp x19, x20, [x0, #8 * 19]
+ ldp x21, x22, [x0, #8 * 21]
+ ldp x23, x24, [x0, #8 * 23]
+ ldp x25, x26, [x0, #8 * 25]
+ ldp x27, x28, [x0, #8 * 27]
+
+ /* Restore volatile registers and return. */
+ sub x0, x0, #VCPU_REGS
+
+vcpu_return:
+ /* Restore volatile registers. */
+ add x0, x0, #VCPU_REGS
+
+ ldp x4, x5, [x0, #8 * 4]
+ ldp x6, x7, [x0, #8 * 6]
+ ldp x8, x9, [x0, #8 * 8]
+ ldp x10, x11, [x0, #8 * 10]
+ ldp x12, x13, [x0, #8 * 12]
+ ldp x14, x15, [x0, #8 * 14]
+ ldp x16, x17, [x0, #8 * 16]
+ ldr x18, [x0, #8 * 18]
+ ldp x29, x30, [x0, #8 * 29]
+
+ /* Restore return address & mode. */
+ ldp x1, x2, [x0, #8 * 31]
+ msr elr_el2, x1
+ msr spsr_el2, x2
+
+ /* Restore x0..x3, which we have used as scratch before. */
+ ldp x2, x3, [x0, #8 * 2]
+ ldp x0, x1, [x0, #8 * 0]
+ eret
+
+slow_sync_lower_64:
+ /* Get the current vcpu. */
+ mrs x18, tpidr_el2
+ /* TODO: tpidr_el2 should store the vcpu pointer directly. */
+ ldr x18, [x18, #CPU_CURRENT]
+
+ /* Save volatile registers. */
+ add x18, x18, #VCPU_REGS
+ stp x0, x1, [x18, #8 * 0]
+ stp x2, x3, [x18, #8 * 2]
+ stp x4, x5, [x18, #8 * 4]
+ stp x6, x7, [x18, #8 * 6]
+ stp x8, x9, [x18, #8 * 8]
+ stp x10, x11, [x18, #8 * 10]
+ stp x12, x13, [x18, #8 * 12]
+ stp x14, x15, [x18, #8 * 14]
+ stp x16, x17, [x18, #8 * 16]
+ stp x29, x30, [x18, #8 * 29]
+
+ ldr x0, [sp, #-8]
+ str x0, [x18, #8 * 18]
+
+ /* Save return address & mode. */
+ mrs x1, elr_el2
+ mrs x2, spsr_el2
+ stp x1, x2, [x18, #8 * 31]
+
+ /* Read syndrome register and call C handler. */
+ mrs x0, esr_el2
+ bl sync_lower_exception
+ cbz x0, vcpu_return
+
+ /* Find a new vcpu to run. */
+ bl cpu_next_vcpu
+ cbz x0, vcpu_return
+ b vcpu_switch
+
+sync_lower_64_switch:
+ /* We'll have to reschedule, so store state before doing so. */
+ mrs x18, tpidr_el2
+ ldr x18, [x18, #CPU_CURRENT]
+
+ /* Store zeroes in volatile register storage, except x0. */
+ add x18, x18, #VCPU_REGS
+ stp x0, xzr, [x18, #8 * 0]
+ stp xzr, xzr, [x18, #8 * 2]
+ stp xzr, xzr, [x18, #8 * 4]
+ stp xzr, xzr, [x18, #8 * 6]
+ stp xzr, xzr, [x18, #8 * 8]
+ stp xzr, xzr, [x18, #8 * 10]
+ stp xzr, xzr, [x18, #8 * 12]
+ stp xzr, xzr, [x18, #8 * 14]
+ stp xzr, xzr, [x18, #8 * 16]
+ stp x29, x30, [x18, #8 * 29]
+
+ str xzr, [x18, #8 * 18]
+
+ /* Save return address & mode. */
+ mrs x1, elr_el2
+ mrs x2, spsr_el2
+ stp x1, x2, [x18, #8 * 31]
+
+ /* Find a new vcpu to run. */
+ bl cpu_next_vcpu
+ cbz x0, vcpu_return
+ b vcpu_switch
diff --git a/src/arch/aarch64/gicv2.c b/src/arch/aarch64/gicv2.c
new file mode 100644
index 0000000..c160007
--- /dev/null
+++ b/src/arch/aarch64/gicv2.c
@@ -0,0 +1,102 @@
+#include "dlog.h"
+#include "io.h"
+#include "irq.h"
+
+#define GICD_CTLR (0x00)
+#define GICD_TYPER (0x04)
+#define GICD_ISENABLER (0x100)
+#define GICD_ICENABLER (0x180)
+#define GICD_ICPENDR (0x280)
+#define GICD_ICACTIVER (0x380)
+#define GICD_IPRIORITYR (0x400)
+#define GICD_ITARGETSR (0x800)
+#define GICD_ICFGR (0xc00)
+
+#define GICC_CTLR (0x000)
+#define GICC_PMR (0x004)
+#define GICC_IAR (0x00c)
+#define GICC_EOIR (0x010)
+
+struct irq_handle {
+ uint32_t iar;
+};
+
+/*
+ * Dismisses an irq that was signaled and is being processed.
+ */
+void irq_dismiss(struct irq_handle *h)
+{
+ io_write(GICC_BASE + GICC_EOIR, h->iar);
+}
+
+/*
+ * Configures the given irq number before it can be enabled.
+ */
+void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p)
+{
+ uint32_t v = io_read(GICD_BASE + GICD_ICFGR + (num / 16) * 4);
+
+ if (t == irq_trigger_level)
+ v &= ~(2u << ((num % 16) * 2));
+ else
+ v |= 2u << ((num % 16) * 2);
+
+ io_write(GICD_BASE + GICD_ICFGR + (num / 16) * 4, v);
+}
+
+/*
+ * Enables the given irq number such that interrupts will be signaled when its
+ * interrupt line is asserted. A caller must first configure the irq before
+ * enabling it.
+ */
+void irq_enable(uint32_t num)
+{
+ io_write(GICD_BASE + GICD_ISENABLER + (num / 32) * 4,
+ (1u << (num % 32)));
+}
+
+/*
+ * Handles an interrupt signaled when the CPU was in a lower level (EL0 or EL1),
+ * it is called directly from the exception handler.
+ *
+ * The return value indicates whether a new vcpu should be scheduled.
+ */
+bool irq_handle_lower(void)
+{
+ struct irq_handle h = {
+ .iar = io_read(GICC_BASE + GICC_IAR),
+ };
+
+ dlog("irq: %u\n", h.iar & 0x3ff);
+
+ return irq_handle(h.iar & 0x3ff, &h);
+}
+
+/*
+ * Initializes the GICv2 for use as the interrupt controller.
+ */
+void arch_irq_init_percpu(void)
+{
+ uint32_t i;
+ uint32_t max = 32 * (1 + (io_read(GICD_BASE + GICD_TYPER) & 0x1f));
+
+ /* Disable all irqs, clear pending & active states. */
+ for (i = 0; i < (max + 31) / 32; i++) {
+ io_write(GICD_BASE + GICD_ICENABLER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICACTIVER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICPENDR + i * 4, 0xffffffff);
+ }
+
+ /* Set the priority to zero, and cpu target to cpu 0 by default. */
+ for (i = 0; i < (max + 3) / 4; i++) {
+ io_write(GICD_BASE + GICD_IPRIORITYR + i * 4, 0);
+ io_write(GICD_BASE + GICD_ITARGETSR + i * 4, 0x01010101);
+ }
+
+ /* Allow all irq levels to interrupt the current CPU. */
+ io_write(GICC_BASE + GICC_PMR, 0xff);
+
+ /* Enable distributor and CPU interfaces. */
+ io_write(GICD_BASE + GICD_CTLR, 1);
+ io_write(GICC_BASE + GICC_CTLR, 1);
+}
diff --git a/src/arch/aarch64/gicv3.c b/src/arch/aarch64/gicv3.c
new file mode 100644
index 0000000..c160007
--- /dev/null
+++ b/src/arch/aarch64/gicv3.c
@@ -0,0 +1,102 @@
+#include "dlog.h"
+#include "io.h"
+#include "irq.h"
+
+#define GICD_CTLR (0x00)
+#define GICD_TYPER (0x04)
+#define GICD_ISENABLER (0x100)
+#define GICD_ICENABLER (0x180)
+#define GICD_ICPENDR (0x280)
+#define GICD_ICACTIVER (0x380)
+#define GICD_IPRIORITYR (0x400)
+#define GICD_ITARGETSR (0x800)
+#define GICD_ICFGR (0xc00)
+
+#define GICC_CTLR (0x000)
+#define GICC_PMR (0x004)
+#define GICC_IAR (0x00c)
+#define GICC_EOIR (0x010)
+
+struct irq_handle {
+ uint32_t iar;
+};
+
+/*
+ * Dismisses an irq that was signaled and is being processed.
+ */
+void irq_dismiss(struct irq_handle *h)
+{
+ io_write(GICC_BASE + GICC_EOIR, h->iar);
+}
+
+/*
+ * Configures the given irq number before it can be enabled.
+ */
+void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p)
+{
+ uint32_t v = io_read(GICD_BASE + GICD_ICFGR + (num / 16) * 4);
+
+ if (t == irq_trigger_level)
+ v &= ~(2u << ((num % 16) * 2));
+ else
+ v |= 2u << ((num % 16) * 2);
+
+ io_write(GICD_BASE + GICD_ICFGR + (num / 16) * 4, v);
+}
+
+/*
+ * Enables the given irq number such that interrupts will be signaled when its
+ * interrupt line is asserted. A caller must first configure the irq before
+ * enabling it.
+ */
+void irq_enable(uint32_t num)
+{
+ io_write(GICD_BASE + GICD_ISENABLER + (num / 32) * 4,
+ (1u << (num % 32)));
+}
+
+/*
+ * Handles an interrupt signaled when the CPU was in a lower level (EL0 or EL1),
+ * it is called directly from the exception handler.
+ *
+ * The return value indicates whether a new vcpu should be scheduled.
+ */
+bool irq_handle_lower(void)
+{
+ struct irq_handle h = {
+ .iar = io_read(GICC_BASE + GICC_IAR),
+ };
+
+ dlog("irq: %u\n", h.iar & 0x3ff);
+
+ return irq_handle(h.iar & 0x3ff, &h);
+}
+
+/*
+ * Initializes the GICv2 for use as the interrupt controller.
+ */
+void arch_irq_init_percpu(void)
+{
+ uint32_t i;
+ uint32_t max = 32 * (1 + (io_read(GICD_BASE + GICD_TYPER) & 0x1f));
+
+ /* Disable all irqs, clear pending & active states. */
+ for (i = 0; i < (max + 31) / 32; i++) {
+ io_write(GICD_BASE + GICD_ICENABLER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICACTIVER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICPENDR + i * 4, 0xffffffff);
+ }
+
+ /* Set the priority to zero, and cpu target to cpu 0 by default. */
+ for (i = 0; i < (max + 3) / 4; i++) {
+ io_write(GICD_BASE + GICD_IPRIORITYR + i * 4, 0);
+ io_write(GICD_BASE + GICD_ITARGETSR + i * 4, 0x01010101);
+ }
+
+ /* Allow all irq levels to interrupt the current CPU. */
+ io_write(GICC_BASE + GICC_PMR, 0xff);
+
+ /* Enable distributor and CPU interfaces. */
+ io_write(GICD_BASE + GICD_CTLR, 1);
+ io_write(GICC_BASE + GICC_CTLR, 1);
+}
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
new file mode 100644
index 0000000..07b1291
--- /dev/null
+++ b/src/arch/aarch64/handler.c
@@ -0,0 +1,91 @@
+#include "cpu.h"
+#include "dlog.h"
+#include "irq.h"
+#include "vm.h"
+
+#include "msr.h"
+
+struct hvc_handler_return {
+ size_t user_ret;
+ bool schedule;
+};
+
+void irq_current(void)
+{
+ dlog("IRQ from current\n");
+ for (;;);
+}
+
+void sync_current_exception(uint64_t esr, uint64_t elr)
+{
+ dlog("Exception: esr=%#x, elr=%#x\n", esr, elr);
+ for (;;);
+}
+
+struct hvc_handler_return hvc_handler(size_t arg1)
+{
+ struct hvc_handler_return ret;
+
+ ret.schedule = true;
+
+ switch (arg1) {
+ case 0x84000000: /* PSCI_VERSION */
+ ret.user_ret = 2;
+ break;
+
+ case 0x84000006: /* PSCI_MIGRATE */
+ ret.user_ret = 2;
+ break;
+
+#if 0
+ TODO: Remove this.
+ case 1: /* TODO: Fix. */
+ {
+ extern struct vm vm0;
+ struct vcpu *vcpu = vm0.vcpus;
+ vcpu->interrupt = true;
+ vcpu_ready(vcpu);
+ dlog("Readying VCPU0 again\n");
+ }
+ ret.user_ret = 0;
+ break;
+#endif
+
+ default:
+ ret.user_ret = -1;
+ }
+
+ return ret;
+}
+
+bool sync_lower_exception(uint64_t esr)
+{
+ struct cpu *c = cpu();
+ struct vcpu *vcpu = c->current;
+
+ switch (esr >> 26) {
+ case 0x01: /* EC = 000001, WFI or WFE. */
+ /* Check TI bit of ISS. */
+ if (esr & 1)
+ return true;
+ //vcpu_unready(vcpu);
+ return true;
+
+ case 0x24: /* EC = 100100, Data abort. */
+ dlog("Data abort: pc=0x%x, esr=0x%x, ec=0x%x", vcpu->regs.pc, esr, esr >> 26);
+ if (!(esr & (1u << 10))) /* Check FnV bit. */
+ dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2), read_msr(hpfar_el2) << 8);
+ else
+ dlog(", far=invalid");
+
+ dlog("\n");
+ for (;;);
+
+ default:
+ dlog("Unknown sync exception pc=0x%x, esr=0x%x, ec=0x%x\n", vcpu->regs.pc, esr, esr >> 26);
+ for (;;);
+ }
+
+	/* TODO: For now we always reschedule. But we shouldn't. */
+ return true;
+}
diff --git a/src/arch/aarch64/hikey.mk b/src/arch/aarch64/hikey.mk
new file mode 100644
index 0000000..1bf62c1
--- /dev/null
+++ b/src/arch/aarch64/hikey.mk
@@ -0,0 +1,9 @@
+LOAD_ADDRESS := 0x1000000
+PL011_BASE := 0xf8015000
+PL011 := 1
+GICV2 := 1
+
+GICD_BASE := 0xf6801000
+GICC_BASE := 0xf6802000
+
+TIMER_IRQ := 26
diff --git a/src/arch/aarch64/inc/arch_barriers.h b/src/arch/aarch64/inc/arch_barriers.h
new file mode 100644
index 0000000..4d4cf08
--- /dev/null
+++ b/src/arch/aarch64/inc/arch_barriers.h
@@ -0,0 +1,19 @@
+#ifndef _ARCH_BARRIERS_H
+#define _ARCH_BARRIERS_H
+
+static inline void dmb(void)
+{
+ __asm__ volatile("dmb sy");
+}
+
+static inline void dsb(void)
+{
+ __asm__ volatile("dsb sy");
+}
+
+static inline void isb(void)
+{
+ __asm__ volatile("isb");
+}
+
+#endif /* _ARCH_BARRIERS_H */
diff --git a/src/arch/aarch64/inc/arch_cpu.h b/src/arch/aarch64/inc/arch_cpu.h
new file mode 100644
index 0000000..e86983e
--- /dev/null
+++ b/src/arch/aarch64/inc/arch_cpu.h
@@ -0,0 +1,124 @@
+#ifndef _ARCH_CPU_H
+#define _ARCH_CPU_H
+
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdint.h>
+
+struct arch_regs {
+ /* General purpose registers. */
+ uint64_t r[31];
+ uint64_t pc;
+ uint64_t spsr;
+
+ struct {
+ uint64_t vmpidr_el2;
+ uint64_t csselr_el1;
+ uint64_t sctlr_el1;
+ uint64_t actlr_el1;
+ uint64_t cpacr_el1;
+ uint64_t ttbr0_el1;
+ uint64_t ttbr1_el1;
+ uint64_t tcr_el1;
+ uint64_t esr_el1;
+ uint64_t afsr0_el1;
+ uint64_t afsr1_el1;
+ uint64_t far_el1;
+ uint64_t mair_el1;
+ uint64_t vbar_el1;
+ uint64_t contextidr_el1;
+ uint64_t tpidr_el0;
+ uint64_t tpidrro_el0;
+ uint64_t tpidr_el1;
+ uint64_t amair_el1;
+ uint64_t cntkctl_el1;
+ uint64_t sp_el0;
+ uint64_t sp_el1;
+ uint64_t par_el1;
+ uint64_t hcr_el2;
+ } lazy;
+};
+
+struct arch_page_table {
+ alignas(4096) uint64_t first[512];
+ alignas(4096) uint64_t entry0[512];
+ alignas(4096) uint64_t entry1[512];
+};
+
+static inline struct cpu *cpu(void)
+{
+ struct cpu *p;
+ __asm volatile("mrs %0, tpidr_el2" : "=r"(p));
+ return p;
+}
+
+static inline void arch_irq_disable(void)
+{
+ __asm volatile("msr DAIFSet, #0xf");
+}
+
+static inline void arch_irq_enable(void)
+{
+ __asm volatile("msr DAIFClr, #0xf");
+}
+
+static inline
+void arch_regs_init(struct arch_regs *r, size_t pc, size_t arg)
+{
+ /* TODO: Use constant here. */
+ r->spsr = 5 | /* M bits, set to EL1h. */
+ (0xf << 6); /* DAIF bits set; disable interrupts. */
+ r->pc = pc;
+ r->r[0] = arg;
+ r->lazy.hcr_el2 = (1u << 31) | /* RW bit. */
+// (7u << 3) | /* AMO, IMO, FMO bits. */
+ (3u << 13) | /* TWI, TWE bits. */
+ (1u << 2) | /* PTW, Protected Table Walk. */
+ (1u << 0); /* VM: enable stage-2 translation. */
+}
+
+static inline void arch_regs_set_irq(struct arch_regs *r)
+{
+ /* Set the VI bit. */
+ r->lazy.hcr_el2 |= (1u << 7);
+}
+
+static inline void arch_regs_clear_irq(struct arch_regs *r)
+{
+ /* Clear the VI bit. */
+ r->lazy.hcr_el2 &= ~(1u << 7);
+}
+
+/* TODO: Figure out what to do with this. */
+int32_t smc(size_t arg0, size_t arg1, size_t arg2, size_t arg3);
+
+static inline void arch_cpu_on(size_t id, void *ctx)
+{
+ void cpu_entry(void *ctx);
+ int32_t ret;
+
+ /*
+ * There's a race when turning a CPU on when it's in the process of
+ * turning off. We need to loop here while it is reported that the CPU
+ * is on (because it's about to turn itself off).
+ */
+ do {
+ /* CPU_ON */
+ ret = smc(0xC4000003, id, (size_t)&cpu_entry, (size_t)ctx);
+ } while (ret == -4); /* ALREADY_ON */
+}
+
+static inline void arch_cpu_off(void)
+{
+ /* CPU_OFF */
+ smc(0xC4000002, 0, 0, 0);
+}
+
+static inline void arch_set_vm_mm(struct arch_page_table *table)
+{
+ __asm volatile("msr vttbr_el2, %0" : : "r" ((size_t)table));
+}
+
+void arch_vptable_init(struct arch_page_table *table);
+
+#endif /* _ARCH_CPU_H */
diff --git a/src/arch/aarch64/inc/io.h b/src/arch/aarch64/inc/io.h
new file mode 100644
index 0000000..16f3112
--- /dev/null
+++ b/src/arch/aarch64/inc/io.h
@@ -0,0 +1,34 @@
+#ifndef _IO_H
+#define _IO_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arch_barriers.h"
+
+static inline uint32_t io_read(size_t addr)
+{
+ return *(volatile uint32_t *)addr;
+}
+
+static inline uint32_t io_read_mb(size_t addr)
+{
+ uint32_t v = io_read(addr);
+ dsb();
+ isb();
+ return v;
+}
+
+static inline void io_write(size_t addr, uint32_t v)
+{
+ *(volatile uint32_t *)addr = v;
+}
+
+static inline void io_write_mb(size_t addr, uint32_t v)
+{
+ dsb();
+ isb();
+ io_write(addr, v);
+}
+
+#endif /* _IO_H */
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
new file mode 100644
index 0000000..b062f85
--- /dev/null
+++ b/src/arch/aarch64/mm.c
@@ -0,0 +1,333 @@
+#include "arch_cpu.h"
+#include "dlog.h" /* TODO: Remove? */
+#include "msr.h"
+
+void arch_vptable_init(struct arch_page_table *table)
+{
+ uint64_t i;
+ uint64_t v;
+
+ dlog("ID_AA64MMFR0_EL1=0x%x\n", read_msr(ID_AA64MMFR0_EL1));
+
+ /* TODO: Check each bit. */
+ for (i = 0; i < 512; i++) {
+ table->entry0[i] = 1 |
+ (i << 30) | /* Address */
+ (1 << 10) | /* Access flag. */
+ (0 << 8) | /* sh: non-shareable. this preserves EL1. */
+ (3 << 6) | /* rw */
+ (0xf << 2); /* normal mem; preserves EL0/1. */
+ table->entry1[i] = 1 |
+ ((i+512) << 30) | /* Address */
+ (1 << 10) | /* Access flag. */
+ (0 << 8) | /* sh: non-shareable. this preserves EL1. */
+ (3 << 6) | /* rw */
+ (0xf << 2); /* normal mem; preserves EL0/1. */
+ table->first[i] = 0;
+ }
+
+ table->first[0] = (uint64_t)&table->entry0[0] | 3;
+ table->first[1] = (uint64_t)&table->entry1[0] | 3;
+
+ /* TODO: Where should this go? */
+ v =
+ (1u << 31) | /* RES1. */
+ (4 << 16) | /* PS: 44 bits. */
+ (0 << 14) | /* TG0: 4 KB granule. */
+ (3 << 12) | /* SH0: inner shareable. */
+ (1 << 10) | /* ORGN0: normal, cacheable ... */
+ (1 << 8) | /* IRGN0: normal, cacheable ... */
+ (2 << 6) | /* SL0: Start at level 0. */
+ (20 << 0); /* T0SZ: 44-bit input address size. */
+ write_msr(vtcr_el2, v);
+}
+
+#if 0
+#include "arch.h"
+
+#include <stdint.h>
+
+#include "alloc.h"
+#include "log.h"
+#include "msr.h"
+
+#define PAGE_BITS 12
+#define PAGE_SIZE (1 << PAGE_BITS)
+#define ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(uint64_t))
+#define INITIAL_LEVEL 1
+
+extern char text_begin[];
+extern char text_end[];
+extern char rodata_begin[];
+extern char rodata_end[];
+extern char data_begin[];
+extern char data_end[];
+extern char bin_end[];
+
+static uint64_t *ttbr;
+
+static inline size_t mm_entry_size(int level)
+{
+ return 1ull << (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
+}
+
+static inline size_t mm_level_end(size_t va, int level)
+{
+ size_t offset = (PAGE_BITS + (4 - level) * (PAGE_BITS - 3));
+ return ((va >> offset) + 1) << offset;
+}
+
+static inline size_t mm_index(size_t va, int level)
+{
+ size_t v = va >> (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
+ return v & ((1 << (PAGE_BITS - 3)) - 1);
+}
+
+static inline uint64_t mm_clear_attrs(uint64_t v)
+{
+ /* Clean bottom bits. */
+ v &= ~((1 << PAGE_BITS) - 1);
+
+ /* Clean top bits. */
+ v &= ((1ull << 59) - 1);
+
+ return v;
+}
+
+static inline uint64_t *mm_table_ptr(uint64_t pa)
+{
+ return (uint64_t *)mm_clear_attrs(pa);
+}
+
+static inline uint64_t mm_mode_to_attrs(uint64_t mode)
+{
+ uint64_t attrs =
+ (1 << 10) | /* Access flag. */
+ (2 << 8); /* sh -> outer shareable. */
+
+ /* TODO: This is different in s2. */
+ if (!(mode & MM_X)) {
+ attrs |= (1ull << 54); /* XN or UXN, [user] execute never. */
+
+ /* TODO: This is only ok in EL1, it is RES0 in EL2. */
+ attrs |= (1ull << 53); /* PXN, privileged execute never. */
+ }
+
+ /* TODO: This is different in s2. */
+ if (mode & MM_W)
+ attrs |= (0 << 6); /* rw, no EL0 access. */
+ else
+ attrs |= (2 << 6); /* read-only, no EL0 access. */
+
+ if (mode & MM_D)
+ attrs |= (0 << 2); /* device memory in MAIR_ELx. */
+ else
+ attrs |= (1 << 2); /* normal memory in MAIR_ELx. */
+
+ return attrs;
+}
+
+static uint64_t *mm_populate_table(uint64_t *table, uint64_t index)
+{
+ uint64_t *ntable;
+ uint64_t v = table[index];
+ uint64_t i;
+
+ /* Check if table entry already exists. */
+ if (v & 1) {
+ /* Fail if it's a block one. */
+ if (!(v & 2))
+ return NULL;
+ return mm_table_ptr(v);
+ }
+
+ /* Allocate a new table entry and initialize it. */
+ ntable = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
+ if (!ntable)
+ return NULL;
+
+ for (i = 0; i < ENTRIES_PER_LEVEL; i++)
+ ntable[i] = 0;
+
+ /* Fill in the new entry. */
+ table[index] = (size_t)ntable | 0x3;
+
+ return ntable;
+}
+
+static bool mm_map_level(size_t va, size_t va_end, size_t pa,
+ uint64_t attrs, uint64_t *table, int level)
+{
+ size_t i = mm_index(va, level);
+ size_t va_level_end = mm_level_end(va, level);
+ size_t entry_size = mm_entry_size(level);
+
+ /* Cap va_end so that we don't go beyond the current level's max. */
+ if (va_end > va_level_end)
+ va_end = va_level_end;
+
+ /* Fill each entry in the table. */
+ while (va < va_end) {
+ if (level == 3) {
+ table[i] = pa | 0x3 | attrs;
+ } else {
+ uint64_t *nt = mm_populate_table(table, i);
+ if (!nt) {
+ /* TODO: Undo all the work so far? */
+ return false;
+ }
+
+ if (!mm_map_level(va, va_end, pa, attrs, nt, level+1)) {
+ /* TODO: Undo all the work so far? */
+ return false;
+ }
+ }
+
+ va += entry_size;
+ pa += entry_size;
+ i++;
+ }
+
+ return true;
+}
+
+bool mm_map_range(size_t va, size_t size, uint64_t pa, uint64_t mode)
+{
+ uint64_t attrs = mm_mode_to_attrs(mode);
+ uint64_t end = mm_clear_attrs(va + size + PAGE_SIZE - 1);
+
+ va = mm_clear_attrs(va);
+ pa = mm_clear_attrs(pa);
+
+ return mm_map_level(va, end, pa, attrs, ttbr, INITIAL_LEVEL);
+}
+
+bool mm_map_page(size_t va, size_t pa, uint64_t mode)
+{
+ size_t i;
+ uint64_t attrs = mm_mode_to_attrs(mode);
+ uint64_t *table = ttbr;
+
+ va = mm_clear_attrs(va);
+ pa = mm_clear_attrs(pa);
+ for (i = INITIAL_LEVEL; i < 3; i++) {
+ table = mm_populate_table(table, mm_index(va, i));
+ if (!table)
+ return false;
+ }
+
+ /* We reached level 3. */
+ i = mm_index(va, 3);
+ table[i] = pa | 0x3 | attrs;
+ return true;
+}
+
+bool arch_init_mm(void)
+{
+#if 0
+ size_t i;
+
+ /* Allocate the first level, then zero it out. */
+ ttbr = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
+ if (!ttbr)
+ return false;
+
+ for (i = 0; i < ENTRIES_PER_LEVEL; i++)
+ ttbr[i] = 0;
+
+ /* Map page for uart. */
+ mm_map_page(PL011_BASE, PL011_BASE, MM_R | MM_W | MM_D);
+
+ /* Map page for gic. */
+ mm_map_page(GICD_BASE, GICD_BASE, MM_R | MM_W | MM_D);
+ mm_map_page(GICC_BASE, GICC_BASE, MM_R | MM_W | MM_D);
+
+ /* Map each section. */
+ mm_map_range((size_t)text_begin, text_end - text_begin,
+ (size_t)text_begin, MM_X);
+
+ mm_map_range((size_t)rodata_begin, rodata_end - rodata_begin,
+ (size_t)rodata_begin, MM_R);
+
+ mm_map_range((size_t)data_begin, data_end - data_begin,
+ (size_t)data_begin, MM_R | MM_W);
+
+ mm_map_range((size_t)bin_end, 20 * 1024 * 1024, (size_t)bin_end,
+ MM_R | MM_W);
+#endif
+ log(INFO, "About to enable mmu.\n");
+ enable_mmu(ttbr);
+ log(INFO, "mmu is on.\n");
+
+ return true;
+}
+
+static void arch_mm_dump_table(uint64_t *table, int level)
+{
+ uint64_t i, j;
+ for (i = 0; i < ENTRIES_PER_LEVEL; i++) {
+ if ((table[i] & 1) == 0)
+ continue;
+
+ for (j = 1 * (level - INITIAL_LEVEL + 1); j; j--)
+ log(INFO, "\t");
+ log(INFO, "%x: %x\n", i, table[i]);
+ if (level >= 3)
+ continue;
+
+ if ((table[i] & 3) == 3)
+ arch_mm_dump_table(mm_table_ptr(table[i]), level + 1);
+ }
+}
+
+void enable_mmu(uint64_t *table)
+{
+ //uint32_t v;
+
+ enable_s2();
+#if 0
+ /*
+ * 0 -> Device-nGnRnE memory
+ * 1 -> Normal memory, Inner/Outer Write-Back Non-transient,
+ * Write-Alloc, Read-Alloc.
+ */
+ write_msr(mair_el2, 0xff00);
+ write_msr(ttbr0_el2, table);
+
+ /*
+ * Configure tcr_el2.
+ */
+ v =
+ (1 << 20) | /* TBI, top byte ignored. */
+ (2 << 16) | /* PS, Physical Address Size, 40 bits, 1TB. */
+ (0 << 14) | /* TG0, granule size, 4KB. */
+ (3 << 12) | /* SH0, inner shareable. */
+ (1 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
+ (1 << 8) | /* IRGN0, normal mem, WB RA WA Cacheable. */
+ (25 << 0) | /* T0SZ, input address is 2^39 bytes. */
+ 0;
+ write_msr(tcr_el2, v);
+
+ v =
+ (1 << 0) | /* M, enable stage 1 EL2 MMU. */
+ (1 << 1) | /* A, enable alignment check faults. */
+ // TODO: Enable this.
+// (1 << 2) | /* C, data cache enable. */
+ (1 << 3) | /* SA, enable stack alignment check. */
+ (3 << 4) | /* RES1 bits. */
+ (1 << 11) | /* RES1 bit. */
+ (1 << 12) | /* I, instruction cache enable. */
+ (1 << 16) | /* RES1 bit. */
+ (1 << 18) | /* RES1 bit. */
+ (1 << 19) | /* WXN bit, writable execute never. */
+ (3 << 22) | /* RES1 bits. */
+ (3 << 28) | /* RES1 bits. */
+ 0;
+
+ __asm volatile("dsb sy");
+ __asm volatile("isb");
+ write_msr(sctlr_el2, v);
+ __asm volatile("isb");
+#endif
+}
+#endif
diff --git a/src/arch/aarch64/msr.h b/src/arch/aarch64/msr.h
new file mode 100644
index 0000000..e242cc2
--- /dev/null
+++ b/src/arch/aarch64/msr.h
@@ -0,0 +1,19 @@
+#ifndef _MSR_H
+#define _MSR_H
+
+#include <stddef.h>
+
+#define read_msr(name) \
+ __extension__({ \
+ size_t __v; \
+ __asm volatile("mrs %0, " #name : "=r" (__v)); \
+ __v; \
+ })
+
+#define write_msr(name, value) \
+ do { \
+ __asm volatile("msr " #name ", %x0" \
+ : : "rZ" ((size_t)value)); \
+ } while (0)
+
+#endif /* _MSR_H */
diff --git a/src/arch/aarch64/offsets.c b/src/arch/aarch64/offsets.c
new file mode 100644
index 0000000..f16e242
--- /dev/null
+++ b/src/arch/aarch64/offsets.c
@@ -0,0 +1,10 @@
+#include "cpu.h"
+#include "decl_offsets.h"
+
+void dummy(void)
+{
+ DECL(CPU_CURRENT, struct cpu, current);
+ DECL(CPU_STACK_BOTTOM, struct cpu, stack_bottom);
+ DECL(VCPU_REGS, struct vcpu, regs);
+ DECL(VCPU_LAZY, struct vcpu, regs.lazy);
+}
diff --git a/src/arch/aarch64/pl011.c b/src/arch/aarch64/pl011.c
new file mode 100644
index 0000000..02df2eb
--- /dev/null
+++ b/src/arch/aarch64/pl011.c
@@ -0,0 +1,34 @@
+#include "dlog.h"
+#include "io.h"
+
+/* UART Data Register. */
+#define UARTDR 0
+
+/* UART Flag Register. */
+#define UARTFR 0x018
+
+/* UART Flag Register bit: transmit fifo is full. */
+#define UARTFR_TXFF (1 << 5)
+
+/* UART Flag Register bit: UART is busy. */
+#define UARTFR_BUSY (1 << 3)
+
+void arch_putchar(char c)
+{
+ /* Print a carriage-return as well. */
+ if (c == '\n')
+ arch_putchar('\r');
+
+ /* Wait until there is room in the tx buffer. */
+ while (io_read(PL011_BASE + UARTFR) & UARTFR_TXFF);
+
+ dmb();
+
+ /* Write the character out. */
+ io_write(PL011_BASE + UARTDR, c);
+
+ dmb();
+
+ /* Wait until the UART is no longer busy. */
+ while (io_read_mb(PL011_BASE + UARTFR) & UARTFR_BUSY);
+}
diff --git a/src/arch/aarch64/qemu.mk b/src/arch/aarch64/qemu.mk
new file mode 100644
index 0000000..c137ca8
--- /dev/null
+++ b/src/arch/aarch64/qemu.mk
@@ -0,0 +1,10 @@
+LOAD_ADDRESS := 0x40001000
+PL011_BASE := 0x09000000
+PL011 := 1
+GICV3 := 1
+
+GICD_BASE := 0x08000000
+GICC_BASE := 0x08010000
+GICR_BASE := 0x080A0000
+
+TIMER_IRQ := 26
diff --git a/src/arch/aarch64/rules.mk b/src/arch/aarch64/rules.mk
new file mode 100644
index 0000000..ea0d53d
--- /dev/null
+++ b/src/arch/aarch64/rules.mk
@@ -0,0 +1,19 @@
+SRCS += entry.S
+SRCS += exceptions.S
+SRCS += handler.c
+SRCS += mm.c
+SRCS += timer.c
+
+OFFSET_SRCS += offsets.c
+
+ifeq ($(GICV2),1)
+ SRCS += gicv2.c
+endif
+
+ifeq ($(GICV3),1)
+ SRCS += gicv3.c
+endif
+
+ifeq ($(PL011),1)
+ SRCS += pl011.c
+endif
diff --git a/src/arch/aarch64/timer.c b/src/arch/aarch64/timer.c
new file mode 100644
index 0000000..c4eaecd
--- /dev/null
+++ b/src/arch/aarch64/timer.c
@@ -0,0 +1,49 @@
+#include <stdint.h>
+
+#include "cpu.h"
+#include "irq.h"
+#include "msr.h"
+
+static bool timer_irq_handler(void *context, struct irq_handle *h)
+{
+ struct cpu *c = cpu();
+
+ /* Mask timer interrupt and dismiss current interrupt. */
+ write_msr(cnthp_ctl_el2, read_msr(cnthp_ctl_el2) | 0x2);
+ irq_dismiss(h);
+
+ /* Execute user-supplied callback. */
+ if (c->timer_cb)
+ return c->timer_cb(c->timer_context);
+
+ return false;
+}
+
+void timer_set(uint64_t time, bool (*cb)(void *), void *context)
+{
+ uint64_t v;
+ struct cpu *c = cpu();
+
+ /* Save callback. */
+ c->timer_cb = cb;
+ c->timer_context = context;
+
+ /* TODO: There's a better way to advance this. */
+ v = read_msr(cntpct_el0);
+ write_msr(CNTHP_CVAL_EL2, v + time);
+ write_msr(cnthp_ctl_el2, 1); /* enable. */
+}
+
+void timer_init(void)
+{
+ irq_config(TIMER_IRQ, irq_trigger_level, irq_polarity_active_high,
+ timer_irq_handler, NULL);
+}
+
+void timer_init_percpu(void)
+{
+ /* Mask timer interrupt for now. */
+ write_msr(cnthp_ctl_el2, read_msr(cnthp_ctl_el2) | 0x2);
+
+ irq_enable(TIMER_IRQ);
+}
diff --git a/src/cpio.c b/src/cpio.c
new file mode 100644
index 0000000..c4add22
--- /dev/null
+++ b/src/cpio.c
@@ -0,0 +1,77 @@
+#include "cpio.h"
+
+#include <stdint.h>
+
+#include "std.h"
+
+#pragma pack(push, 1)
+struct cpio_header {
+ uint16_t magic;
+ uint16_t dev;
+ uint16_t ino;
+ uint16_t mode;
+ uint16_t uid;
+ uint16_t gid;
+ uint16_t nlink;
+ uint16_t rdev;
+ uint16_t mtime[2];
+ uint16_t namesize;
+ uint16_t filesize[2];
+};
+#pragma pack(pop)
+
+void cpio_init(struct cpio *c, const void *buf, size_t size)
+{
+ c->first = buf;
+ c->total_size = size;
+}
+
+void cpio_init_iter(struct cpio *c, struct cpio_iter *iter)
+{
+ iter->cur = c->first;
+ iter->size_left = c->total_size;
+}
+
+bool cpio_next(struct cpio_iter *iter, const char **name,
+ const void **contents, size_t *size)
+{
+ const struct cpio_header *h = iter->cur;
+ size_t size_left;
+ size_t filelen;
+ size_t namelen;
+
+ size_left = iter->size_left;
+ if (size_left < sizeof(struct cpio_header))
+ return false;
+
+ /* TODO: Check magic. */
+
+ size_left -= sizeof(struct cpio_header);
+ namelen = (h->namesize + 1) & ~1;
+ if (size_left < namelen)
+ return false;
+
+ size_left -= namelen;
+ filelen = (size_t)h->filesize[0] << 16 | h->filesize[1];
+ if (size_left < filelen)
+ return false;
+
+ /* TODO: Check that string is null-terminated. */
+ /* TODO: Check that the trailer is not returned. */
+
+ /* Stop enumerating files when we hit the end marker. */
+ if (!strcmp((const char *)(iter->cur + 1), "TRAILER!!!"))
+ return false;
+
+ size_left -= filelen;
+
+ *name = (const char *)(iter->cur + 1);
+ *contents = *name + namelen;
+ *size = filelen;
+
+ iter->cur = (struct cpio_header *)((char *)*contents + filelen);
+ iter->cur = (struct cpio_header *)(char *)(((size_t)iter->cur + 1) & ~1);
+ iter->size_left = size_left;
+
+ return true;
+}
diff --git a/src/cpu.c b/src/cpu.c
new file mode 100644
index 0000000..85646c6
--- /dev/null
+++ b/src/cpu.c
@@ -0,0 +1,161 @@
+#include "cpu.h"
+
+#include "arch_cpu.h"
+#include "dlog.h"
+#include "std.h"
+#include "timer.h"
+#include "vm.h"
+
+struct new_old_vcpu {
+ struct vcpu *new;
+ struct vcpu *old;
+};
+
+void cpu_init(struct cpu *c)
+{
+ /* TODO: Assumes that c is zeroed out already. */
+ sl_init(&c->lock);
+ list_init(&c->ready_queue);
+ c->irq_disable_count = 1;
+}
+
+void cpu_irq_enable(struct cpu *c)
+{
+ c->irq_disable_count--;
+ if (!c->irq_disable_count)
+ arch_irq_enable();
+}
+
+void cpu_irq_disable(struct cpu *c)
+{
+ if (!c->irq_disable_count)
+ arch_irq_disable();
+ c->irq_disable_count++;
+}
+
+void cpu_on(struct cpu *c)
+{
+ sl_lock(&c->lock);
+ if (!c->cpu_on_count) {
+ /* The CPU is currently off, we need to turn it on. */
+ arch_cpu_on(c->id, c);
+ }
+ c->cpu_on_count++;
+ sl_unlock(&c->lock);
+}
+
+/*
+ * This must be called only from the same CPU.
+ */
+void cpu_off(struct cpu *c)
+{
+ bool on;
+
+ sl_lock(&c->lock);
+ c->cpu_on_count--;
+ on = c->cpu_on_count > 0;
+ sl_unlock(&c->lock);
+
+ if (!on)
+ arch_cpu_off();
+}
+
+void vcpu_ready(struct vcpu *v)
+{
+ struct cpu *c = v->cpu;
+
+ sl_lock(&c->lock);
+ if (!v->is_runnable) {
+ v->is_runnable = true;
+ list_append(&c->ready_queue, &v->links);
+ /* TODO: Send IPI to cpu if needed. */
+ }
+ sl_unlock(&c->lock);
+}
+
+void vcpu_unready(struct vcpu *v)
+{
+ struct cpu *c = v->cpu;
+
+ sl_lock(&c->lock);
+ if (v->is_runnable) {
+ v->is_runnable = false;
+ list_remove(&v->links);
+ }
+ sl_unlock(&c->lock);
+}
+
+#if 0
+static bool cpu_schedule_next(void *ctx)
+{
+ /* Indicate that a new vcpu should be chosen. */
+ return true;
+}
+#endif
+
+struct new_old_vcpu cpu_next_vcpu(void)
+{
+ struct cpu *c = cpu();
+ struct new_old_vcpu ret;
+ struct vcpu *next;
+ bool switch_mm;
+
+ /* TODO: Check if too soon. */
+
+ sl_lock(&c->lock);
+
+ ret.old = c->current;
+ if (list_empty(&c->ready_queue)) {
+ bool first = true;
+ c->current = NULL;
+ do {
+ sl_unlock(&c->lock);
+ /* TODO: Implement this. Enable irqs. */
+ if (first) {
+ dlog("CPU%d waiting for work...\n", c->id);
+ first = false;
+ }
+ sl_lock(&c->lock);
+ } while (list_empty(&c->ready_queue));
+ dlog("CPU%d found work!\n", c->id);
+ }
+
+ next = LIST_ELEM(c->ready_queue.next, struct vcpu, links);
+ if (next->links.next != &c->ready_queue) {
+ /* Move new vcpu to the end of ready queue. */
+ list_remove(&next->links);
+ list_append(&c->ready_queue, &next->links);
+ }
+
+ c->current = next;
+
+ if (next->interrupt) {
+ arch_regs_set_irq(&next->regs);
+ next->interrupt = false;
+ } else {
+ arch_regs_clear_irq(&next->regs);
+ }
+
+ switch_mm = !ret.old || ret.old->vm != next->vm;
+
+ sl_unlock(&c->lock);
+
+ ret.new = next;
+
+ if (switch_mm)
+ arch_set_vm_mm(&next->vm->page_table);
+
+ /* TODO: Only set this when there is a next thing to run. */
+ /* Set timer again. */
+ //timer_set(5 * 1000000, cpu_schedule_next, NULL);
+
+ return ret;
+}
+
+void vcpu_init(struct vcpu *vcpu, struct cpu *cpu, struct vm *vm)
+{
+ memset(vcpu, 0, sizeof(*vcpu));
+ vcpu->cpu = cpu;
+ vcpu->vm = vm;
+ /* TODO: Initialize vmid register. */
+}
diff --git a/src/dlog.c b/src/dlog.c
new file mode 100644
index 0000000..c4d49ce
--- /dev/null
+++ b/src/dlog.c
@@ -0,0 +1,272 @@
+#include "dlog.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdarg.h>
+
+#include "arch.h"
+#include "spinlock.h"
+#include "std.h"
+
+#define FLAG_SPACE 0x01
+#define FLAG_ZERO 0x02
+#define FLAG_MINUS 0x04
+#define FLAG_PLUS 0x08
+#define FLAG_ALT 0x10
+#define FLAG_UPPER 0x20
+#define FLAG_NEG 0x40
+
+/*
+ * Prints a raw string to the debug log and returns its length.
+ */
+static size_t print_raw_string(const char *str)
+{
+ const char *c = str;
+ while (*c != '\0')
+ arch_putchar(*c++);
+ return c - str;
+}
+
+/*
+ * Prints a formatted string to the debug log. The format includes a minimum
+ * width, the fill character, and flags (whether to align to left or right).
+ *
+ * str is the full string, while suffix is a pointer within str that indicates
+ * where the suffix begins. This is used when printing right-aligned numbers
+ * with a zero fill; for example, -10 with width 4 should be padded to -010,
+ * so suffix would point to index one of the "-10" string.
+ */
+static void print_string(const char *str, const char *suffix, size_t width,
+ int flags, char fill)
+{
+ size_t len = suffix - str;
+
+ /* Print the string up to the beginning of the suffix. */
+ while (str != suffix)
+ arch_putchar(*str++);
+
+ if (flags & FLAG_MINUS) {
+ /* Left-aligned. Print suffix, then print padding if needed. */
+ len += print_raw_string(suffix);
+ while (len < width) {
+ arch_putchar(' ');
+ len++;
+ }
+ return;
+ }
+
+ /* Fill until we reach the desired length. */
+ len += strlen(suffix);
+ while (len < width) {
+ arch_putchar(fill);
+ len++;
+ }
+
+ /* Now print the rest of the string. */
+ print_raw_string(suffix);
+}
+
+/*
+ * Prints a number to the debug log. The caller specifies the base, its minimum
+ * width and printf-style flags.
+ */
+static void print_num(size_t v, size_t base, size_t width, int flags)
+{
+ static const char *digits_lower = "0123456789abcdefx";
+ static const char *digits_upper = "0123456789ABCDEFX";
+ const char *d = (flags & FLAG_UPPER) ? digits_upper : digits_lower;
+ char buf[51];
+ char *ptr = buf + sizeof(buf) - 1;
+ char *num;
+ *ptr = '\0';
+ do {
+ --ptr;
+ *ptr = d[v % base];
+ v /= base;
+ } while (v);
+
+ /* Num stores where the actual number begins. */
+ num = ptr;
+
+ /* Add prefix if requested. */
+ if (flags & FLAG_ALT) {
+ switch (base) {
+ case 16:
+ ptr -= 2;
+ ptr[0] = '0';
+ ptr[1] = d[16];
+ break;
+
+ case 8:
+ ptr--;
+ *ptr = '0';
+ break;
+ }
+ }
+
+ /* Add sign if requested. */
+ if (flags & FLAG_NEG)
+ *--ptr = '-';
+ else if (flags & FLAG_PLUS)
+ *--ptr = '+';
+ else if (flags & FLAG_SPACE)
+ *--ptr = ' ';
+
+ if (flags & FLAG_ZERO)
+ print_string(ptr, num, width, flags, '0');
+ else
+ print_string(ptr, ptr, width, flags, ' ');
+}
+
+/*
+ * Parses the optional flags field of a printf-style format. It returns the spot
+ * on the string where a non-flag character was found.
+ */
+static const char *parse_flags(const char *p, int *flags)
+{
+ for (;;) {
+ switch (*p) {
+ case ' ':
+ *flags |= FLAG_SPACE;
+ break;
+
+ case '0':
+ *flags |= FLAG_ZERO;
+ break;
+
+ case '-':
+ *flags |= FLAG_MINUS;
+ break;
+
+ case '+':
+ *flags |= FLAG_PLUS;
+
+ case '#':
+ *flags |= FLAG_ALT;
+ break;
+
+ default:
+ return p;
+ }
+ p++;
+ }
+}
+
+/*
+ * Prints the given format string to the debug log.
+ */
+void dlog(const char *str, ...)
+{
+ static struct spinlock sl = SPINLOCK_INIT;
+ const char *p;
+ va_list args;
+ size_t w;
+ int flags;
+ char buf[2];
+
+ va_start(args, str);
+
+ sl_lock(&sl);
+
+ for (p = str; *p; p++) {
+ switch (*p) {
+ default:
+ arch_putchar(*p);
+ break;
+
+ case '%':
+ /* Read optional flags. */
+ flags = 0;
+ p = parse_flags(p + 1, &flags) - 1;
+
+ /* Read the minimum width, if one is specified. */
+ w = 0;
+ while (p[1] >= '0' && p[1] <= '9') {
+ w = (w * 10) + (p[1] - '0');
+ p++;
+ }
+
+ /* Read minimum width from arguments. */
+ if (w == 0 && p[1] == '*') {
+ int v = va_arg(args, int);
+ if (v >= 0) {
+ w = v;
+ } else {
+ w = -v;
+ flags |= FLAG_MINUS;
+ }
+ p++;
+ }
+
+ /* Handle the format specifier. */
+ switch (p[1]) {
+ case 's':
+ {
+ char *str = va_arg(args, char *);
+ print_string(str, str, w, flags, ' ');
+ }
+ p++;
+ break;
+
+ case 'd':
+ case 'i':
+ {
+ int v = va_arg(args, int);
+ if (v < 0) {
+ flags |= FLAG_NEG;
+ v = -v;
+ }
+
+ print_num((size_t)v, 10, w, flags);
+ }
+ p++;
+ break;
+
+ case 'X':
+ flags |= FLAG_UPPER;
+ print_num(va_arg(args, size_t), 16, w, flags);
+ break;
+
+ case 'p':
+ print_num(va_arg(args, size_t), 16,
+ sizeof(size_t) * 2, FLAG_ZERO);
+ p++;
+ break;
+
+ case 'x':
+ print_num(va_arg(args, size_t), 16, w, flags);
+ p++;
+ break;
+
+ case 'u':
+ print_num(va_arg(args, size_t), 10, w, flags);
+ p++;
+ break;
+
+ case 'o':
+ print_num(va_arg(args, size_t), 8, w, flags);
+ p++;
+ break;
+
+ case 'c':
+ buf[1] = 0;
+ buf[0] = va_arg(args, int);
+ print_string(buf, buf, w, flags, ' ');
+ p++;
+ break;
+
+ case '%':
+ break;
+
+ default:
+ arch_putchar('%');
+ }
+
+ break;
+ }
+ }
+
+ sl_unlock(&sl);
+
+ va_end(args);
+}
diff --git a/src/fdt.c b/src/fdt.c
new file mode 100644
index 0000000..035ec64
--- /dev/null
+++ b/src/fdt.c
@@ -0,0 +1,327 @@
+#include "fdt.h"
+
+#include <stdint.h>
+
+#include "dlog.h"
+#include "std.h"
+
+struct fdt_header {
+ uint32_t magic;
+ uint32_t totalsize;
+ uint32_t off_dt_struct;
+ uint32_t off_dt_strings;
+ uint32_t off_mem_rsvmap;
+ uint32_t version;
+ uint32_t last_comp_version;
+ uint32_t boot_cpuid_phys;
+ uint32_t size_dt_strings;
+ uint32_t size_dt_struct;
+};
+
+struct fdt_reserve_entry {
+ uint64_t address;
+ uint64_t size;
+};
+
+enum fdt_token {
+ FDT_BEGIN_NODE = 1,
+ FDT_END_NODE = 2,
+ FDT_PROP = 3,
+ FDT_NOP = 4,
+ FDT_END = 9,
+};
+
+struct fdt_tokenizer {
+ const char *cur;
+ const char *end;
+ const char *strs;
+};
+
+#define FDT_VERSION 17
+#define FDT_MAGIC 0xd00dfeed
+
+static void fdt_tokenizer_init(struct fdt_tokenizer *t, const char *strs,
+ const char *begin, const char *end)
+{
+ t->strs = strs;
+ t->cur = begin;
+ t->end = end;
+}
+
+static void fdt_tokenizer_align(struct fdt_tokenizer *t)
+{
+ t->cur = (char *)(((size_t)t->cur + 3) & ~3);
+}
+
+static bool fdt_tokenizer_uint32(struct fdt_tokenizer *t, uint32_t *res)
+{
+ const char *next = t->cur + sizeof(*res);
+ if (next > t->end)
+ return false;
+
+ *res = ntohl(*(uint32_t *)t->cur);
+ t->cur = next;
+
+ return true;
+}
+
+static bool fdt_tokenizer_token(struct fdt_tokenizer *t, uint32_t *res)
+{
+ uint32_t v;
+
+ while (fdt_tokenizer_uint32(t, &v)) {
+ if (v != FDT_NOP) {
+ *res = v;
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool fdt_tokenizer_bytes(struct fdt_tokenizer *t,
+ const char **res, size_t size)
+{
+ const char *next = t->cur + size;
+ if (next > t->end)
+ return false;
+
+ *res = t->cur;
+ t->cur = next;
+ fdt_tokenizer_align(t);
+
+ return true;
+}
+
+static bool fdt_tokenizer_str(struct fdt_tokenizer *t, const char **res)
+{
+ const char *p;
+ for (p = t->cur; p < t->end; p++) {
+ if (!*p) {
+ /* Found the end of the string. */
+ *res = t->cur;
+ t->cur = p + 1;
+ fdt_tokenizer_align(t);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void fdt_root_node(struct fdt_node *node, const struct fdt_header *hdr)
+{
+ uint32_t max_ver;
+ uint32_t min_ver;
+ uint32_t begin = ntohl(hdr->off_dt_struct);
+ uint32_t size = ntohl(hdr->size_dt_struct);
+
+ memset(node, 0, sizeof(*node));
+
+ /* Check the magic number before anything else. */
+ if (hdr->magic != ntohl(FDT_MAGIC))
+ return;
+
+ /* Check the version. */
+ max_ver = ntohl(hdr->version);
+ min_ver = ntohl(hdr->last_comp_version);
+ if (FDT_VERSION < min_ver || FDT_VERSION > max_ver)
+ return;
+
+ /* TODO: Verify that it is all within the fdt. */
+ node->begin = (const char *)hdr + begin;
+ node->end = node->begin + size;
+
+ /* TODO: Verify strings as well. */
+ node->strs = (char *)hdr + ntohl(hdr->off_dt_strings);
+}
+
+static bool fdt_next_property(struct fdt_tokenizer *t, const char **name,
+ const char **buf, uint32_t *size)
+{
+ uint32_t token;
+ uint32_t nameoff;
+
+ if (!fdt_tokenizer_token(t, &token))
+ return false;
+
+ if (token != FDT_PROP) {
+ /* Rewind so that caller will get the same token. */
+ t->cur -= sizeof(uint32_t);
+ return false;
+ }
+
+ if (!fdt_tokenizer_uint32(t, size) ||
+ !fdt_tokenizer_uint32(t, &nameoff) ||
+ !fdt_tokenizer_bytes(t, buf, *size)) {
+ /*
+ * Move cursor to the end so that caller won't get any new
+ * tokens.
+ */
+ t->cur = t->end;
+ return false;
+ }
+
+ /* TODO: Need to verify the strings. */
+ *name = t->strs + nameoff;
+
+ return true;
+}
+
+static bool fdt_next_subnode(struct fdt_tokenizer *t, const char **name)
+{
+ uint32_t token;
+
+ if (!fdt_tokenizer_token(t, &token))
+ return false;
+
+ if (token != FDT_BEGIN_NODE) {
+ /* Rewind so that caller will get the same token. */
+ t->cur -= sizeof(uint32_t);
+ return false;
+ }
+
+ if (!fdt_tokenizer_str(t, name)) {
+ /*
+ * Move cursor to the end so that caller won't get any new
+ * tokens.
+ */
+ t->cur = t->end;
+ return false;
+ }
+
+ return true;
+}
+
+static void fdt_skip_properties(struct fdt_tokenizer *t)
+{
+ const char *name;
+ const char *buf;
+ uint32_t size;
+ while (fdt_next_property(t, &name, &buf, &size));
+}
+
+static bool fdt_skip_node(struct fdt_tokenizer *t)
+{
+ const char *name;
+ uint32_t token;
+ size_t pending = 1;
+
+ fdt_skip_properties(t);
+
+ do {
+ while (fdt_next_subnode(t, &name)) {
+ fdt_skip_properties(t);
+ pending++;
+ }
+
+ if (!fdt_tokenizer_token(t, &token))
+ return false;
+
+ if (token != FDT_END_NODE) {
+ t->cur = t->end;
+ return false;
+ }
+
+ pending--;
+ } while (pending);
+
+ return true;
+}
+
+bool fdt_read_property(const struct fdt_node *node, const char *name,
+ const char **buf, uint32_t *size)
+{
+ struct fdt_tokenizer t;
+ const char *prop_name;
+
+ fdt_tokenizer_init(&t, node->strs, node->begin, node->end);
+
+ while (fdt_next_property(&t, &prop_name, buf, size)) {
+ if (!strcmp(prop_name, name))
+ return true;
+ }
+
+ return false;
+}
+
+bool fdt_find_child(struct fdt_node *node, const char *child)
+{
+ struct fdt_tokenizer t;
+ const char *name;
+
+ fdt_tokenizer_init(&t, node->strs, node->begin, node->end);
+
+ fdt_skip_properties(&t);
+
+ while (fdt_next_subnode(&t, &name)) {
+ if (!strcmp(name, child)) {
+ node->begin = t.cur;
+ return true;
+ }
+
+ fdt_skip_node(&t);
+ }
+
+ return false;
+}
+
+void fdt_dump(struct fdt_header *hdr)
+{
+ uint32_t token;
+ size_t depth = 0;
+ const char *name;
+ struct fdt_tokenizer t;
+ struct fdt_node node;
+
+ /* Traverse the whole thing. */
+ fdt_root_node(&node, hdr);
+
+ fdt_tokenizer_init(&t, node.strs, node.begin, node.end);
+
+ do {
+ while (fdt_next_subnode(&t, &name)) {
+ const char *buf;
+ uint32_t size;
+
+ dlog("%*sNew node: \"%s\"\n", 2 * depth, "", name);
+ depth++;
+ while (fdt_next_property(&t, &name, &buf, &size)) {
+ size_t i;
+ dlog("%*sproperty: \"%s\" (", 2 * depth, "", name);
+ for (i = 0; i < size; i++)
+ dlog("%s%02x", i == 0 ? "" : " ", buf[i]);
+ dlog(")\n");
+ }
+ }
+
+ if (!fdt_tokenizer_token(&t, &token))
+ return;
+
+ if (token != FDT_END_NODE)
+ return;
+
+ depth--;
+ } while (depth);
+
+ dlog("fdt: off_mem_rsvmap=%u\n", ntohl(hdr->off_mem_rsvmap));
+ {
+ struct fdt_reserve_entry *e = (struct fdt_reserve_entry *)((size_t)hdr + ntohl(hdr->off_mem_rsvmap));
+ while (e->address || e->size) {
+ dlog("Entry: %p (0x%x bytes)\n", ntohll(e->address), ntohll(e->size));
+ e++;
+ }
+ }
+}
+
+void fdt_add_mem_reservation(struct fdt_header *hdr, size_t addr, size_t len)
+{
+ /* TODO: Clean this up. */
+ char *begin = (char *)hdr + ntohl(hdr->off_mem_rsvmap);
+ struct fdt_reserve_entry *e = (struct fdt_reserve_entry *)begin;
+ hdr->totalsize = htonl(ntohl(hdr->totalsize) + sizeof(struct fdt_reserve_entry));
+ hdr->off_dt_struct = htonl(ntohl(hdr->off_dt_struct) + sizeof(struct fdt_reserve_entry));
+ hdr->off_dt_strings = htonl(ntohl(hdr->off_dt_strings) + sizeof(struct fdt_reserve_entry));
+ memmove(begin + sizeof(struct fdt_reserve_entry), begin, ntohl(hdr->totalsize) - ntohl(hdr->off_mem_rsvmap));
+ e->address = htonll(addr);
+ e->size = htonll(len);
+}
diff --git a/src/hafnium.ld b/src/hafnium.ld
new file mode 100644
index 0000000..9949f03
--- /dev/null
+++ b/src/hafnium.ld
@@ -0,0 +1,41 @@
+/* Linker script for the hafnium image. PREFERRED_LOAD_ADDRESS is expected
+ * to be supplied on the linker command line (-defsym or equivalent). */
+ENTRY(entry)
+SECTIONS
+{
+ . = PREFERRED_LOAD_ADDRESS;
+ /* Link-time base of the image.
+ NOTE(review): presumably used by self-relocation code together with the
+ .rela section below (the build compiles with -fpic) — confirm. */
+ _orig_base = ABSOLUTE(.);
+
+ text_begin = .;
+ .init : {
+ /* The entry point must be the very first thing in the image. */
+ *(.init.entry)
+ *(.init)
+ }
+ .text : { *(.text) }
+ text_end = .;
+
+ . = ALIGN(4096);
+ rodata_begin = .;
+ .rodata : { *(.rodata) }
+ /* Relocation entries, bracketed by symbols so startup code can find and
+ apply them. */
+ .rela : ALIGN(8) {
+ rela_begin = .;
+ *(.rela .rela*)
+ rela_end = .;
+ }
+ rodata_end = .;
+
+ . = ALIGN(4096);
+ data_begin = .;
+ .data : { *(.data) }
+
+ /* The entry point code assumes that bss is 16-byte aligned. */
+ .bss ALIGN(16) : {
+ /* Size of the on-disk image: bss is not stored in the binary, so the
+ file ends where bss begins. */
+ file_size = ABSOLUTE(. - PREFERRED_LOAD_ADDRESS);
+ bss_begin = .;
+ *(.bss COMMON)
+ . = ALIGN(16);
+ bss_end = .;
+ }
+ data_end = .;
+
+ . = ALIGN(4096);
+ bin_end = .;
+}
diff --git a/src/irq.c b/src/irq.c
new file mode 100644
index 0000000..d751c56
--- /dev/null
+++ b/src/irq.c
@@ -0,0 +1,41 @@
+#include "irq.h"
+
+#include "arch.h"
+
+/* Per-interrupt configuration: the registered handler callback and the
+ * opaque context pointer passed back to it. */
+struct irq_config {
+ void *cb_context;
+ bool (*cb)(void *context, struct irq_handle *);
+};
+
+/*
+ * TODO: Move this to write-once page so that we know it won't change in the
+ * future.
+ */
+/* One slot per interrupt number.
+ * NOTE(review): 300 looks sized for a GIC's SGI/PPI/SPI range — confirm
+ * against the largest interrupt number actually configured. */
+static struct irq_config irq_configs[300];
+
+/*
+ * Registers a callback (with its context) for the given interrupt number
+ * and forwards the trigger/polarity configuration to the arch layer.
+ * Out-of-range interrupt numbers are ignored.
+ */
+void irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p,
+		bool (*cb)(void *, struct irq_handle *), void *context)
+{
+	struct irq_config *cfg;
+
+	/* Refuse interrupt numbers without a slot; the previous code would
+	 * have written past the end of irq_configs. */
+	if (num >= sizeof(irq_configs) / sizeof(irq_configs[0]))
+		return;
+
+	cfg = irq_configs + num;
+	cfg->cb = cb;
+	cfg->cb_context = context;
+
+	arch_irq_config(num, t, p);
+}
+
+/*
+ * Dispatches an interrupt to its registered callback and returns the
+ * callback's result. Returns false for interrupts that are out of range
+ * or were never configured; the previous code dereferenced a NULL
+ * callback in that case.
+ * NOTE(review): assumes a false return means "not handled" — confirm
+ * against the caller in the arch layer.
+ */
+bool irq_handle(uint32_t num, struct irq_handle *h)
+{
+	struct irq_config *cfg;
+
+	if (num >= sizeof(irq_configs) / sizeof(irq_configs[0]))
+		return false;
+
+	cfg = irq_configs + num;
+	if (!cfg->cb)
+		return false;
+
+	return cfg->cb(cfg->cb_context, h);
+}
+
+/* One-time global IRQ subsystem initialization. Nothing to do yet. */
+void irq_init(void)
+{
+}
+
+/* Per-CPU IRQ initialization; defers entirely to the architecture layer. */
+void irq_init_percpu(void)
+{
+ arch_irq_init_percpu();
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..346c798
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,230 @@
+#include <stdalign.h>
+#include <stdatomic.h>
+#include <stddef.h>
+
+#include "cpio.h"
+#include "cpu.h"
+#include "dlog.h"
+#include "fdt.h"
+#include "irq.h"
+#include "std.h"
+#include "timer.h"
+#include "vm.h"
+
+/* Pointer to the FDT blob.
+ * NOTE(review): appears to be populated by the arch entry code before
+ * cpu_main() runs — confirm. */
+void *fdt;
+
+/* The stack to be used by the CPUs; one STACK_SIZE slice per CPU. */
+alignas(2 * sizeof(size_t)) char callstacks[STACK_SIZE * MAX_CPUS];
+
+/* State of all supported CPUs. The stack of the first one is initialized. */
+/* NOTE(review): stack_bottom points at the END of the slice, suggesting a
+ * downward-growing stack — confirm against the entry code. */
+struct cpu cpus[MAX_CPUS] = {
+ {
+ .cpu_on_count = 1,
+ .stack_bottom = callstacks + STACK_SIZE,
+ },
+};
+
+/*
+ * Moves the node cursor to the node at the given path. The path is a
+ * sequence of NUL-terminated component names followed by an empty
+ * component, e.g. "chosen\0" (the literal's implicit terminator supplies
+ * the trailing empty component). Returns false if any component is absent.
+ */
+bool fdt_find_node(struct fdt_node *node, const char *path)
+{
+	/* An empty name selects the root node. */
+	if (!fdt_find_child(node, ""))
+		return false;
+
+	while (*path) {
+		if (!fdt_find_child(node, path))
+			return false;
+		/*
+		 * Skip past the component's NUL terminator too. The previous
+		 * code advanced only strlen(path) bytes, stopping on the
+		 * terminator, so multi-component paths could never descend
+		 * past their first component.
+		 */
+		path += strlen(path) + 1;
+	}
+
+	return true;
+}
+
+/*
+ * Reads a 32- or 64-bit numeric property (stored big-endian in the FDT)
+ * into *value. Returns false if the property is absent or has any other
+ * size; *value is untouched in that case.
+ */
+bool fdt_read_number(struct fdt_node *node, const char *name, uint64_t *value)
+{
+ const char *data;
+ uint32_t size;
+ union {
+ volatile uint64_t v;
+ char a[8];
+ } t;
+
+ if (!fdt_read_property(node, name, &data, &size))
+ return false;
+
+ switch (size) {
+ case sizeof(uint32_t):
+ *value = ntohl(*(uint32_t *)data);
+ break;
+
+ case sizeof(uint64_t):
+ /* Go via a byte buffer: 64-bit property values are not guaranteed
+ * to be 8-byte aligned in an FDT, so avoid a direct 64-bit load.
+ * NOTE(review): the volatile presumably stops the compiler from
+ * optimizing the copy into such a load — confirm. */
+ memcpy(t.a, data, sizeof(uint64_t));
+ *value = ntohll(t.v);
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Overwrites an existing 32- or 64-bit numeric property in place with the
+ * given value, stored big-endian. Returns false if the property is absent
+ * or has any other size; a property's size cannot be changed here.
+ */
+bool fdt_write_number(struct fdt_node *node, const char *name, uint64_t value)
+{
+	const char *data;
+	uint32_t size;
+	union {
+		volatile uint64_t v;
+		char a[8];
+	} t;
+
+	/* Locate the existing property; it determines the width to write. */
+	if (!fdt_read_property(node, name, &data, &size))
+		return false;
+
+	switch (size) {
+	case sizeof(uint32_t):
+		/*
+		 * This is a host-to-big-endian conversion, so use htonl: the
+		 * previous ntohl call produced the same bytes (the swap is
+		 * its own inverse) but stated the wrong intent.
+		 */
+		*(uint32_t *)data = htonl(value);
+		break;
+
+	case sizeof(uint64_t):
+		/* Write via a byte buffer: 64-bit property values are not
+		 * guaranteed 8-byte alignment in an FDT. */
+		t.v = htonll(value);
+		memcpy((void *)data, t.a, sizeof(uint64_t));
+		break;
+
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Copies `size` bytes from `from` to the first 0x80000-aligned (512KiB)
+ * address at or after the end of the hafnium image (bin_end, defined by
+ * the linker script).
+ * NOTE(review): assumes the destination region is free RAM large enough
+ * for `size` bytes — nothing here checks that; confirm.
+ */
+static void relocate(const char *from, size_t size)
+{
+ extern char bin_end[];
+ size_t tmp = (size_t)&bin_end[0];
+ /* Round up to the next 0x80000 boundary. */
+ char *dest = (char *)((tmp + 0x80000 - 1) & ~(0x80000 - 1));
+ dlog("bin_end is at %p, copying to %p\n", &bin_end[0], dest);
+ memcpy(dest, from, size);
+}
+
+/* TODO: Remove this. */
+/* The single VM supported for now; lives here until real VM management
+ * exists. */
+struct vm vm0;
+
+/*
+ * Global one-time initialization: initializes every CPU's state and stack,
+ * then (temporarily) loads the VM kernel and ramdisk out of the cpio
+ * archive named by the FDT's /chosen node, patches the FDT, and starts
+ * vcpu 0 of vm0.
+ */
+static void one_time_init(void)
+{
+ size_t i;
+
+ dlog("Initializing hafnium\n");
+
+ /*
+ * TODO: Re-enable this.
+ irq_init();
+ timer_init();
+ */
+
+ /* Initialize all CPUs. */
+ for (i = 0; i < MAX_CPUS; i++) {
+ struct cpu *c = cpus + i;
+ cpu_init(c);
+ c->id = i; /* TODO: Initialize ID. */
+ c->stack_bottom = callstacks + STACK_SIZE * (i + 1);
+ }
+
+ /* TODO: Code below this point should be removed from this function. */
+ /* TODO: Remove this. */
+
+ /* do/while(0) lets each failure below 'break' out of the sequence. */
+ do {
+ struct fdt_node n;
+
+ fdt_root_node(&n, fdt);
+ if (!fdt_find_node(&n, "chosen\0")) {
+ dlog("Unable to find 'chosen'\n");
+ break;
+ }
+
+ uint64_t begin;
+ uint64_t end;
+
+ /* The boot loader records the ramdisk's location in /chosen. */
+ if (!fdt_read_number(&n, "linux,initrd-start", &begin)) {
+ dlog("Unable to read linux,initrd-start\n");
+ break;
+ }
+
+ if (!fdt_read_number(&n, "linux,initrd-end", &end)) {
+ dlog("Unable to read linux,initrd-end\n");
+ break;
+ }
+
+ dlog("Ramdisk: from %x to %x\n", begin, end);
+
+ /* The ramdisk is a cpio archive holding the VM kernel and initrd. */
+ struct cpio c;
+ struct cpio_iter iter;
+ cpio_init(&c, (void *)begin, end - begin);
+ cpio_init_iter(&c, &iter);
+
+ const char *name;
+ const void *fcontents;
+ size_t ramdisk = 0;
+ size_t ramdisk_end = 0;
+ size_t fsize;
+ while (cpio_next(&iter, &name, &fcontents, &fsize)) {
+ dlog("File: %s, size=%u\n", name, fsize);
+ /* The VM kernel is copied out past the end of our own image. */
+ if (!strcmp(name, "vm/vmlinuz")) {
+ relocate(fcontents, fsize);
+ continue;
+ }
+
+ /* The VM's initrd is used in place, straight from the archive. */
+ if (!strcmp(name, "vm/initrd.img")) {
+ dlog("Found vm/ramdisk @ %p, %u bytes\n", fcontents, fsize);
+ ramdisk = (size_t)fcontents;
+ ramdisk_end = ramdisk + fsize;
+ continue;
+ }
+ }
+
+ dlog("Ramdisk; %p\n", ramdisk);
+
+ /* Patch FDT to point to new ramdisk. */
+ if (!fdt_write_number(&n, "linux,initrd-start", ramdisk)) {
+ dlog("Unable to write linux,initrd-start\n");
+ break;
+ }
+
+ if (!fdt_write_number(&n, "linux,initrd-end", ramdisk_end)) {
+ dlog("Unable to write linux,initrd-end\n");
+ break;
+ }
+
+ /*
+ * Patch the FDT to reserve the memory holding the relocated VM
+ * kernel, then boot it on vcpu 0 with the FDT address as its
+ * boot argument.
+ */
+ {
+ /* Recompute the relocation target: round an address inside our
+ * image up to the next 0x80000 boundary.
+ * NOTE(review): this matches relocate()'s bin_end-based result
+ * only if no 0x80000 boundary falls between &relocate and
+ * bin_end — confirm. */
+ size_t tmp = (size_t)&relocate;
+ tmp = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
+
+ /* NOTE(review): the reservation starts at tmp & ~0xfffff — a
+ * 1MiB boundary possibly below tmp — yet is only 0x80000 bytes
+ * long; confirm it actually covers the relocated kernel. */
+ fdt_add_mem_reservation(fdt, tmp & ~0xfffff, 0x80000);
+ vm_init(&vm0, cpus);
+ vm_start_vcpu(&vm0, 0, tmp, (size_t)fdt);
+ }
+ } while (0);
+}
+
+/*
+ * The entry point of CPUs when they are turned on. It is supposed to
+ * initialise all state and return; the caller will ensure that the next
+ * vcpu runs.
+ */
+void cpu_main(void)
+{
+ /* Do global one-time initialization just once: the first CPU to arrive
+ * claims the flag and runs it.
+ * NOTE(review): CPUs that lose the race do not wait for one_time_init()
+ * to complete before continuing — confirm that is safe. */
+ static atomic_flag inited = ATOMIC_FLAG_INIT;
+ if (!atomic_flag_test_and_set_explicit(&inited, memory_order_acq_rel))
+ one_time_init();
+
+ /* The CPU index is this cpu's offset within the global cpus array. */
+ dlog("Starting up cpu %d\n", cpu() - cpus);
+
+ /* Do per-cpu initialization. */
+ /* TODO: What to do here? */
+ /*
+ irq_init_percpu();
+ timer_init_percpu();
+ */
+}
diff --git a/src/rules.mk b/src/rules.mk
new file mode 100644
index 0000000..1bed7fa
--- /dev/null
+++ b/src/rules.mk
@@ -0,0 +1,12 @@
+# Architecture-independent hypervisor sources; picked up by the
+# include_module machinery in the top-level Makefile.
+SRCS += alloc.c
+SRCS += cpio.c
+SRCS += cpu.c
+SRCS += fdt.c
+SRCS += irq.c
+SRCS += main.c
+SRCS += std.c
+SRCS += vm.c
+
+# The debug log implementation is only compiled into debug builds.
+ifeq ($(DEBUG),1)
+ SRCS += dlog.c
+endif
diff --git a/src/std.c b/src/std.c
new file mode 100644
index 0000000..39ba972
--- /dev/null
+++ b/src/std.c
@@ -0,0 +1,84 @@
+#include "std.h"
+
+/*
+ * Fills the first n bytes of the region at s with the byte value c.
+ * Returns s, as required of the standard memset.
+ */
+void *memset(void *s, int c, size_t n)
+{
+	char *it = s;
+	char *end = it + n;
+
+	for (; it != end; it++)
+		*it = c;
+
+	return s;
+}
+
+/*
+ * Returns the number of characters in str before its NUL terminator.
+ */
+size_t strlen(const char *str)
+{
+	size_t n = 0;
+
+	while (str[n] != '\0')
+		n++;
+
+	return n;
+}
+
+/*
+ * Copies n bytes from src to dst and returns dst. The regions must not
+ * overlap; use memmove for overlapping copies.
+ */
+void *memcpy(void *dst, const void *src, size_t n)
+{
+	char *to = dst;
+	const char *from = src;
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		to[i] = from[i];
+
+	return dst;
+}
+
+/*
+ * Copies n bytes from src to dst, handling overlapping regions correctly,
+ * and returns dst.
+ */
+void *memmove(void *dst, const void *src, size_t n)
+{
+	char *to = dst;
+	const char *from = src;
+
+	if (dst < src) {
+		/* Destination precedes source: a forward copy is safe. */
+		while (n--)
+			*to++ = *from++;
+	} else {
+		/* Copy backwards so an overlapping tail isn't clobbered
+		 * before it is read. */
+		while (n--)
+			to[n] = from[n];
+	}
+
+	return dst;
+}
+
+/*
+ * Compares the first n bytes of a and b, returning <0, 0 or >0. Bytes are
+ * compared as unsigned char as required by C11 7.24.4; the previous code
+ * compared plain char (signed on AArch64 GCC only by flag, signed on most
+ * ABIs), giving the wrong ordering for bytes >= 0x80. Equality results
+ * are unchanged.
+ */
+int memcmp(const void *a, const void *b, size_t n)
+{
+	const unsigned char *x = a;
+	const unsigned char *y = b;
+
+	while (n--) {
+		if (*x != *y)
+			return *x - *y;
+		x++;
+		y++;
+	}
+
+	return 0;
+}
+
+/*
+ * Lexicographically compares the NUL-terminated strings a and b, returning
+ * <0, 0 or >0. Characters are compared as unsigned char as required by
+ * C11 7.24.4.2; the previous code compared plain char, giving the wrong
+ * ordering for non-ASCII bytes where char is signed. Equality results are
+ * unchanged.
+ */
+int strcmp(const char *a, const char *b)
+{
+	const unsigned char *x = (const unsigned char *)a;
+	const unsigned char *y = (const unsigned char *)b;
+
+	/* Advance while both strings continue to match. */
+	while (*x != 0 && *x == *y) {
+		x++;
+		y++;
+	}
+
+	return *x - *y;
+}
diff --git a/src/vm.c b/src/vm.c
new file mode 100644
index 0000000..ee6a5cd
--- /dev/null
+++ b/src/vm.c
@@ -0,0 +1,24 @@
+#include "vm.h"
+
+#include "cpu.h"
+
+/*
+ * Initializes a vm: performs basic initialization of its vcpus — one per
+ * physical CPU, each bound to the corresponding entry of `cpus` — and sets
+ * up its page table via the arch layer.
+ * NOTE(review): assumes vm->vcpus has at least MAX_CPUS entries — confirm
+ * against the struct vm definition.
+ */
+void vm_init(struct vm *vm, struct cpu *cpus)
+{
+ size_t i;
+
+ /* Do basic initialization of vcpus. */
+ for (i = 0; i < MAX_CPUS; i++) {
+ vcpu_init(vm->vcpus + i, cpus + i, vm);
+ }
+
+ arch_vptable_init(&vm->page_table);
+}
+
+/* TODO: Shall we use index or id here? */
+void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg)
+{
+ struct vcpu *vcpu = vm->vcpus + index;
+ arch_regs_init(&vcpu->regs, entry, arg);
+ vcpu_ready(vcpu);
+ cpu_on(vcpu->cpu);
+}