Initial commit.
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..89f9ac0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+out/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f2e590c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,144 @@
+ROOT_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
+ifeq ($(ROOT_DIR),./)
+ ROOT_DIR :=
+endif
+
+# Defaults.
+ARCH ?= aarch64
+PLAT ?= qemu
+DEBUG ?= 1
+NAME := hafnium
+
+OUT := $(ROOT_DIR)out/$(ARCH)/$(PLAT)
+
+all: $(OUT)/$(NAME).bin
+
+# Include platform-specific constants.
+include $(ROOT_DIR)src/arch/$(ARCH)/$(PLAT).mk
+
+define include_module
+ SRCS :=
+ OFFSET_SRCS :=
+ include $(ROOT_DIR)$(1)/rules.mk
+ GLOBAL_SRCS += $$(addprefix $(1)/,$$(SRCS))
+ GLOBAL_OFFSET_SRCS += $$(addprefix $(1)/,$$(OFFSET_SRCS))
+endef
+
+#
+# Include each module.
+#
+MODULES := src
+MODULES += src/arch/$(ARCH)
+GLOBAL_SRCS :=
+GLOBAL_OFFSET_SRCS :=
+$(foreach mod,$(MODULES),$(eval $(call include_module,$(mod))))
+
+CROSS_PREFIX := aarch64-linux-gnu-
+
+#
+# Rules to build C files.
+#
+COPTS = -mcpu=cortex-a57+nofp
+COPTS += -fno-stack-protector
+COPTS += -fno-builtin -ffreestanding
+COPTS += -g
+COPTS += -O2
+COPTS += -fpic
+COPTS += -std=c11
+COPTS += -Wall -Wpedantic -Werror
+COPTS += -DDEBUG=$(DEBUG)
+COPTS += -MMD -MP -MF $$(patsubst %,%.d,$$@)
+COPTS += -DMAX_CPUS=8
+COPTS += -DSTACK_SIZE=4096
+COPTS += -I$(ROOT_DIR)inc
+COPTS += -I$(ROOT_DIR)src/arch/$(ARCH)/inc
+COPTS += -I$(OUT)/arch/$(ARCH)/inc
+COPTS += -DGICD_BASE=$(GICD_BASE)
+COPTS += -DGICC_BASE=$(GICC_BASE)
+COPTS += -DGICR_BASE=$(GICR_BASE)
+COPTS += -DTIMER_IRQ=$(TIMER_IRQ)
+
+ifeq ($(PL011),1)
+ COPTS += -DPL011_BASE=$(PL011_BASE)
+endif
+
+ifeq ($(DEBUG),1)
+ COPTS += -DDEBUG
+endif
+
+define build_c
+ TGT := $(patsubst %.c,%.o,$(OUT)/$(patsubst src/%,%,$(1)))
+ GLOBAL_OBJS += $$(TGT)
+ REMAIN_SRCS := $$(filter-out $(1),$$(REMAIN_SRCS))
+$$(TGT): $(ROOT_DIR)$(1) | $$(dir $$(TGT))
+ $$(info CC $(ROOT_DIR)$1)
+ @$(CROSS_PREFIX)gcc $(COPTS) -c $(ROOT_DIR)$(1) -o $$@
+endef
+
+#
+# Rules to generate offsets.
+#
+define gen_offsets
+ TMP := $(patsubst src/%,%,$(1))
+ TMP := $$(dir $$(TMP))inc/$$(notdir $$(TMP))
+ TGT := $$(patsubst %.c,%.h,$(OUT)/$$(TMP))
+ GLOBAL_OFFSETS += $$(TGT)
+$$(TGT): $(ROOT_DIR)$(1) | $$(dir $$(TGT))
+ $$(info GENOFFSET $(ROOT_DIR)$1)
+ @$(CROSS_PREFIX)gcc $(COPTS) -MT $$@ -S -c $(ROOT_DIR)$(1) -o - | grep DEFINE_OFFSET | sed 's/\tDEFINE_OFFSET/#define/g' > $$@
+endef
+
+#
+# Rules to build S files.
+#
+define build_S
+ TGT := $(patsubst %.S,%.o,$(OUT)/$(patsubst src/%,%,$(1)))
+ GLOBAL_OBJS += $$(TGT)
+ REMAIN_SRCS := $$(filter-out $(1),$$(REMAIN_SRCS))
+$$(TGT): $(ROOT_DIR)$(1) $(GLOBAL_OFFSETS) | $$(dir $$(TGT))
+ $$(info AS $(ROOT_DIR)$1)
+ @$(CROSS_PREFIX)gcc $(COPTS) -c $(ROOT_DIR)$(1) -o $$@
+endef
+
+#
+# Generate the build rules for all .c and .S files.
+#
+GLOBAL_OBJS :=
+GLOBAL_OFFSETS :=
+REMAIN_SRCS := $(GLOBAL_SRCS)
+$(foreach file,$(filter %.c,$(GLOBAL_OFFSET_SRCS)),$(eval $(call gen_offsets,$(file))))
+$(foreach file,$(filter %.c,$(GLOBAL_SRCS)),$(eval $(call build_c,$(file))))
+$(foreach file,$(filter %.S,$(GLOBAL_SRCS)),$(eval $(call build_S,$(file))))
+
+#
+# Check if there are any source files which we don't know to handle.
+#
+ifneq ($(REMAIN_SRCS),)
+ $(error Don't know how to handle $(REMAIN_SRCS))
+endif
+
+#
+# Rule to create all output directories.
+#
+define create_dir
+$1:
+ @mkdir -p $1
+endef
+$(foreach name,$(sort $(dir $(GLOBAL_OBJS))),$(eval $(call create_dir,$(name))))
+$(foreach name,$(sort $(dir $(GLOBAL_OFFSETS))),$(eval $(call create_dir,$(name))))
+
+#
+# Rules to build the hypervisor.
+#
+$(OUT)/$(NAME): $(GLOBAL_OBJS) $(ROOT_DIR)src/$(NAME).ld
+ $(info LD $(ROOT_DIR)src/$(NAME).ld)
+ @$(CROSS_PREFIX)ld -g -pie $(GLOBAL_OBJS) -T$(ROOT_DIR)src/$(NAME).ld --defsym PREFERRED_LOAD_ADDRESS=$(LOAD_ADDRESS) -o $@
+
+$(OUT)/$(NAME).bin: $(OUT)/$(NAME)
+ $(info OBJCOPY $@)
+ @$(CROSS_PREFIX)objcopy -O binary $< $@
+
+clean:
+ rm -rf $(ROOT_DIR)out
+
+-include $(patsubst %,%.d,$(GLOBAL_OBJS) $(GLOBAL_OFFSETS))
diff --git a/inc/alloc.h b/inc/alloc.h
new file mode 100644
index 0000000..b3fc110
--- /dev/null
+++ b/inc/alloc.h
@@ -0,0 +1,11 @@
+#ifndef _ALLOC_H
+#define _ALLOC_H
+
+#include <stddef.h>
+
+void halloc_init(size_t base, size_t size);
+void *halloc(size_t size);
+void hfree(void *ptr);
+void *halloc_aligned(size_t size, size_t align);
+
+#endif /* _ALLOC_H */
diff --git a/inc/arch.h b/inc/arch.h
new file mode 100644
index 0000000..e12a8cf
--- /dev/null
+++ b/inc/arch.h
@@ -0,0 +1,12 @@
+#ifndef _ARCH_H
+#define _ARCH_H
+
+#include "cpu.h"
+#include "irq.h"
+
+void arch_init(struct cpu *cpu);
+void arch_irq_init_percpu(void);
+void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p);
+void arch_putchar(char c);
+
+#endif /* _ARCH_H */
diff --git a/inc/cpio.h b/inc/cpio.h
new file mode 100644
index 0000000..cc244ce
--- /dev/null
+++ b/inc/cpio.h
@@ -0,0 +1,22 @@
+#ifndef _CPIO_H
+#define _CPIO_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct cpio {
+ const struct cpio_header *first;
+ size_t total_size;
+};
+
+struct cpio_iter {
+ const struct cpio_header *cur;
+ size_t size_left;
+};
+
+void cpio_init(struct cpio *c, const void *buf, size_t size);
+void cpio_init_iter(struct cpio *c, struct cpio_iter *iter);
+bool cpio_next(struct cpio_iter *iter, const char **name,
+ const void **contents, size_t *size);
+
+#endif /* _CPIO_H */
diff --git a/inc/cpu.h b/inc/cpu.h
new file mode 100644
index 0000000..6eb90ab
--- /dev/null
+++ b/inc/cpu.h
@@ -0,0 +1,61 @@
+#ifndef _CPU_H
+#define _CPU_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arch_cpu.h"
+#include "list.h"
+#include "spinlock.h"
+
+struct vcpu {
+ struct list_entry links;
+ bool is_runnable;
+ bool interrupt;
+ struct arch_regs regs;
+ struct cpu *cpu;
+ struct vm *vm;
+};
+
+/* TODO: Update alignment such that cpus are in different cache lines. */
+struct cpu {
+ struct spinlock lock;
+
+ struct vcpu *current;
+
+ struct list_entry ready_queue;
+
+ /*
+ * Enabling/disabling irqs are counted per-cpu. They are enabled when
+ * the count is zero, and disabled when it's non-zero.
+ */
+ uint32_t irq_disable_count;
+
+ /*
+ * The number of VMs that have turned this CPU on. CPUs are off when
+ * this count is zero, and on when this count is non-zero.
+ */
+ uint32_t cpu_on_count;
+
+ bool (*timer_cb)(void *context);
+ void *timer_context;
+
+ /* CPU identifier. Doesn't have to be contiguous. */
+ size_t id;
+
+ /* Pointer to bottom of the stack. */
+ void *stack_bottom;
+};
+
+void cpu_init(struct cpu *c);
+void cpu_irq_enable(struct cpu *c);
+void cpu_irq_disable(struct cpu *c);
+void cpu_on(struct cpu *c);
+void cpu_off(struct cpu *c);
+
+void vcpu_init(struct vcpu *vcpu, struct cpu *cpu, struct vm *vm);
+void vcpu_ready(struct vcpu *v);
+void vcpu_unready(struct vcpu *v);
+
+#endif /* _CPU_H */
diff --git a/inc/decl_offsets.h b/inc/decl_offsets.h
new file mode 100644
index 0000000..01f182f
--- /dev/null
+++ b/inc/decl_offsets.h
@@ -0,0 +1,10 @@
+#ifndef _DECL_OFFSETS_H
+#define _DECL_OFFSETS_H
+
+#define DECL(name, type, field) \
+ __asm("DEFINE_OFFSET " #name " %0" : : "n" (offsetof(type, field)))
+
+#define DECL_SIZE(name, type) \
+ __asm("DEFINE_OFFSET " #name " %0" : : "n" (sizeof(type)))
+
+#endif /* _DECL_OFFSETS_H */
diff --git a/inc/dlog.h b/inc/dlog.h
new file mode 100644
index 0000000..d4c08c0
--- /dev/null
+++ b/inc/dlog.h
@@ -0,0 +1,12 @@
+#ifndef _DLOG_H
+#define _DLOG_H
+
+#if DEBUG
+void dlog(const char *fmt, ...);
+#else
+#define dlog(...)
+#endif
+
+void dlog_init(void (*pchar)(char));
+
+#endif /* _DLOG_H */
diff --git a/inc/fdt.h b/inc/fdt.h
new file mode 100644
index 0000000..fb23c8e
--- /dev/null
+++ b/inc/fdt.h
@@ -0,0 +1,23 @@
+#ifndef _FDT_H
+#define _FDT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct fdt_node {
+ /* TODO: What do we need here? */
+ const struct fdt_header *hdr;
+ const char *begin;
+ const char *end;
+ const char *strs;
+};
+
+void fdt_dump(struct fdt_header *hdr);
+void fdt_root_node(struct fdt_node *node, const struct fdt_header *hdr);
+bool fdt_find_child(struct fdt_node *node, const char *child);
+bool fdt_read_property(const struct fdt_node *node, const char *name,
+ const char **buf, uint32_t *size);
+
+void fdt_add_mem_reservation(struct fdt_header *hdr, uint64_t addr, uint64_t len);
+
+#endif /* _FDT_H */
diff --git a/inc/irq.h b/inc/irq.h
new file mode 100644
index 0000000..af5faba
--- /dev/null
+++ b/inc/irq.h
@@ -0,0 +1,32 @@
+#ifndef _IRQ_H
+#define _IRQ_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+struct irq_handle;
+
+enum irq_trigger {
+ irq_trigger_level,
+ irq_trigger_edge,
+};
+
+enum irq_polarity {
+ irq_polarity_active_high,
+ irq_polarity_active_low,
+};
+
+/* TODO: Add target CPUs here. */
+void irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p,
+ bool (*cb)(void *, struct irq_handle *), void *context);
+void irq_enable(uint32_t num);
+
+void irq_dismiss(struct irq_handle *h);
+
+/* TODO: These don't really belong here, do they? */
+bool irq_handle(uint32_t num, struct irq_handle *h);
+void irq_init(void);
+void irq_init_percpu(void);
+
+#endif
diff --git a/inc/list.h b/inc/list.h
new file mode 100644
index 0000000..3001394
--- /dev/null
+++ b/inc/list.h
@@ -0,0 +1,50 @@
+#ifndef _LIST_H
+#define _LIST_H
+
+#include <stdbool.h>
+
+struct list_entry {
+ struct list_entry *next;
+ struct list_entry *prev;
+};
+
+#define LIST_INIT(l) {.next = &(l), .prev = &(l)}
+#define LIST_ELEM(ptr, type, field) \
+	((type*)((char*)ptr - offsetof(type, field)))
+
+static inline void list_init(struct list_entry *e)
+{
+ e->next = e;
+ e->prev = e;
+}
+
+static inline void list_append(struct list_entry *l, struct list_entry *e)
+{
+ e->next = l;
+ e->prev = l->prev;
+
+ e->next->prev = e;
+ e->prev->next = e;
+}
+
+static inline void list_prepend(struct list_entry *l, struct list_entry *e)
+{
+ e->next = l->next;
+ e->prev = l;
+
+ e->next->prev = e;
+ e->prev->next = e;
+}
+
+static inline bool list_empty(struct list_entry *l)
+{
+ return l->next == l;
+}
+
+static inline void list_remove(struct list_entry *e)
+{
+ e->prev->next = e->next;
+ e->next->prev = e->prev;
+}
+
+#endif /* _LIST_H */
diff --git a/inc/spinlock.h b/inc/spinlock.h
new file mode 100644
index 0000000..7761980
--- /dev/null
+++ b/inc/spinlock.h
@@ -0,0 +1,27 @@
+#ifndef _SPINLOCK_H
+#define _SPINLOCK_H
+
+#include <stdatomic.h>
+
+struct spinlock {
+ atomic_flag v;
+};
+
+#define SPINLOCK_INIT {.v = ATOMIC_FLAG_INIT}
+
+static inline void sl_init(struct spinlock *l)
+{
+ *l = (struct spinlock)SPINLOCK_INIT;
+}
+
+static inline void sl_lock(struct spinlock *l)
+{
+ while (atomic_flag_test_and_set_explicit(&l->v, memory_order_acquire));
+}
+
+static inline void sl_unlock(struct spinlock *l)
+{
+ atomic_flag_clear_explicit(&l->v, memory_order_release);
+}
+
+#endif /* _SPINLOCK_H */
diff --git a/inc/std.h b/inc/std.h
new file mode 100644
index 0000000..d2438a2
--- /dev/null
+++ b/inc/std.h
@@ -0,0 +1,52 @@
+#ifndef _STD_H
+#define _STD_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dst, const void *src, size_t n);
+void *memmove(void *dst, const void *src, size_t n);
+int memcmp(const void *a, const void *b, size_t n);
+
+size_t strlen(const char *str);
+int strcmp(const char *a, const char *b);
+
+static inline uint16_t ntohs(uint16_t v)
+{
+ return v << 8 | v >> 8;
+}
+
+static inline uint32_t ntohl(uint32_t v)
+{
+ /* TODO: no conversion needed if native is big endian. */
+ return (v << 24) |
+ (v >> 24) |
+ ((v & 0xff00) << 8) |
+ ((v & 0xff0000) >> 8);
+}
+
+static inline uint64_t ntohll(uint64_t v)
+{
+ /* TODO: no conversion needed if native is big endian. */
+ return (v << 56) |
+ (v >> 56) |
+ ((v & 0xff00) << 40) |
+ ((v & 0xff000000000000) >> 40) |
+ ((v & 0xff0000) << 24) |
+ ((v & 0xff0000000000) >> 24) |
+ ((v & 0xff000000) << 8) |
+ ((v & 0xff00000000) >> 8);
+}
+
+static inline uint32_t htonl(uint32_t v)
+{
+ return ntohl(v);
+}
+
+static inline uint64_t htonll(uint64_t v)
+{
+ return ntohll(v);
+}
+
+#endif /* _STD_H */
diff --git a/inc/timer.h b/inc/timer.h
new file mode 100644
index 0000000..aac58c4
--- /dev/null
+++ b/inc/timer.h
@@ -0,0 +1,10 @@
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#include <stdbool.h>
+
+void timer_init(void);
+void timer_init_percpu(void);
+void timer_set(uint64_t time, bool (*cb)(void *), void *context);
+
+#endif /* _TIMER_H */
diff --git a/inc/vm.h b/inc/vm.h
new file mode 100644
index 0000000..aa9133c
--- /dev/null
+++ b/inc/vm.h
@@ -0,0 +1,14 @@
+#ifndef _VM_H
+#define _VM_H
+
+#include "cpu.h"
+
+struct vm {
+ struct vcpu vcpus[MAX_CPUS];
+ struct arch_page_table page_table;
+};
+
+void vm_init(struct vm *vm, struct cpu *cpus);
+void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg);
+
+#endif /* _VM_H */
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..cd7c614
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,63 @@
+#include "alloc.h"
+
+#include "dlog.h"
+#include "spinlock.h"
+
+static size_t alloc_base;
+static size_t alloc_limit;
+static struct spinlock alloc_lock = SPINLOCK_INIT;
+
+/*
+ * Initializes the allocator.
+ */
+void halloc_init(size_t base, size_t size)
+{
+ alloc_base = base;
+ alloc_limit = base + size;
+}
+
+/*
+ * Allocates the requested amount of memory. Return NULL when there isn't enough
+ * free memory.
+ */
+void *halloc(size_t size)
+{
+ return halloc_aligned(size, 2 * sizeof(size_t));
+}
+
+/*
+ * Frees the provided memory.
+ *
+ * Currently unimplemented.
+ */
+void hfree(void *ptr)
+{
+ dlog("Attempted to free pointer %p\n", ptr);
+}
+
+/*
+ * Allocates the requested amount of memory, with the requested alignment.
+ *
+ * Alignment must be a power of two. Returns NULL when there isn't enough free
+ * memory.
+ */
+void *halloc_aligned(size_t size, size_t align)
+{
+ size_t begin;
+ size_t end;
+
+ sl_lock(&alloc_lock);
+
+ begin = (alloc_base + align - 1) & ~(align - 1);
+ end = begin + size;
+
+ /* Check for overflows, and that there is enough free mem. */
+ if (end > begin && begin >= alloc_base && end <= alloc_limit)
+ alloc_base = end;
+ else
+ begin = 0;
+
+ sl_unlock(&alloc_lock);
+
+ return (void *)begin;
+}
diff --git a/src/arch/aarch64/entry.S b/src/arch/aarch64/entry.S
new file mode 100644
index 0000000..560aaf8
--- /dev/null
+++ b/src/arch/aarch64/entry.S
@@ -0,0 +1,112 @@
+#include "offsets.h"
+
+#define PECOFF_FILE_ALIGNMENT 0x200
+
+.section .init.entry, "ax"
+
+.global entry
+entry:
+
+ add x13, x18, #0x16
+ b 0f
+ .quad 4096 /* text_offset */
+ .quad file_size /* image_size */
+ .quad 0 /* flags */
+ .quad 0 /* res2 */
+ .quad 0 /* res3 */
+ .quad 0 /* res4 */
+ .word 0x644d5241 /* magic */
+ .word 0
+
+ /*
+ * Calculate the difference between the actual load address and the
+ * preferred one. We'll use this to relocate.
+ */
+0: adrp x25, entry
+ add x25, x25, :lo12:entry
+
+ ldr w29, =_orig_base
+
+ sub x25, x25, x29
+
+ /* Find where the relocations begin and end. */
+ adrp x29, rela_begin
+ add x29, x29, :lo12:rela_begin
+
+ adrp x30, rela_end
+ add x30, x30, :lo12:rela_end
+
+ /* Iterate over all relocations. */
+1: cmp x29, x30
+ b.eq 2f
+
+ ldp x26, x27, [x29], #16
+ ldr x28, [x29], #8
+
+ cmp w27, #1027 /* R_AARCH64_RELATIVE */
+# b.ne 1b
+ b.ne .
+
+ add x28, x28, x25
+ str x28, [x26, x25]
+ b 1b
+
+ /* Zero out the bss section. */
+2: adrp x29, bss_begin
+ add x29, x29, :lo12:bss_begin
+
+ adrp x30, bss_end
+ add x30, x30, :lo12:bss_end
+
+3: cmp x29, x30
+ b.hs 4f
+
+ stp xzr, xzr, [x29], #16
+ b 3b
+
+4: /* Save the FDT to a global variable. */
+ adrp x30, fdt
+ add x30, x30, :lo12:fdt
+ str x0, [x30]
+
+ /* Get pointer to first cpu. */
+ adrp x0, cpus
+ add x0, x0, :lo12:cpus
+
+
+.globl cpu_entry
+cpu_entry:
+ /* Disable interrupts. */
+ msr DAIFSet, #0xf
+
+ /* Save pointer to CPU struct for later reference. */
+ msr tpidr_el2, x0
+
+ /* Use SPx (instead of SP0). */
+ msr spsel, #1
+
+ /* Prepare the stack. */
+ ldr x30, [x0, #CPU_STACK_BOTTOM]
+ mov sp, x30
+
+ /* Configure exception handlers. */
+ adrp x30, vector_table_el2
+ add x30, x30, :lo12:vector_table_el2
+ msr vbar_el2, x30
+
+ /* Call into C code. */
+ bl cpu_main
+
+ /* Run the first vcpu. */
+ bl cpu_next_vcpu
+ b vcpu_enter_restore_all
+
+ /* Loop forever waiting for interrupts. */
+5: wfi
+ b 5b
+
+/* TODO: Move this elsewhere. */
+.globl smc
+smc:
+ SMC #0
+ ret
diff --git a/src/arch/aarch64/exceptions.S b/src/arch/aarch64/exceptions.S
new file mode 100644
index 0000000..97bd2fd
--- /dev/null
+++ b/src/arch/aarch64/exceptions.S
@@ -0,0 +1,353 @@
+#include "offsets.h"
+
+.text
+
+.balign 0x800
+.global vector_table_el2
+vector_table_el2:
+ /* sync_cur_sp0 */
+ b .
+
+.balign 0x80
+ /* irq_cur_sp0 */
+ b irq_current
+
+.balign 0x80
+ /* fiq_cur_sp0 */
+ b .
+
+.balign 0x80
+ /* serr_cur_sp0 */
+ b .
+
+.balign 0x80
+ /* sync_cur_spx */
+ mrs x0, esr_el2
+ mrs x1, elr_el2
+ b sync_current_exception
+
+.balign 0x80
+ /* irq_cur_spx */
+ b irq_current
+
+.balign 0x80
+ /* fiq_cur_spx */
+ b .
+
+.balign 0x80
+ /* serr_cur_spx */
+ b .
+
+.balign 0x80
+ /* sync_lower_64 */
+
+ /* Save x18 since we're about to clobber it. */
+ str x18, [sp, #-8]
+
+ /* Extract the exception class (EC) from exception syndrome register. */
+ mrs x18, esr_el2
+ lsr x18, x18, #26
+
+ /* Take the slow path if exception is not due to an HVC instruction. */
+ subs x18, x18, #0x16
+ b.ne slow_sync_lower_64
+
+ /* Save x29 and x30, then jump to HVC handler. */
+ stp x29, x30, [sp, #-16]!
+ bl hvc_handler
+ ldp x29, x30, [sp], #16
+ cbnz x1, sync_lower_64_switch
+
+ /* Zero out all volatile registers (except x0) and return. */
+ stp xzr, xzr, [sp, #-16]
+ ldp x1, x2, [sp, #-16]
+ ldp x3, x4, [sp, #-16]
+ ldp x5, x6, [sp, #-16]
+ ldp x7, x8, [sp, #-16]
+ ldp x9, x10, [sp, #-16]
+ ldp x11, x12, [sp, #-16]
+ ldp x13, x14, [sp, #-16]
+ ldp x15, x16, [sp, #-16]
+ ldp x17, x18, [sp, #-16]
+ eret
+
+.balign 0x80
+ /* irq_lower_64 */
+
+ /* Save x0 since we're about to clobber it. */
+ str x0, [sp, #-8]
+
+ /* Get the current vcpu. */
+ mrs x0, tpidr_el2
+ ldr x0, [x0, #CPU_CURRENT]
+
+ /* Save volatile registers. */
+ add x0, x0, #VCPU_REGS
+ stp x2, x3, [x0, #8 * 2]
+ stp x4, x5, [x0, #8 * 4]
+ stp x6, x7, [x0, #8 * 6]
+ stp x8, x9, [x0, #8 * 8]
+ stp x10, x11, [x0, #8 * 10]
+ stp x12, x13, [x0, #8 * 12]
+ stp x14, x15, [x0, #8 * 14]
+ stp x16, x17, [x0, #8 * 16]
+ str x18, [x0, #8 * 18]
+ stp x29, x30, [x0, #8 * 29]
+
+ ldr x2, [sp, #-8]
+ stp x2, x1, [x0, #8 * 0]
+
+ /* Save return address & mode. */
+ mrs x1, elr_el2
+ mrs x2, spsr_el2
+ stp x1, x2, [x0, #8 * 31]
+
+ /* Call C handler. */
+ bl irq_handle_lower
+ cbz x0, vcpu_return
+
+ /* Find a new vcpu to run. */
+ bl cpu_next_vcpu
+ cbz x0, vcpu_return
+ b vcpu_switch
+
+.balign 0x80
+ /* fiq_lower_64 */
+ b .
+
+.balign 0x80
+ /* serr_lower_64 */
+ b .
+
+.balign 0x80
+ /* sync_lower_32 */
+ b .
+
+.balign 0x80
+ /* irq_lower_32 */
+ b .
+
+.balign 0x80
+ /* fiq_lower_32 */
+ b .
+
+.balign 0x80
+ /* serr_lower_32 */
+ b .
+
+.balign 0x80
+
+vcpu_switch:
+ /* Save non-volatile registers. */
+ add x1, x1, #VCPU_REGS
+ stp x19, x20, [x1, #8 * 19]
+ stp x21, x22, [x1, #8 * 21]
+ stp x23, x24, [x1, #8 * 23]
+ stp x25, x26, [x1, #8 * 25]
+ stp x27, x28, [x1, #8 * 27]
+
+ /* Save lazy state. */
+ add x1, x1, #(VCPU_LAZY - VCPU_REGS)
+
+ mrs x24, vmpidr_el2
+ mrs x25, csselr_el1
+ stp x24, x25, [x1, #16 * 0]
+
+ mrs x2, sctlr_el1
+ mrs x3, actlr_el1
+ stp x2, x3, [x1, #16 * 1]
+
+ mrs x4, cpacr_el1
+ mrs x5, ttbr0_el1
+ stp x4, x5, [x1, #16 * 2]
+
+ mrs x6, ttbr1_el1
+ mrs x7, tcr_el1
+ stp x6, x7, [x1, #16 * 3]
+
+ mrs x8, esr_el1
+ mrs x9, afsr0_el1
+ stp x8, x9, [x1, #16 * 4]
+
+ mrs x10, afsr1_el1
+ mrs x11, far_el1
+ stp x10, x11, [x1, #16 * 5]
+
+ mrs x12, mair_el1
+ mrs x13, vbar_el1
+ stp x12, x13, [x1, #16 * 6]
+
+ mrs x14, contextidr_el1
+ mrs x15, tpidr_el0
+ stp x14, x15, [x1, #16 * 7]
+
+ mrs x16, tpidrro_el0
+ mrs x17, tpidr_el1
+ stp x16, x17, [x1, #16 * 8]
+
+ mrs x18, amair_el1
+ mrs x19, cntkctl_el1
+ stp x18, x19, [x1, #16 * 9]
+
+ mrs x20, sp_el0
+ mrs x21, sp_el1
+ stp x20, x21, [x1, #16 * 10]
+
+ mrs x22, par_el1
+ str x22, [x1, #16 * 11]
+
+.globl vcpu_enter_restore_all
+vcpu_enter_restore_all:
+ /* Get a pointer to the lazy registers. */
+ add x0, x0, #VCPU_LAZY
+
+ ldp x24, x25, [x0, #16 * 0]
+ msr vmpidr_el2, x24
+ msr csselr_el1, x25
+
+ ldp x2, x3, [x0, #16 * 1]
+ msr sctlr_el1, x2
+ msr actlr_el1, x3
+
+ ldp x4, x5, [x0, #16 * 2]
+ msr cpacr_el1, x4
+ msr ttbr0_el1, x5
+
+ ldp x6, x7, [x0, #16 * 3]
+ msr ttbr1_el1, x6
+ msr tcr_el1, x7
+
+ ldp x8, x9, [x0, #16 * 4]
+ msr esr_el1, x8
+ msr afsr0_el1, x9
+
+ ldp x10, x11, [x0, #16 * 5]
+ msr afsr1_el1, x10
+ msr far_el1, x11
+
+ ldp x12, x13, [x0, #16 * 6]
+ msr mair_el1, x12
+ msr vbar_el1, x13
+
+ ldp x14, x15, [x0, #16 * 7]
+ msr contextidr_el1, x14
+ msr tpidr_el0, x15
+
+ ldp x16, x17, [x0, #16 * 8]
+ msr tpidrro_el0, x16
+ msr tpidr_el1, x17
+
+ ldp x18, x19, [x0, #16 * 9]
+ msr amair_el1, x18
+ msr cntkctl_el1, x19
+
+ ldp x20, x21, [x0, #16 * 10]
+ msr sp_el0, x20
+ msr sp_el1, x21
+
+ ldp x22, x23, [x0, #16 * 11]
+ msr par_el1, x22
+ msr hcr_el2, x23
+
+ /* Restore non-volatile registers. */
+ add x0, x0, #(VCPU_REGS - VCPU_LAZY)
+
+ ldp x19, x20, [x0, #8 * 19]
+ ldp x21, x22, [x0, #8 * 21]
+ ldp x23, x24, [x0, #8 * 23]
+ ldp x25, x26, [x0, #8 * 25]
+ ldp x27, x28, [x0, #8 * 27]
+
+ /* Restore volatile registers and return. */
+ sub x0, x0, #VCPU_REGS
+
+vcpu_return:
+ /* Restore volatile registers. */
+ add x0, x0, #VCPU_REGS
+
+ ldp x4, x5, [x0, #8 * 4]
+ ldp x6, x7, [x0, #8 * 6]
+ ldp x8, x9, [x0, #8 * 8]
+ ldp x10, x11, [x0, #8 * 10]
+ ldp x12, x13, [x0, #8 * 12]
+ ldp x14, x15, [x0, #8 * 14]
+ ldp x16, x17, [x0, #8 * 16]
+ ldr x18, [x0, #8 * 18]
+ ldp x29, x30, [x0, #8 * 29]
+
+ /* Restore return address & mode. */
+ ldp x1, x2, [x0, #8 * 31]
+ msr elr_el2, x1
+ msr spsr_el2, x2
+
+ /* Restore x0..x3, which we have used as scratch before. */
+ ldp x2, x3, [x0, #8 * 2]
+ ldp x0, x1, [x0, #8 * 0]
+ eret
+
+slow_sync_lower_64:
+ /* Get the current vcpu. */
+ mrs x18, tpidr_el2
+ /* TODO: tpidr_el2 should store the vcpu pointer directly. */
+ ldr x18, [x18, #CPU_CURRENT]
+
+ /* Save volatile registers. */
+ add x18, x18, #VCPU_REGS
+ stp x0, x1, [x18, #8 * 0]
+ stp x2, x3, [x18, #8 * 2]
+ stp x4, x5, [x18, #8 * 4]
+ stp x6, x7, [x18, #8 * 6]
+ stp x8, x9, [x18, #8 * 8]
+ stp x10, x11, [x18, #8 * 10]
+ stp x12, x13, [x18, #8 * 12]
+ stp x14, x15, [x18, #8 * 14]
+ stp x16, x17, [x18, #8 * 16]
+ stp x29, x30, [x18, #8 * 29]
+
+ ldr x0, [sp, #-8]
+ str x0, [x18, #8 * 18]
+
+ /* Save return address & mode. */
+ mrs x1, elr_el2
+ mrs x2, spsr_el2
+ stp x1, x2, [x18, #8 * 31]
+
+ /* Read syndrome register and call C handler. */
+ mrs x0, esr_el2
+ bl sync_lower_exception
+ cbz x0, vcpu_return
+
+ /* Find a new vcpu to run. */
+ bl cpu_next_vcpu
+ cbz x0, vcpu_return
+ b vcpu_switch
+
+sync_lower_64_switch:
+ /* We'll have to reschedule, so store state before doing so. */
+ mrs x18, tpidr_el2
+ ldr x18, [x18, #CPU_CURRENT]
+
+ /* Store zeroes in volatile register storage, except x0. */
+ add x18, x18, #VCPU_REGS
+ stp x0, xzr, [x18, #8 * 0]
+ stp xzr, xzr, [x18, #8 * 2]
+ stp xzr, xzr, [x18, #8 * 4]
+ stp xzr, xzr, [x18, #8 * 6]
+ stp xzr, xzr, [x18, #8 * 8]
+ stp xzr, xzr, [x18, #8 * 10]
+ stp xzr, xzr, [x18, #8 * 12]
+ stp xzr, xzr, [x18, #8 * 14]
+ stp xzr, xzr, [x18, #8 * 16]
+ stp x29, x30, [x18, #8 * 29]
+
+ str xzr, [x18, #8 * 18]
+
+ /* Save return address & mode. */
+ mrs x1, elr_el2
+ mrs x2, spsr_el2
+ stp x1, x2, [x18, #8 * 31]
+
+ /* Find a new vcpu to run. */
+ bl cpu_next_vcpu
+ cbz x0, vcpu_return
+ b vcpu_switch
diff --git a/src/arch/aarch64/gicv2.c b/src/arch/aarch64/gicv2.c
new file mode 100644
index 0000000..c160007
--- /dev/null
+++ b/src/arch/aarch64/gicv2.c
@@ -0,0 +1,102 @@
+#include "dlog.h"
+#include "io.h"
+#include "irq.h"
+
+#define GICD_CTLR (0x00)
+#define GICD_TYPER (0x04)
+#define GICD_ISENABLER (0x100)
+#define GICD_ICENABLER (0x180)
+#define GICD_ICPENDR (0x280)
+#define GICD_ICACTIVER (0x380)
+#define GICD_IPRIORITYR (0x400)
+#define GICD_ITARGETSR (0x800)
+#define GICD_ICFGR (0xc00)
+
+#define GICC_CTLR (0x000)
+#define GICC_PMR (0x004)
+#define GICC_IAR (0x00c)
+#define GICC_EOIR (0x010)
+
+struct irq_handle {
+ uint32_t iar;
+};
+
+/*
+ * Dismisses an irq that was signaled and is being processed.
+ */
+void irq_dismiss(struct irq_handle *h)
+{
+ io_write(GICC_BASE + GICC_EOIR, h->iar);
+}
+
+/*
+ * Configures the given irq number before it can be enabled.
+ */
+void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p)
+{
+ uint32_t v = io_read(GICD_BASE + GICD_ICFGR + (num / 16) * 4);
+
+ if (t == irq_trigger_level)
+ v &= ~(2u << ((num % 16) * 2));
+ else
+ v |= 2u << ((num % 16) * 2);
+
+ io_write(GICD_BASE + GICD_ICFGR + (num / 16) * 4, v);
+}
+
+/*
+ * Enables the given irq number such that interrupts will be signaled when its
+ * interrupt line is asserted. A caller must first configure the irq before
+ * enabling it.
+ */
+void irq_enable(uint32_t num)
+{
+ io_write(GICD_BASE + GICD_ISENABLER + (num / 32) * 4,
+ (1u << (num % 32)));
+}
+
+/*
+ * Handles an interrupt signaled when the CPU was in a lower level (EL0 or EL1),
+ * it is called directly from the exception handler.
+ *
+ * The return value indicates whether a new vcpu should be scheduled.
+ */
+bool irq_handle_lower(void)
+{
+ struct irq_handle h = {
+ .iar = io_read(GICC_BASE + GICC_IAR),
+ };
+
+ dlog("irq: %u\n", h.iar & 0x3ff);
+
+ return irq_handle(h.iar & 0x3ff, &h);
+}
+
+/*
+ * Initializes the GICv2 for use as the interrupt controller.
+ */
+void arch_irq_init_percpu(void)
+{
+ uint32_t i;
+ uint32_t max = 32 * (1 + (io_read(GICD_BASE + GICD_TYPER) & 0x1f));
+
+ /* Disable all irqs, clear pending & active states. */
+ for (i = 0; i < (max + 31) / 32; i++) {
+ io_write(GICD_BASE + GICD_ICENABLER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICACTIVER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICPENDR + i * 4, 0xffffffff);
+ }
+
+ /* Set the priority to zero, and cpu target to cpu 0 by default. */
+ for (i = 0; i < (max + 3) / 4; i++) {
+ io_write(GICD_BASE + GICD_IPRIORITYR + i * 4, 0);
+ io_write(GICD_BASE + GICD_ITARGETSR + i * 4, 0x01010101);
+ }
+
+ /* Allow all irq levels to interrupt the current CPU. */
+ io_write(GICC_BASE + GICC_PMR, 0xff);
+
+ /* Enable distributor and CPU interfaces. */
+ io_write(GICD_BASE + GICD_CTLR, 1);
+ io_write(GICC_BASE + GICC_CTLR, 1);
+}
diff --git a/src/arch/aarch64/gicv3.c b/src/arch/aarch64/gicv3.c
new file mode 100644
index 0000000..c160007
--- /dev/null
+++ b/src/arch/aarch64/gicv3.c
@@ -0,0 +1,102 @@
+#include "dlog.h"
+#include "io.h"
+#include "irq.h"
+
+#define GICD_CTLR (0x00)
+#define GICD_TYPER (0x04)
+#define GICD_ISENABLER (0x100)
+#define GICD_ICENABLER (0x180)
+#define GICD_ICPENDR (0x280)
+#define GICD_ICACTIVER (0x380)
+#define GICD_IPRIORITYR (0x400)
+#define GICD_ITARGETSR (0x800)
+#define GICD_ICFGR (0xc00)
+
+#define GICC_CTLR (0x000)
+#define GICC_PMR (0x004)
+#define GICC_IAR (0x00c)
+#define GICC_EOIR (0x010)
+
+struct irq_handle {
+ uint32_t iar;
+};
+
+/*
+ * Dismisses an irq that was signaled and is being processed.
+ */
+void irq_dismiss(struct irq_handle *h)
+{
+ io_write(GICC_BASE + GICC_EOIR, h->iar);
+}
+
+/*
+ * Configures the given irq number before it can be enabled.
+ */
+void arch_irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p)
+{
+ uint32_t v = io_read(GICD_BASE + GICD_ICFGR + (num / 16) * 4);
+
+ if (t == irq_trigger_level)
+ v &= ~(2u << ((num % 16) * 2));
+ else
+ v |= 2u << ((num % 16) * 2);
+
+ io_write(GICD_BASE + GICD_ICFGR + (num / 16) * 4, v);
+}
+
+/*
+ * Enables the given irq number such that interrupts will be signaled when its
+ * interrupt line is asserted. A caller must first configure the irq before
+ * enabling it.
+ */
+void irq_enable(uint32_t num)
+{
+ io_write(GICD_BASE + GICD_ISENABLER + (num / 32) * 4,
+ (1u << (num % 32)));
+}
+
+/*
+ * Handles an interrupt signaled when the CPU was in a lower level (EL0 or EL1),
+ * it is called directly from the exception handler.
+ *
+ * The return value indicates whether a new vcpu should be scheduled.
+ */
+bool irq_handle_lower(void)
+{
+ struct irq_handle h = {
+ .iar = io_read(GICC_BASE + GICC_IAR),
+ };
+
+ dlog("irq: %u\n", h.iar & 0x3ff);
+
+ return irq_handle(h.iar & 0x3ff, &h);
+}
+
+/*
+ * Initializes the GICv2 for use as the interrupt controller.
+ */
+void arch_irq_init_percpu(void)
+{
+ uint32_t i;
+ uint32_t max = 32 * (1 + (io_read(GICD_BASE + GICD_TYPER) & 0x1f));
+
+ /* Disable all irqs, clear pending & active states. */
+ for (i = 0; i < (max + 31) / 32; i++) {
+ io_write(GICD_BASE + GICD_ICENABLER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICACTIVER + i * 4, 0xffffffff);
+ io_write(GICD_BASE + GICD_ICPENDR + i * 4, 0xffffffff);
+ }
+
+ /* Set the priority to zero, and cpu target to cpu 0 by default. */
+ for (i = 0; i < (max + 3) / 4; i++) {
+ io_write(GICD_BASE + GICD_IPRIORITYR + i * 4, 0);
+ io_write(GICD_BASE + GICD_ITARGETSR + i * 4, 0x01010101);
+ }
+
+ /* Allow all irq levels to interrupt the current CPU. */
+ io_write(GICC_BASE + GICC_PMR, 0xff);
+
+ /* Enable distributor and CPU interfaces. */
+ io_write(GICD_BASE + GICD_CTLR, 1);
+ io_write(GICC_BASE + GICC_CTLR, 1);
+}
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
new file mode 100644
index 0000000..07b1291
--- /dev/null
+++ b/src/arch/aarch64/handler.c
@@ -0,0 +1,91 @@
+#include "cpu.h"
+#include "dlog.h"
+#include "irq.h"
+#include "vm.h"
+
+#include "msr.h"
+
+struct hvc_handler_return {
+ size_t user_ret;
+ bool schedule;
+};
+
+void irq_current(void)
+{
+ dlog("IRQ from current\n");
+ for (;;);
+}
+
+void sync_current_exception(uint64_t esr, uint64_t elr)
+{
+ dlog("Exception: esr=%#x, elr=%#x\n", esr, elr);
+ for (;;);
+}
+
+struct hvc_handler_return hvc_handler(size_t arg1)
+{
+ struct hvc_handler_return ret;
+
+ ret.schedule = true;
+
+ switch (arg1) {
+ case 0x84000000: /* PSCI_VERSION */
+ ret.user_ret = 2;
+ break;
+
+ case 0x84000006: /* PSCI_MIGRATE */
+ ret.user_ret = 2;
+ break;
+
+#if 0
+ TODO: Remove this.
+ case 1: /* TODO: Fix. */
+ {
+ extern struct vm vm0;
+ struct vcpu *vcpu = vm0.vcpus;
+ vcpu->interrupt = true;
+ vcpu_ready(vcpu);
+ dlog("Readying VCPU0 again\n");
+ }
+ ret.user_ret = 0;
+ break;
+#endif
+
+ default:
+ ret.user_ret = -1;
+ }
+
+ return ret;
+}
+
+bool sync_lower_exception(uint64_t esr)
+{
+ struct cpu *c = cpu();
+ struct vcpu *vcpu = c->current;
+
+ switch (esr >> 26) {
+ case 0x01: /* EC = 000001, WFI or WFE. */
+ /* Check TI bit of ISS. */
+ if (esr & 1)
+ return true;
+ //vcpu_unready(vcpu);
+ return true;
+
+ case 0x24: /* EC = 100100, Data abort. */
+ dlog("Data abort: pc=0x%x, esr=0x%x, ec=0x%x", vcpu->regs.pc, esr, esr >> 26);
+ if (!(esr & (1u << 10))) /* Check FnV bit. */
+ dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2), read_msr(hpfar_el2) << 8);
+ else
+ dlog(", far=invalid");
+
+ dlog("\n");
+ for (;;);
+
+ default:
+ dlog("Unknown sync exception pc=0x%x, esr=0x%x, ec=0x%x\n", vcpu->regs.pc, esr, esr >> 26);
+ for (;;);
+ }
+
+	/* TODO: For now we always reschedule. But we shouldn't. */
+ return true;
+}
diff --git a/src/arch/aarch64/hikey.mk b/src/arch/aarch64/hikey.mk
new file mode 100644
index 0000000..1bf62c1
--- /dev/null
+++ b/src/arch/aarch64/hikey.mk
@@ -0,0 +1,9 @@
+LOAD_ADDRESS := 0x1000000
+PL011_BASE := 0xf8015000
+PL011 := 1
+GICV2 := 1
+
+GICD_BASE := 0xf6801000
+GICC_BASE := 0xf6802000
+
+TIMER_IRQ := 26
diff --git a/src/arch/aarch64/inc/arch_barriers.h b/src/arch/aarch64/inc/arch_barriers.h
new file mode 100644
index 0000000..4d4cf08
--- /dev/null
+++ b/src/arch/aarch64/inc/arch_barriers.h
@@ -0,0 +1,19 @@
+#ifndef _ARCH_BARRIERS_H
+#define _ARCH_BARRIERS_H
+
+static inline void dmb(void)
+{
+ __asm__ volatile("dmb sy");
+}
+
+static inline void dsb(void)
+{
+ __asm__ volatile("dsb sy");
+}
+
+static inline void isb(void)
+{
+ __asm__ volatile("isb");
+}
+
+#endif /* _ARCH_BARRIERS_H */
diff --git a/src/arch/aarch64/inc/arch_cpu.h b/src/arch/aarch64/inc/arch_cpu.h
new file mode 100644
index 0000000..e86983e
--- /dev/null
+++ b/src/arch/aarch64/inc/arch_cpu.h
@@ -0,0 +1,124 @@
+#ifndef _ARCH_CPU_H
+#define _ARCH_CPU_H
+
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdint.h>
+
+struct arch_regs {
+ /* General purpose registers. */
+ uint64_t r[31];
+ uint64_t pc;
+ uint64_t spsr;
+
+ struct {
+ uint64_t vmpidr_el2;
+ uint64_t csselr_el1;
+ uint64_t sctlr_el1;
+ uint64_t actlr_el1;
+ uint64_t cpacr_el1;
+ uint64_t ttbr0_el1;
+ uint64_t ttbr1_el1;
+ uint64_t tcr_el1;
+ uint64_t esr_el1;
+ uint64_t afsr0_el1;
+ uint64_t afsr1_el1;
+ uint64_t far_el1;
+ uint64_t mair_el1;
+ uint64_t vbar_el1;
+ uint64_t contextidr_el1;
+ uint64_t tpidr_el0;
+ uint64_t tpidrro_el0;
+ uint64_t tpidr_el1;
+ uint64_t amair_el1;
+ uint64_t cntkctl_el1;
+ uint64_t sp_el0;
+ uint64_t sp_el1;
+ uint64_t par_el1;
+ uint64_t hcr_el2;
+ } lazy;
+};
+
+struct arch_page_table {
+ alignas(4096) uint64_t first[512];
+ alignas(4096) uint64_t entry0[512];
+ alignas(4096) uint64_t entry1[512];
+};
+
+static inline struct cpu *cpu(void)
+{
+ struct cpu *p;
+ __asm volatile("mrs %0, tpidr_el2" : "=r"(p));
+ return p;
+}
+
+static inline void arch_irq_disable(void)
+{
+ __asm volatile("msr DAIFSet, #0xf");
+}
+
+static inline void arch_irq_enable(void)
+{
+ __asm volatile("msr DAIFClr, #0xf");
+}
+
+static inline
+void arch_regs_init(struct arch_regs *r, size_t pc, size_t arg)
+{
+ /* TODO: Use constant here. */
+ r->spsr = 5 | /* M bits, set to EL1h. */
+ (0xf << 6); /* DAIF bits set; disable interrupts. */
+ r->pc = pc;
+ r->r[0] = arg;
+ r->lazy.hcr_el2 = (1u << 31) | /* RW bit. */
+// (7u << 3) | /* AMO, IMO, FMO bits. */
+ (3u << 13) | /* TWI, TWE bits. */
+ (1u << 2) | /* PTW, Protected Table Walk. */
+ (1u << 0); /* VM: enable stage-2 translation. */
+}
+
+static inline void arch_regs_set_irq(struct arch_regs *r)
+{
+ /* Set the VI bit. */
+ r->lazy.hcr_el2 |= (1u << 7);
+}
+
+static inline void arch_regs_clear_irq(struct arch_regs *r)
+{
+ /* Clear the VI bit. */
+ r->lazy.hcr_el2 &= ~(1u << 7);
+}
+
+/* TODO: Figure out what to do with this. */
+int32_t smc(size_t arg0, size_t arg1, size_t arg2, size_t arg3);
+
+static inline void arch_cpu_on(size_t id, void *ctx)
+{
+ void cpu_entry(void *ctx);
+ int32_t ret;
+
+ /*
+ * There's a race when turning a CPU on when it's in the process of
+ * turning off. We need to loop here while it is reported that the CPU
+ * is on (because it's about to turn itself off).
+ */
+ do {
+ /* CPU_ON */
+ ret = smc(0xC4000003, id, (size_t)&cpu_entry, (size_t)ctx);
+ } while (ret == -4); /* ALREADY_ON */
+}
+
+static inline void arch_cpu_off(void)
+{
+ /* CPU_OFF */
+ smc(0xC4000002, 0, 0, 0);
+}
+
+static inline void arch_set_vm_mm(struct arch_page_table *table)
+{
+ __asm volatile("msr vttbr_el2, %0" : : "r" ((size_t)table));
+}
+
+void arch_vptable_init(struct arch_page_table *table);
+
+#endif /* _ARCH_CPU_H */
diff --git a/src/arch/aarch64/inc/io.h b/src/arch/aarch64/inc/io.h
new file mode 100644
index 0000000..16f3112
--- /dev/null
+++ b/src/arch/aarch64/inc/io.h
@@ -0,0 +1,34 @@
+#ifndef _IO_H
+#define _IO_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arch_barriers.h"
+
+static inline uint32_t io_read(size_t addr)
+{
+ return *(volatile uint32_t *)addr;
+}
+
+static inline uint32_t io_read_mb(size_t addr)
+{
+ uint32_t v = io_read(addr);
+ dsb();
+ isb();
+ return v;
+}
+
+static inline void io_write(size_t addr, uint32_t v)
+{
+ *(volatile uint32_t *)addr = v;
+}
+
+static inline void io_write_mb(size_t addr, uint32_t v)
+{
+ dsb();
+ isb();
+ io_write(addr, v);
+}
+
+#endif /* _IO_H */
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
new file mode 100644
index 0000000..b062f85
--- /dev/null
+++ b/src/arch/aarch64/mm.c
@@ -0,0 +1,333 @@
+#include "arch_cpu.h"
+#include "dlog.h" /* TODO: Remove? */
+#include "msr.h"
+
+void arch_vptable_init(struct arch_page_table *table)
+{
+ uint64_t i;
+ uint64_t v;
+
+ dlog("ID_AA64MMFR0_EL1=0x%x\n", read_msr(ID_AA64MMFR0_EL1));
+
+ /* TODO: Check each bit. */
+ for (i = 0; i < 512; i++) {
+ table->entry0[i] = 1 |
+ (i << 30) | /* Address */
+ (1 << 10) | /* Access flag. */
+ (0 << 8) | /* sh: non-shareable. this preserves EL1. */
+ (3 << 6) | /* rw */
+ (0xf << 2); /* normal mem; preserves EL0/1. */
+ table->entry1[i] = 1 |
+ ((i+512) << 30) | /* Address */
+ (1 << 10) | /* Access flag. */
+ (0 << 8) | /* sh: non-shareable. this preserves EL1. */
+ (3 << 6) | /* rw */
+ (0xf << 2); /* normal mem; preserves EL0/1. */
+ table->first[i] = 0;
+ }
+
+ table->first[0] = (uint64_t)&table->entry0[0] | 3;
+ table->first[1] = (uint64_t)&table->entry1[0] | 3;
+
+ /* TODO: Where should this go? */
+ v =
+ (1u << 31) | /* RES1. */
+ (4 << 16) | /* PS: 44 bits. */
+ (0 << 14) | /* TG0: 4 KB granule. */
+ (3 << 12) | /* SH0: inner shareable. */
+ (1 << 10) | /* ORGN0: normal, cacheable ... */
+ (1 << 8) | /* IRGN0: normal, cacheable ... */
+ (2 << 6) | /* SL0: Start at level 0. */
+ (20 << 0); /* T0SZ: 44-bit input address size. */
+ write_msr(vtcr_el2, v);
+}
+
+#if 0
+#include "arch.h"
+
+#include <stdint.h>
+
+#include "alloc.h"
+#include "log.h"
+#include "msr.h"
+
+#define PAGE_BITS 12
+#define PAGE_SIZE (1 << PAGE_BITS)
+#define ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(uint64_t))
+#define INITIAL_LEVEL 1
+
+extern char text_begin[];
+extern char text_end[];
+extern char rodata_begin[];
+extern char rodata_end[];
+extern char data_begin[];
+extern char data_end[];
+extern char bin_end[];
+
+static uint64_t *ttbr;
+
+static inline size_t mm_entry_size(int level)
+{
+ return 1ull << (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
+}
+
+static inline size_t mm_level_end(size_t va, int level)
+{
+ size_t offset = (PAGE_BITS + (4 - level) * (PAGE_BITS - 3));
+ return ((va >> offset) + 1) << offset;
+}
+
+static inline size_t mm_index(size_t va, int level)
+{
+ size_t v = va >> (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
+ return v & ((1 << (PAGE_BITS - 3)) - 1);
+}
+
+static inline uint64_t mm_clear_attrs(uint64_t v)
+{
+ /* Clean bottom bits. */
+ v &= ~((1 << PAGE_BITS) - 1);
+
+ /* Clean top bits. */
+ v &= ((1ull << 59) - 1);
+
+ return v;
+}
+
+static inline uint64_t *mm_table_ptr(uint64_t pa)
+{
+ return (uint64_t *)mm_clear_attrs(pa);
+}
+
+static inline uint64_t mm_mode_to_attrs(uint64_t mode)
+{
+ uint64_t attrs =
+ (1 << 10) | /* Access flag. */
+ (2 << 8); /* sh -> outer shareable. */
+
+ /* TODO: This is different in s2. */
+ if (!(mode & MM_X)) {
+ attrs |= (1ull << 54); /* XN or UXN, [user] execute never. */
+
+ /* TODO: This is only ok in EL1, it is RES0 in EL2. */
+ attrs |= (1ull << 53); /* PXN, privileged execute never. */
+ }
+
+ /* TODO: This is different in s2. */
+ if (mode & MM_W)
+ attrs |= (0 << 6); /* rw, no EL0 access. */
+ else
+ attrs |= (2 << 6); /* read-only, no EL0 access. */
+
+ if (mode & MM_D)
+ attrs |= (0 << 2); /* device memory in MAIR_ELx. */
+ else
+ attrs |= (1 << 2); /* normal memory in MAIR_ELx. */
+
+ return attrs;
+}
+
+static uint64_t *mm_populate_table(uint64_t *table, uint64_t index)
+{
+ uint64_t *ntable;
+ uint64_t v = table[index];
+ uint64_t i;
+
+ /* Check if table entry already exists. */
+ if (v & 1) {
+ /* Fail if it's a block one. */
+ if (!(v & 2))
+ return NULL;
+ return mm_table_ptr(v);
+ }
+
+ /* Allocate a new table entry and initialize it. */
+ ntable = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
+ if (!ntable)
+ return NULL;
+
+ for (i = 0; i < ENTRIES_PER_LEVEL; i++)
+ ntable[i] = 0;
+
+ /* Fill in the new entry. */
+ table[index] = (size_t)ntable | 0x3;
+
+ return ntable;
+}
+
+static bool mm_map_level(size_t va, size_t va_end, size_t pa,
+ uint64_t attrs, uint64_t *table, int level)
+{
+ size_t i = mm_index(va, level);
+ size_t va_level_end = mm_level_end(va, level);
+ size_t entry_size = mm_entry_size(level);
+
+ /* Cap va_end so that we don't go beyond the current level's max. */
+ if (va_end > va_level_end)
+ va_end = va_level_end;
+
+ /* Fill each entry in the table. */
+ while (va < va_end) {
+ if (level == 3) {
+ table[i] = pa | 0x3 | attrs;
+ } else {
+ uint64_t *nt = mm_populate_table(table, i);
+ if (!nt) {
+ /* TODO: Undo all the work so far? */
+ return false;
+ }
+
+ if (!mm_map_level(va, va_end, pa, attrs, nt, level+1)) {
+ /* TODO: Undo all the work so far? */
+ return false;
+ }
+ }
+
+ va += entry_size;
+ pa += entry_size;
+ i++;
+ }
+
+ return true;
+}
+
+bool mm_map_range(size_t va, size_t size, uint64_t pa, uint64_t mode)
+{
+ uint64_t attrs = mm_mode_to_attrs(mode);
+ uint64_t end = mm_clear_attrs(va + size + PAGE_SIZE - 1);
+
+ va = mm_clear_attrs(va);
+ pa = mm_clear_attrs(pa);
+
+ return mm_map_level(va, end, pa, attrs, ttbr, INITIAL_LEVEL);
+}
+
+bool mm_map_page(size_t va, size_t pa, uint64_t mode)
+{
+ size_t i;
+ uint64_t attrs = mm_mode_to_attrs(mode);
+ uint64_t *table = ttbr;
+
+ va = mm_clear_attrs(va);
+ pa = mm_clear_attrs(pa);
+ for (i = INITIAL_LEVEL; i < 3; i++) {
+ table = mm_populate_table(table, mm_index(va, i));
+ if (!table)
+ return false;
+ }
+
+ /* We reached level 3. */
+ i = mm_index(va, 3);
+ table[i] = pa | 0x3 | attrs;
+ return true;
+}
+
+bool arch_init_mm(void)
+{
+#if 0
+ size_t i;
+
+ /* Allocate the first level, then zero it out. */
+ ttbr = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
+ if (!ttbr)
+ return false;
+
+ for (i = 0; i < ENTRIES_PER_LEVEL; i++)
+ ttbr[i] = 0;
+
+ /* Map page for uart. */
+ mm_map_page(PL011_BASE, PL011_BASE, MM_R | MM_W | MM_D);
+
+ /* Map page for gic. */
+ mm_map_page(GICD_BASE, GICD_BASE, MM_R | MM_W | MM_D);
+ mm_map_page(GICC_BASE, GICC_BASE, MM_R | MM_W | MM_D);
+
+ /* Map each section. */
+ mm_map_range((size_t)text_begin, text_end - text_begin,
+ (size_t)text_begin, MM_X);
+
+ mm_map_range((size_t)rodata_begin, rodata_end - rodata_begin,
+ (size_t)rodata_begin, MM_R);
+
+ mm_map_range((size_t)data_begin, data_end - data_begin,
+ (size_t)data_begin, MM_R | MM_W);
+
+ mm_map_range((size_t)bin_end, 20 * 1024 * 1024, (size_t)bin_end,
+ MM_R | MM_W);
+#endif
+ log(INFO, "About to enable mmu.\n");
+ enable_mmu(ttbr);
+ log(INFO, "mmu is on.\n");
+
+ return true;
+}
+
+static void arch_mm_dump_table(uint64_t *table, int level)
+{
+ uint64_t i, j;
+ for (i = 0; i < ENTRIES_PER_LEVEL; i++) {
+ if ((table[i] & 1) == 0)
+ continue;
+
+ for (j = 1 * (level - INITIAL_LEVEL + 1); j; j--)
+ log(INFO, "\t");
+ log(INFO, "%x: %x\n", i, table[i]);
+ if (level >= 3)
+ continue;
+
+ if ((table[i] & 3) == 3)
+ arch_mm_dump_table(mm_table_ptr(table[i]), level + 1);
+ }
+}
+
+void enable_mmu(uint64_t *table)
+{
+ //uint32_t v;
+
+ enable_s2();
+#if 0
+ /*
+ * 0 -> Device-nGnRnE memory
+ * 1 -> Normal memory, Inner/Outer Write-Back Non-transient,
+ * Write-Alloc, Read-Alloc.
+ */
+ write_msr(mair_el2, 0xff00);
+ write_msr(ttbr0_el2, table);
+
+ /*
+ * Configure tcr_el2.
+ */
+ v =
+ (1 << 20) | /* TBI, top byte ignored. */
+ (2 << 16) | /* PS, Physical Address Size, 40 bits, 1TB. */
+ (0 << 14) | /* TG0, granule size, 4KB. */
+ (3 << 12) | /* SH0, inner shareable. */
+ (1 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
+ (1 << 8) | /* IRGN0, normal mem, WB RA WA Cacheable. */
+ (25 << 0) | /* T0SZ, input address is 2^39 bytes. */
+ 0;
+ write_msr(tcr_el2, v);
+
+ v =
+ (1 << 0) | /* M, enable stage 1 EL2 MMU. */
+ (1 << 1) | /* A, enable alignment check faults. */
+ // TODO: Enable this.
+// (1 << 2) | /* C, data cache enable. */
+ (1 << 3) | /* SA, enable stack alignment check. */
+ (3 << 4) | /* RES1 bits. */
+ (1 << 11) | /* RES1 bit. */
+ (1 << 12) | /* I, instruction cache enable. */
+ (1 << 16) | /* RES1 bit. */
+ (1 << 18) | /* RES1 bit. */
+ (1 << 19) | /* WXN bit, writable execute never. */
+ (3 << 22) | /* RES1 bits. */
+ (3 << 28) | /* RES1 bits. */
+ 0;
+
+ __asm volatile("dsb sy");
+ __asm volatile("isb");
+ write_msr(sctlr_el2, v);
+ __asm volatile("isb");
+#endif
+}
+#endif
diff --git a/src/arch/aarch64/msr.h b/src/arch/aarch64/msr.h
new file mode 100644
index 0000000..e242cc2
--- /dev/null
+++ b/src/arch/aarch64/msr.h
@@ -0,0 +1,19 @@
+#ifndef _MSR_H
+#define _MSR_H
+
+#include <stddef.h>
+
+#define read_msr(name) \
+ __extension__({ \
+ size_t __v; \
+ __asm volatile("mrs %0, " #name : "=r" (__v)); \
+ __v; \
+ })
+
+#define write_msr(name, value) \
+ do { \
+ __asm volatile("msr " #name ", %x0" \
+ : : "rZ" ((size_t)value)); \
+ } while (0)
+
+#endif /* _MSR_H */
diff --git a/src/arch/aarch64/offsets.c b/src/arch/aarch64/offsets.c
new file mode 100644
index 0000000..f16e242
--- /dev/null
+++ b/src/arch/aarch64/offsets.c
@@ -0,0 +1,10 @@
+#include "cpu.h"
+#include "decl_offsets.h"
+
+void dummy(void)
+{
+ DECL(CPU_CURRENT, struct cpu, current);
+ DECL(CPU_STACK_BOTTOM, struct cpu, stack_bottom);
+ DECL(VCPU_REGS, struct vcpu, regs);
+ DECL(VCPU_LAZY, struct vcpu, regs.lazy);
+}
diff --git a/src/arch/aarch64/pl011.c b/src/arch/aarch64/pl011.c
new file mode 100644
index 0000000..02df2eb
--- /dev/null
+++ b/src/arch/aarch64/pl011.c
@@ -0,0 +1,34 @@
+#include "dlog.h"
+#include "io.h"
+
+/* UART Data Register. */
+#define UARTDR 0
+
+/* UART Flag Register. */
+#define UARTFR 0x018
+
+/* UART Flag Register bit: transmit fifo is full. */
+#define UARTFR_TXFF (1 << 5)
+
+/* UART Flag Register bit: UART is busy. */
+#define UARTFR_BUSY (1 << 3)
+
+void arch_putchar(char c)
+{
+ /* Print a carriage-return as well. */
+ if (c == '\n')
+ arch_putchar('\r');
+
+ /* Wait until there is room in the tx buffer. */
+ while (io_read(PL011_BASE + UARTFR) & UARTFR_TXFF);
+
+ dmb();
+
+ /* Write the character out. */
+ io_write(PL011_BASE + UARTDR, c);
+
+ dmb();
+
+ /* Wait until the UART is no longer busy. */
+ while (io_read_mb(PL011_BASE + UARTFR) & UARTFR_BUSY);
+}
diff --git a/src/arch/aarch64/qemu.mk b/src/arch/aarch64/qemu.mk
new file mode 100644
index 0000000..c137ca8
--- /dev/null
+++ b/src/arch/aarch64/qemu.mk
@@ -0,0 +1,10 @@
+LOAD_ADDRESS := 0x40001000
+PL011_BASE := 0x09000000
+PL011 := 1
+GICV3 := 1
+
+GICD_BASE := 0x08000000
+GICC_BASE := 0x08010000
+GICR_BASE := 0x080A0000
+
+TIMER_IRQ := 26
diff --git a/src/arch/aarch64/rules.mk b/src/arch/aarch64/rules.mk
new file mode 100644
index 0000000..ea0d53d
--- /dev/null
+++ b/src/arch/aarch64/rules.mk
@@ -0,0 +1,19 @@
+SRCS += entry.S
+SRCS += exceptions.S
+SRCS += handler.c
+SRCS += mm.c
+SRCS += timer.c
+
+OFFSET_SRCS += offsets.c
+
+ifeq ($(GICV2),1)
+ SRCS += gicv2.c
+endif
+
+ifeq ($(GICV3),1)
+ SRCS += gicv3.c
+endif
+
+ifeq ($(PL011),1)
+ SRCS += pl011.c
+endif
diff --git a/src/arch/aarch64/timer.c b/src/arch/aarch64/timer.c
new file mode 100644
index 0000000..c4eaecd
--- /dev/null
+++ b/src/arch/aarch64/timer.c
@@ -0,0 +1,49 @@
+#include <stdint.h>
+
+#include "cpu.h"
+#include "irq.h"
+#include "msr.h"
+
+static bool timer_irq_handler(void *context, struct irq_handle *h)
+{
+ struct cpu *c = cpu();
+
+ /* Mask timer interrupt and dismiss current interrupt. */
+ write_msr(cnthp_ctl_el2, read_msr(cnthp_ctl_el2) | 0x2);
+ irq_dismiss(h);
+
+ /* Execute user-supplied callback. */
+ if (c->timer_cb)
+ return c->timer_cb(c->timer_context);
+
+ return false;
+}
+
+void timer_set(uint64_t time, bool (*cb)(void *), void *context)
+{
+ uint64_t v;
+ struct cpu *c = cpu();
+
+ /* Save callback. */
+ c->timer_cb = cb;
+ c->timer_context = context;
+
+ /* TODO: There's a better way to advance this. */
+ v = read_msr(cntpct_el0);
+ write_msr(CNTHP_CVAL_EL2, v + time);
+ write_msr(cnthp_ctl_el2, 1); /* enable. */
+}
+
+void timer_init(void)
+{
+ irq_config(TIMER_IRQ, irq_trigger_level, irq_polarity_active_high,
+ timer_irq_handler, NULL);
+}
+
+void timer_init_percpu(void)
+{
+ /* Mask timer interrupt for now. */
+ write_msr(cnthp_ctl_el2, read_msr(cnthp_ctl_el2) | 0x2);
+
+ irq_enable(TIMER_IRQ);
+}
diff --git a/src/cpio.c b/src/cpio.c
new file mode 100644
index 0000000..c4add22
--- /dev/null
+++ b/src/cpio.c
@@ -0,0 +1,77 @@
+#include "cpio.h"
+
+#include <stdint.h>
+
+#include "std.h"
+
+#pragma pack(push, 1)
+struct cpio_header {
+ uint16_t magic;
+ uint16_t dev;
+ uint16_t ino;
+ uint16_t mode;
+ uint16_t uid;
+ uint16_t gid;
+ uint16_t nlink;
+ uint16_t rdev;
+ uint16_t mtime[2];
+ uint16_t namesize;
+ uint16_t filesize[2];
+};
+#pragma pack(pop)
+
+void cpio_init(struct cpio *c, const void *buf, size_t size)
+{
+ c->first = buf;
+ c->total_size = size;
+}
+
+void cpio_init_iter(struct cpio *c, struct cpio_iter *iter)
+{
+ iter->cur = c->first;
+ iter->size_left = c->total_size;
+}
+
+bool cpio_next(struct cpio_iter *iter, const char **name,
+ const void **contents, size_t *size)
+{
+ const struct cpio_header *h = iter->cur;
+ size_t size_left;
+ size_t filelen;
+ size_t namelen;
+
+ size_left = iter->size_left;
+ if (size_left < sizeof(struct cpio_header))
+ return false;
+
+ /* TODO: Check magic. */
+
+ size_left -= sizeof(struct cpio_header);
+ namelen = (h->namesize + 1) & ~1;
+ if (size_left < namelen)
+ return false;
+
+ size_left -= namelen;
+ filelen = (size_t)h->filesize[0] << 16 | h->filesize[1];
+ if (size_left < filelen)
+ return false;
+
+ /* TODO: Check that string is null-terminated. */
+ /* TODO: Check that the trailer is not returned. */
+
+ /* Stop enumerating files when we hit the end marker. */
+ if (!strcmp((const char *)(iter->cur + 1), "TRAILER!!!"))
+ return false;
+
+ size_left -= filelen;
+
+ *name = (const char *)(iter->cur + 1);
+ *contents = *name + namelen;
+ *size = filelen;
+
+ iter->cur = (struct cpio_header *)((char *)*contents + filelen);
+ iter->cur = (struct cpio_header *)(char *)(((size_t)iter->cur + 1) & ~1);
+ iter->size_left = size_left;
+
+ return true;
+}
diff --git a/src/cpu.c b/src/cpu.c
new file mode 100644
index 0000000..85646c6
--- /dev/null
+++ b/src/cpu.c
@@ -0,0 +1,161 @@
+#include "cpu.h"
+
+#include "arch_cpu.h"
+#include "dlog.h"
+#include "std.h"
+#include "timer.h"
+#include "vm.h"
+
+struct new_old_vcpu {
+ struct vcpu *new;
+ struct vcpu *old;
+};
+
+void cpu_init(struct cpu *c)
+{
+ /* TODO: Assumes that c is zeroed out already. */
+ sl_init(&c->lock);
+ list_init(&c->ready_queue);
+ c->irq_disable_count = 1;
+}
+
+void cpu_irq_enable(struct cpu *c)
+{
+ c->irq_disable_count--;
+ if (!c->irq_disable_count)
+ arch_irq_enable();
+}
+
+void cpu_irq_disable(struct cpu *c)
+{
+ if (!c->irq_disable_count)
+ arch_irq_disable();
+ c->irq_disable_count++;
+}
+
+void cpu_on(struct cpu *c)
+{
+ sl_lock(&c->lock);
+ if (!c->cpu_on_count) {
+ /* The CPU is currently off, we need to turn it on. */
+ arch_cpu_on(c->id, c);
+ }
+ c->cpu_on_count++;
+ sl_unlock(&c->lock);
+}
+
+/*
+ * This must be called only from the same CPU.
+ */
+void cpu_off(struct cpu *c)
+{
+ bool on;
+
+ sl_lock(&c->lock);
+ c->cpu_on_count--;
+ on = c->cpu_on_count > 0;
+ sl_unlock(&c->lock);
+
+ if (!on)
+ arch_cpu_off();
+}
+
+void vcpu_ready(struct vcpu *v)
+{
+ struct cpu *c = v->cpu;
+
+ sl_lock(&c->lock);
+ if (!v->is_runnable) {
+ v->is_runnable = true;
+ list_append(&c->ready_queue, &v->links);
+ /* TODO: Send IPI to cpu if needed. */
+ }
+ sl_unlock(&c->lock);
+}
+
+void vcpu_unready(struct vcpu *v)
+{
+ struct cpu *c = v->cpu;
+
+ sl_lock(&c->lock);
+ if (v->is_runnable) {
+ v->is_runnable = false;
+ list_remove(&v->links);
+ }
+ sl_unlock(&c->lock);
+}
+
+#if 0
+static bool cpu_schedule_next(void *ctx)
+{
+ /* Indicate that a new vcpu should be chosen. */
+ return true;
+}
+#endif
+
+struct new_old_vcpu cpu_next_vcpu(void)
+{
+ struct cpu *c = cpu();
+ struct new_old_vcpu ret;
+ struct vcpu *next;
+ bool switch_mm;
+
+ /* TODO: Check if too soon. */
+
+ sl_lock(&c->lock);
+
+ ret.old = c->current;
+ if (list_empty(&c->ready_queue)) {
+ bool first = true;
+ c->current = NULL;
+ do {
+ sl_unlock(&c->lock);
+ /* TODO: Implement this. Enable irqs. */
+ if (first) {
+ dlog("CPU%d waiting for work...\n", c->id);
+ first = false;
+ }
+ sl_lock(&c->lock);
+ } while (list_empty(&c->ready_queue));
+ dlog("CPU%d found work!\n", c->id);
+ }
+
+ next = LIST_ELEM(c->ready_queue.next, struct vcpu, links);
+ if (next->links.next != &c->ready_queue) {
+ /* Move new vcpu to the end of ready queue. */
+ list_remove(&next->links);
+ list_append(&c->ready_queue, &next->links);
+ }
+
+ c->current = next;
+
+ if (next->interrupt) {
+ arch_regs_set_irq(&next->regs);
+ next->interrupt = false;
+ } else {
+ arch_regs_clear_irq(&next->regs);
+ }
+
+ switch_mm = !ret.old || ret.old->vm != next->vm;
+
+ sl_unlock(&c->lock);
+
+ ret.new = next;
+
+ if (switch_mm)
+ arch_set_vm_mm(&next->vm->page_table);
+
+ /* TODO: Only set this when there is a next thing to run. */
+ /* Set timer again. */
+ //timer_set(5 * 1000000, cpu_schedule_next, NULL);
+
+ return ret;
+}
+
+void vcpu_init(struct vcpu *vcpu, struct cpu *cpu, struct vm *vm)
+{
+ memset(vcpu, 0, sizeof(*vcpu));
+ vcpu->cpu = cpu;
+ vcpu->vm = vm;
+ /* TODO: Initialize vmid register. */
+}
diff --git a/src/dlog.c b/src/dlog.c
new file mode 100644
index 0000000..c4d49ce
--- /dev/null
+++ b/src/dlog.c
@@ -0,0 +1,272 @@
+#include "dlog.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdarg.h>
+
+#include "arch.h"
+#include "spinlock.h"
+#include "std.h"
+
+#define FLAG_SPACE 0x01
+#define FLAG_ZERO 0x02
+#define FLAG_MINUS 0x04
+#define FLAG_PLUS 0x08
+#define FLAG_ALT 0x10
+#define FLAG_UPPER 0x20
+#define FLAG_NEG 0x40
+
+/*
+ * Prints a raw string to the debug log and returns its length.
+ */
+static size_t print_raw_string(const char *str)
+{
+ const char *c = str;
+ while (*c != '\0')
+ arch_putchar(*c++);
+ return c - str;
+}
+
+/*
+ * Prints a formatted string to the debug log. The format includes a minimum
+ * width, the fill character, and flags (whether to align to left or right).
+ *
+ * str is the full string, while suffix is a pointer within str that indicates
+ * where the suffix begins. This is used when printing right-aligned numbers
+ * with a zero fill; for example, -10 with width 4 should be padded to -010,
+ * so suffix would point to index one of the "-10" string.
+ */
+static void print_string(const char *str, const char *suffix, size_t width,
+ int flags, char fill)
+{
+ size_t len = suffix - str;
+
+ /* Print the string up to the beginning of the suffix. */
+ while (str != suffix)
+ arch_putchar(*str++);
+
+ if (flags & FLAG_MINUS) {
+ /* Left-aligned. Print suffix, then print padding if needed. */
+ len += print_raw_string(suffix);
+ while (len < width) {
+ arch_putchar(' ');
+ len++;
+ }
+ return;
+ }
+
+ /* Fill until we reach the desired length. */
+ len += strlen(suffix);
+ while (len < width) {
+ arch_putchar(fill);
+ len++;
+ }
+
+ /* Now print the rest of the string. */
+ print_raw_string(suffix);
+}
+
+/*
+ * Prints a number to the debug log. The caller specifies the base, its minimum
+ * width and printf-style flags.
+ */
+static void print_num(size_t v, size_t base, size_t width, int flags)
+{
+ static const char *digits_lower = "0123456789abcdefx";
+ static const char *digits_upper = "0123456789ABCDEFX";
+ const char *d = (flags & FLAG_UPPER) ? digits_upper : digits_lower;
+ char buf[51];
+ char *ptr = buf + sizeof(buf) - 1;
+ char *num;
+ *ptr = '\0';
+ do {
+ --ptr;
+ *ptr = d[v % base];
+ v /= base;
+ } while (v);
+
+ /* Num stores where the actual number begins. */
+ num = ptr;
+
+ /* Add prefix if requested. */
+ if (flags & FLAG_ALT) {
+ switch (base) {
+ case 16:
+ ptr -= 2;
+ ptr[0] = '0';
+ ptr[1] = d[16];
+ break;
+
+ case 8:
+ ptr--;
+ *ptr = '0';
+ break;
+ }
+ }
+
+ /* Add sign if requested. */
+ if (flags & FLAG_NEG)
+ *--ptr = '-';
+ else if (flags & FLAG_PLUS)
+ *--ptr = '+';
+ else if (flags & FLAG_SPACE)
+ *--ptr = ' ';
+
+ if (flags & FLAG_ZERO)
+ print_string(ptr, num, width, flags, '0');
+ else
+ print_string(ptr, ptr, width, flags, ' ');
+}
+
+/*
+ * Parses the optional flags field of a printf-style format. It returns the spot
+ * on the string where a non-flag character was found.
+ */
+static const char *parse_flags(const char *p, int *flags)
+{
+ for (;;) {
+ switch (*p) {
+ case ' ':
+ *flags |= FLAG_SPACE;
+ break;
+
+ case '0':
+ *flags |= FLAG_ZERO;
+ break;
+
+ case '-':
+ *flags |= FLAG_MINUS;
+ break;
+
+ case '+':
+ *flags |= FLAG_PLUS;
+
+ case '#':
+ *flags |= FLAG_ALT;
+ break;
+
+ default:
+ return p;
+ }
+ p++;
+ }
+}
+
+/*
+ * Prints the given format string to the debug log.
+ */
+void dlog(const char *str, ...)
+{
+ static struct spinlock sl = SPINLOCK_INIT;
+ const char *p;
+ va_list args;
+ size_t w;
+ int flags;
+ char buf[2];
+
+ va_start(args, str);
+
+ sl_lock(&sl);
+
+ for (p = str; *p; p++) {
+ switch (*p) {
+ default:
+ arch_putchar(*p);
+ break;
+
+ case '%':
+ /* Read optional flags. */
+ flags = 0;
+ p = parse_flags(p + 1, &flags) - 1;
+
+ /* Read the minimum width, if one is specified. */
+ w = 0;
+ while (p[1] >= '0' && p[1] <= '9') {
+ w = (w * 10) + (p[1] - '0');
+ p++;
+ }
+
+ /* Read minimum width from arguments. */
+ if (w == 0 && p[1] == '*') {
+ int v = va_arg(args, int);
+ if (v >= 0) {
+ w = v;
+ } else {
+ w = -v;
+ flags |= FLAG_MINUS;
+ }
+ p++;
+ }
+
+ /* Handle the format specifier. */
+ switch (p[1]) {
+ case 's':
+ {
+ char *str = va_arg(args, char *);
+ print_string(str, str, w, flags, ' ');
+ }
+ p++;
+ break;
+
+ case 'd':
+ case 'i':
+ {
+ int v = va_arg(args, int);
+ if (v < 0) {
+ flags |= FLAG_NEG;
+ v = -v;
+ }
+
+ print_num((size_t)v, 10, w, flags);
+ }
+ p++;
+ break;
+
+ case 'X':
+ flags |= FLAG_UPPER;
+ print_num(va_arg(args, size_t), 16, w, flags);
+ break;
+
+ case 'p':
+ print_num(va_arg(args, size_t), 16,
+ sizeof(size_t) * 2, FLAG_ZERO);
+ p++;
+ break;
+
+ case 'x':
+ print_num(va_arg(args, size_t), 16, w, flags);
+ p++;
+ break;
+
+ case 'u':
+ print_num(va_arg(args, size_t), 10, w, flags);
+ p++;
+ break;
+
+ case 'o':
+ print_num(va_arg(args, size_t), 8, w, flags);
+ p++;
+ break;
+
+ case 'c':
+ buf[1] = 0;
+ buf[0] = va_arg(args, int);
+ print_string(buf, buf, w, flags, ' ');
+ p++;
+ break;
+
+ case '%':
+ break;
+
+ default:
+ arch_putchar('%');
+ }
+
+ break;
+ }
+ }
+
+ sl_unlock(&sl);
+
+ va_end(args);
+}
diff --git a/src/fdt.c b/src/fdt.c
new file mode 100644
index 0000000..035ec64
--- /dev/null
+++ b/src/fdt.c
@@ -0,0 +1,327 @@
+#include "fdt.h"
+
+#include <stdint.h>
+
+#include "dlog.h"
+#include "std.h"
+
+struct fdt_header {
+ uint32_t magic;
+ uint32_t totalsize;
+ uint32_t off_dt_struct;
+ uint32_t off_dt_strings;
+ uint32_t off_mem_rsvmap;
+ uint32_t version;
+ uint32_t last_comp_version;
+ uint32_t boot_cpuid_phys;
+ uint32_t size_dt_strings;
+ uint32_t size_dt_struct;
+};
+
+struct fdt_reserve_entry {
+ uint64_t address;
+ uint64_t size;
+};
+
+enum fdt_token {
+ FDT_BEGIN_NODE = 1,
+ FDT_END_NODE = 2,
+ FDT_PROP = 3,
+ FDT_NOP = 4,
+ FDT_END = 9,
+};
+
+struct fdt_tokenizer {
+ const char *cur;
+ const char *end;
+ const char *strs;
+};
+
+#define FDT_VERSION 17
+#define FDT_MAGIC 0xd00dfeed
+
+static void fdt_tokenizer_init(struct fdt_tokenizer *t, const char *strs,
+ const char *begin, const char *end)
+{
+ t->strs = strs;
+ t->cur = begin;
+ t->end = end;
+}
+
+static void fdt_tokenizer_align(struct fdt_tokenizer *t)
+{
+ t->cur = (char *)(((size_t)t->cur + 3) & ~3);
+}
+
+static bool fdt_tokenizer_uint32(struct fdt_tokenizer *t, uint32_t *res)
+{
+ const char *next = t->cur + sizeof(*res);
+ if (next > t->end)
+ return false;
+
+ *res = ntohl(*(uint32_t *)t->cur);
+ t->cur = next;
+
+ return true;
+}
+
+static bool fdt_tokenizer_token(struct fdt_tokenizer *t, uint32_t *res)
+{
+ uint32_t v;
+
+ while (fdt_tokenizer_uint32(t, &v)) {
+ if (v != FDT_NOP) {
+ *res = v;
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool fdt_tokenizer_bytes(struct fdt_tokenizer *t,
+ const char **res, size_t size)
+{
+ const char *next = t->cur + size;
+ if (next > t->end)
+ return false;
+
+ *res = t->cur;
+ t->cur = next;
+ fdt_tokenizer_align(t);
+
+ return true;
+}
+
+static bool fdt_tokenizer_str(struct fdt_tokenizer *t, const char **res)
+{
+ const char *p;
+ for (p = t->cur; p < t->end; p++) {
+ if (!*p) {
+ /* Found the end of the string. */
+ *res = t->cur;
+ t->cur = p + 1;
+ fdt_tokenizer_align(t);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void fdt_root_node(struct fdt_node *node, const struct fdt_header *hdr)
+{
+ uint32_t max_ver;
+ uint32_t min_ver;
+ uint32_t begin = ntohl(hdr->off_dt_struct);
+ uint32_t size = ntohl(hdr->size_dt_struct);
+
+ memset(node, 0, sizeof(*node));
+
+ /* Check the magic number before anything else. */
+ if (hdr->magic != ntohl(FDT_MAGIC))
+ return;
+
+ /* Check the version. */
+ max_ver = ntohl(hdr->version);
+ min_ver = ntohl(hdr->last_comp_version);
+ if (FDT_VERSION < min_ver || FDT_VERSION > max_ver)
+ return;
+
+ /* TODO: Verify that it is all within the fdt. */
+ node->begin = (const char *)hdr + begin;
+ node->end = node->begin + size;
+
+ /* TODO: Verify strings as well. */
+ node->strs = (char *)hdr + ntohl(hdr->off_dt_strings);
+}
+
+static bool fdt_next_property(struct fdt_tokenizer *t, const char **name,
+ const char **buf, uint32_t *size)
+{
+ uint32_t token;
+ uint32_t nameoff;
+
+ if (!fdt_tokenizer_token(t, &token))
+ return false;
+
+ if (token != FDT_PROP) {
+ /* Rewind so that caller will get the same token. */
+ t->cur -= sizeof(uint32_t);
+ return false;
+ }
+
+ if (!fdt_tokenizer_uint32(t, size) ||
+ !fdt_tokenizer_uint32(t, &nameoff) ||
+ !fdt_tokenizer_bytes(t, buf, *size)) {
+ /*
+ * Move cursor to the end so that caller won't get any new
+ * tokens.
+ */
+ t->cur = t->end;
+ return false;
+ }
+
+ /* TODO: Need to verify the strings. */
+ *name = t->strs + nameoff;
+
+ return true;
+}
+
+static bool fdt_next_subnode(struct fdt_tokenizer *t, const char **name)
+{
+ uint32_t token;
+
+ if (!fdt_tokenizer_token(t, &token))
+ return false;
+
+ if (token != FDT_BEGIN_NODE) {
+ /* Rewind so that caller will get the same token. */
+ t->cur -= sizeof(uint32_t);
+ return false;
+ }
+
+ if (!fdt_tokenizer_str(t, name)) {
+ /*
+ * Move cursor to the end so that caller won't get any new
+ * tokens.
+ */
+ t->cur = t->end;
+ return false;
+ }
+
+ return true;
+}
+
+static void fdt_skip_properties(struct fdt_tokenizer *t)
+{
+ const char *name;
+ const char *buf;
+ uint32_t size;
+ while (fdt_next_property(t, &name, &buf, &size));
+}
+
+static bool fdt_skip_node(struct fdt_tokenizer *t)
+{
+ const char *name;
+ uint32_t token;
+ size_t pending = 1;
+
+ fdt_skip_properties(t);
+
+ do {
+ while (fdt_next_subnode(t, &name)) {
+ fdt_skip_properties(t);
+ pending++;
+ }
+
+ if (!fdt_tokenizer_token(t, &token))
+ return false;
+
+ if (token != FDT_END_NODE) {
+ t->cur = t->end;
+ return false;
+ }
+
+ pending--;
+ } while (pending);
+
+ return true;
+}
+
+bool fdt_read_property(const struct fdt_node *node, const char *name,
+ const char **buf, uint32_t *size)
+{
+ struct fdt_tokenizer t;
+ const char *prop_name;
+
+ fdt_tokenizer_init(&t, node->strs, node->begin, node->end);
+
+ while (fdt_next_property(&t, &prop_name, buf, size)) {
+ if (!strcmp(prop_name, name))
+ return true;
+ }
+
+ return false;
+}
+
+bool fdt_find_child(struct fdt_node *node, const char *child)
+{
+ struct fdt_tokenizer t;
+ const char *name;
+
+ fdt_tokenizer_init(&t, node->strs, node->begin, node->end);
+
+ fdt_skip_properties(&t);
+
+ while (fdt_next_subnode(&t, &name)) {
+ if (!strcmp(name, child)) {
+ node->begin = t.cur;
+ return true;
+ }
+
+ fdt_skip_node(&t);
+ }
+
+ return false;
+}
+
+void fdt_dump(struct fdt_header *hdr)
+{
+ uint32_t token;
+ size_t depth = 0;
+ const char *name;
+ struct fdt_tokenizer t;
+ struct fdt_node node;
+
+ /* Traverse the whole thing. */
+ fdt_root_node(&node, hdr);
+
+ fdt_tokenizer_init(&t, node.strs, node.begin, node.end);
+
+ do {
+ while (fdt_next_subnode(&t, &name)) {
+ const char *buf;
+ uint32_t size;
+
+ dlog("%*sNew node: \"%s\"\n", 2 * depth, "", name);
+ depth++;
+ while (fdt_next_property(&t, &name, &buf, &size)) {
+ size_t i;
+ dlog("%*sproperty: \"%s\" (", 2 * depth, "", name);
+ for (i = 0; i < size; i++)
+ dlog("%s%02x", i == 0 ? "" : " ", buf[i]);
+ dlog(")\n");
+ }
+ }
+
+ if (!fdt_tokenizer_token(&t, &token))
+ return;
+
+ if (token != FDT_END_NODE)
+ return;
+
+ depth--;
+ } while (depth);
+
+ dlog("fdt: off_mem_rsvmap=%u\n", ntohl(hdr->off_mem_rsvmap));
+ {
+ struct fdt_reserve_entry *e = (struct fdt_reserve_entry *)((size_t)hdr + ntohl(hdr->off_mem_rsvmap));
+ while (e->address || e->size) {
+ dlog("Entry: %p (0x%x bytes)\n", ntohll(e->address), ntohll(e->size));
+ e++;
+ }
+ }
+}
+
+void fdt_add_mem_reservation(struct fdt_header *hdr, size_t addr, size_t len)
+{
+ /* TODO: Clean this up. */
+ char *begin = (char *)hdr + ntohl(hdr->off_mem_rsvmap);
+ struct fdt_reserve_entry *e = (struct fdt_reserve_entry *)begin;
+ hdr->totalsize = htonl(ntohl(hdr->totalsize) + sizeof(struct fdt_reserve_entry));
+ hdr->off_dt_struct = htonl(ntohl(hdr->off_dt_struct) + sizeof(struct fdt_reserve_entry));
+ hdr->off_dt_strings = htonl(ntohl(hdr->off_dt_strings) + sizeof(struct fdt_reserve_entry));
+ memmove(begin + sizeof(struct fdt_reserve_entry), begin, ntohl(hdr->totalsize) - ntohl(hdr->off_mem_rsvmap));
+ e->address = htonll(addr);
+ e->size = htonll(len);
+}
diff --git a/src/hafnium.ld b/src/hafnium.ld
new file mode 100644
index 0000000..9949f03
--- /dev/null
+++ b/src/hafnium.ld
@@ -0,0 +1,41 @@
+/* Linker script for the hafnium image. PREFERRED_LOAD_ADDRESS is expected
+ * to be supplied on the linker command line (-defsym or equivalent). */
+ENTRY(entry)
+SECTIONS
+{
+ . = PREFERRED_LOAD_ADDRESS;
+ /* Link-time base of the image.
+ NOTE(review): presumably used by self-relocation code together with the
+ .rela section below (the build compiles with -fpic) — confirm. */
+ _orig_base = ABSOLUTE(.);
+
+ text_begin = .;
+ .init : {
+ /* The entry point must be the very first thing in the image. */
+ *(.init.entry)
+ *(.init)
+ }
+ .text : { *(.text) }
+ text_end = .;
+
+ . = ALIGN(4096);
+ rodata_begin = .;
+ .rodata : { *(.rodata) }
+ /* Relocation entries, bracketed by symbols so startup code can find and
+ apply them. */
+ .rela : ALIGN(8) {
+ rela_begin = .;
+ *(.rela .rela*)
+ rela_end = .;
+ }
+ rodata_end = .;
+
+ . = ALIGN(4096);
+ data_begin = .;
+ .data : { *(.data) }
+
+ /* The entry point code assumes that bss is 16-byte aligned. */
+ .bss ALIGN(16) : {
+ /* Size of the on-disk image: bss is not stored in the binary, so the
+ file ends where bss begins. */
+ file_size = ABSOLUTE(. - PREFERRED_LOAD_ADDRESS);
+ bss_begin = .;
+ *(.bss COMMON)
+ . = ALIGN(16);
+ bss_end = .;
+ }
+ data_end = .;
+
+ . = ALIGN(4096);
+ bin_end = .;
+}
diff --git a/src/irq.c b/src/irq.c
new file mode 100644
index 0000000..d751c56
--- /dev/null
+++ b/src/irq.c
@@ -0,0 +1,41 @@
+#include "irq.h"
+
+#include "arch.h"
+
+/* Per-interrupt configuration: the registered handler callback and the
+ * opaque context pointer passed back to it. */
+struct irq_config {
+ void *cb_context;
+ bool (*cb)(void *context, struct irq_handle *);
+};
+
+/*
+ * TODO: Move this to write-once page so that we know it won't change in the
+ * future.
+ */
+/* One slot per interrupt number.
+ * NOTE(review): 300 looks sized for a GIC's SGI/PPI/SPI range — confirm
+ * against the largest interrupt number actually configured. */
+static struct irq_config irq_configs[300];
+
+/*
+ * Registers a callback (with its context) for the given interrupt number
+ * and forwards the trigger/polarity configuration to the arch layer.
+ * Out-of-range interrupt numbers are ignored.
+ */
+void irq_config(uint32_t num, enum irq_trigger t, enum irq_polarity p,
+		bool (*cb)(void *, struct irq_handle *), void *context)
+{
+	struct irq_config *cfg;
+
+	/* Refuse interrupt numbers without a slot; the previous code would
+	 * have written past the end of irq_configs. */
+	if (num >= sizeof(irq_configs) / sizeof(irq_configs[0]))
+		return;
+
+	cfg = irq_configs + num;
+	cfg->cb = cb;
+	cfg->cb_context = context;
+
+	arch_irq_config(num, t, p);
+}
+
+/*
+ * Dispatches an interrupt to its registered callback and returns the
+ * callback's result. Returns false for interrupts that are out of range
+ * or were never configured; the previous code dereferenced a NULL
+ * callback in that case.
+ * NOTE(review): assumes a false return means "not handled" — confirm
+ * against the caller in the arch layer.
+ */
+bool irq_handle(uint32_t num, struct irq_handle *h)
+{
+	struct irq_config *cfg;
+
+	if (num >= sizeof(irq_configs) / sizeof(irq_configs[0]))
+		return false;
+
+	cfg = irq_configs + num;
+	if (!cfg->cb)
+		return false;
+
+	return cfg->cb(cfg->cb_context, h);
+}
+
+/* One-time global IRQ subsystem initialization. Nothing to do yet. */
+void irq_init(void)
+{
+}
+
+/* Per-CPU IRQ initialization; defers entirely to the architecture layer. */
+void irq_init_percpu(void)
+{
+ arch_irq_init_percpu();
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..346c798
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,230 @@
+#include <stdalign.h>
+#include <stdatomic.h>
+#include <stddef.h>
+
+#include "cpio.h"
+#include "cpu.h"
+#include "dlog.h"
+#include "fdt.h"
+#include "irq.h"
+#include "std.h"
+#include "timer.h"
+#include "vm.h"
+
+/* Pointer to the FDT blob.
+ * NOTE(review): appears to be populated by the arch entry code before
+ * cpu_main() runs — confirm. */
+void *fdt;
+
+/* The stack to be used by the CPUs; one STACK_SIZE slice per CPU. */
+alignas(2 * sizeof(size_t)) char callstacks[STACK_SIZE * MAX_CPUS];
+
+/* State of all supported CPUs. The stack of the first one is initialized. */
+/* NOTE(review): stack_bottom points at the END of the slice, suggesting a
+ * downward-growing stack — confirm against the entry code. */
+struct cpu cpus[MAX_CPUS] = {
+ {
+ .cpu_on_count = 1,
+ .stack_bottom = callstacks + STACK_SIZE,
+ },
+};
+
+/*
+ * Moves the node cursor to the node at the given path. The path is a
+ * sequence of NUL-terminated component names followed by an empty
+ * component, e.g. "chosen\0" (the literal's implicit terminator supplies
+ * the trailing empty component). Returns false if any component is absent.
+ */
+bool fdt_find_node(struct fdt_node *node, const char *path)
+{
+	/* An empty name selects the root node. */
+	if (!fdt_find_child(node, ""))
+		return false;
+
+	while (*path) {
+		if (!fdt_find_child(node, path))
+			return false;
+		/*
+		 * Skip past the component's NUL terminator too. The previous
+		 * code advanced only strlen(path) bytes, stopping on the
+		 * terminator, so multi-component paths could never descend
+		 * past their first component.
+		 */
+		path += strlen(path) + 1;
+	}
+
+	return true;
+}
+
+/*
+ * Reads a 32- or 64-bit numeric property (stored big-endian in the FDT)
+ * into *value. Returns false if the property is absent or has any other
+ * size; *value is untouched in that case.
+ */
+bool fdt_read_number(struct fdt_node *node, const char *name, uint64_t *value)
+{
+ const char *data;
+ uint32_t size;
+ union {
+ volatile uint64_t v;
+ char a[8];
+ } t;
+
+ if (!fdt_read_property(node, name, &data, &size))
+ return false;
+
+ switch (size) {
+ case sizeof(uint32_t):
+ *value = ntohl(*(uint32_t *)data);
+ break;
+
+ case sizeof(uint64_t):
+ /* Go via a byte buffer: 64-bit property values are not guaranteed
+ * to be 8-byte aligned in an FDT, so avoid a direct 64-bit load.
+ * NOTE(review): the volatile presumably stops the compiler from
+ * optimizing the copy into such a load — confirm. */
+ memcpy(t.a, data, sizeof(uint64_t));
+ *value = ntohll(t.v);
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Overwrites an existing 32- or 64-bit numeric property in place with the
+ * given value, stored big-endian. Returns false if the property is absent
+ * or has any other size; a property's size cannot be changed here.
+ */
+bool fdt_write_number(struct fdt_node *node, const char *name, uint64_t value)
+{
+	const char *data;
+	uint32_t size;
+	union {
+		volatile uint64_t v;
+		char a[8];
+	} t;
+
+	/* Locate the existing property; it determines the width to write. */
+	if (!fdt_read_property(node, name, &data, &size))
+		return false;
+
+	switch (size) {
+	case sizeof(uint32_t):
+		/*
+		 * This is a host-to-big-endian conversion, so use htonl: the
+		 * previous ntohl call produced the same bytes (the swap is
+		 * its own inverse) but stated the wrong intent.
+		 */
+		*(uint32_t *)data = htonl(value);
+		break;
+
+	case sizeof(uint64_t):
+		/* Write via a byte buffer: 64-bit property values are not
+		 * guaranteed 8-byte alignment in an FDT. */
+		t.v = htonll(value);
+		memcpy((void *)data, t.a, sizeof(uint64_t));
+		break;
+
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Copies `size` bytes from `from` to the first 0x80000-aligned (512KiB)
+ * address at or after the end of the hafnium image (bin_end, defined by
+ * the linker script).
+ * NOTE(review): assumes the destination region is free RAM large enough
+ * for `size` bytes — nothing here checks that; confirm.
+ */
+static void relocate(const char *from, size_t size)
+{
+ extern char bin_end[];
+ size_t tmp = (size_t)&bin_end[0];
+ /* Round up to the next 0x80000 boundary. */
+ char *dest = (char *)((tmp + 0x80000 - 1) & ~(0x80000 - 1));
+ dlog("bin_end is at %p, copying to %p\n", &bin_end[0], dest);
+ memcpy(dest, from, size);
+}
+
+/* TODO: Remove this. */
+/* The single VM supported for now; lives here until real VM management
+ * exists. */
+struct vm vm0;
+
+/*
+ * Global one-time initialization: initializes every CPU's state and stack,
+ * then (temporarily) loads the VM kernel and ramdisk out of the cpio
+ * archive named by the FDT's /chosen node, patches the FDT, and starts
+ * vcpu 0 of vm0.
+ */
+static void one_time_init(void)
+{
+ size_t i;
+
+ dlog("Initializing hafnium\n");
+
+ /*
+ * TODO: Re-enable this.
+ irq_init();
+ timer_init();
+ */
+
+ /* Initialize all CPUs. */
+ for (i = 0; i < MAX_CPUS; i++) {
+ struct cpu *c = cpus + i;
+ cpu_init(c);
+ c->id = i; /* TODO: Initialize ID. */
+ c->stack_bottom = callstacks + STACK_SIZE * (i + 1);
+ }
+
+ /* TODO: Code below this point should be removed from this function. */
+ /* TODO: Remove this. */
+
+ /* do/while(0) lets each failure below 'break' out of the sequence. */
+ do {
+ struct fdt_node n;
+
+ fdt_root_node(&n, fdt);
+ if (!fdt_find_node(&n, "chosen\0")) {
+ dlog("Unable to find 'chosen'\n");
+ break;
+ }
+
+ uint64_t begin;
+ uint64_t end;
+
+ /* The boot loader records the ramdisk's location in /chosen. */
+ if (!fdt_read_number(&n, "linux,initrd-start", &begin)) {
+ dlog("Unable to read linux,initrd-start\n");
+ break;
+ }
+
+ if (!fdt_read_number(&n, "linux,initrd-end", &end)) {
+ dlog("Unable to read linux,initrd-end\n");
+ break;
+ }
+
+ dlog("Ramdisk: from %x to %x\n", begin, end);
+
+ /* The ramdisk is a cpio archive holding the VM kernel and initrd. */
+ struct cpio c;
+ struct cpio_iter iter;
+ cpio_init(&c, (void *)begin, end - begin);
+ cpio_init_iter(&c, &iter);
+
+ const char *name;
+ const void *fcontents;
+ size_t ramdisk = 0;
+ size_t ramdisk_end = 0;
+ size_t fsize;
+ while (cpio_next(&iter, &name, &fcontents, &fsize)) {
+ dlog("File: %s, size=%u\n", name, fsize);
+ /* The VM kernel is copied out past the end of our own image. */
+ if (!strcmp(name, "vm/vmlinuz")) {
+ relocate(fcontents, fsize);
+ continue;
+ }
+
+ /* The VM's initrd is used in place, straight from the archive. */
+ if (!strcmp(name, "vm/initrd.img")) {
+ dlog("Found vm/ramdisk @ %p, %u bytes\n", fcontents, fsize);
+ ramdisk = (size_t)fcontents;
+ ramdisk_end = ramdisk + fsize;
+ continue;
+ }
+ }
+
+ dlog("Ramdisk; %p\n", ramdisk);
+
+ /* Patch FDT to point to new ramdisk. */
+ if (!fdt_write_number(&n, "linux,initrd-start", ramdisk)) {
+ dlog("Unable to write linux,initrd-start\n");
+ break;
+ }
+
+ if (!fdt_write_number(&n, "linux,initrd-end", ramdisk_end)) {
+ dlog("Unable to write linux,initrd-end\n");
+ break;
+ }
+
+ /*
+ * Patch the FDT to reserve the memory holding the relocated VM
+ * kernel, then boot it on vcpu 0 with the FDT address as its
+ * boot argument.
+ */
+ {
+ /* Recompute the relocation target: round an address inside our
+ * image up to the next 0x80000 boundary.
+ * NOTE(review): this matches relocate()'s bin_end-based result
+ * only if no 0x80000 boundary falls between &relocate and
+ * bin_end — confirm. */
+ size_t tmp = (size_t)&relocate;
+ tmp = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
+
+ /* NOTE(review): the reservation starts at tmp & ~0xfffff — a
+ * 1MiB boundary possibly below tmp — yet is only 0x80000 bytes
+ * long; confirm it actually covers the relocated kernel. */
+ fdt_add_mem_reservation(fdt, tmp & ~0xfffff, 0x80000);
+ vm_init(&vm0, cpus);
+ vm_start_vcpu(&vm0, 0, tmp, (size_t)fdt);
+ }
+ } while (0);
+}
+
+/*
+ * The entry point of CPUs when they are turned on. It is supposed to
+ * initialise all state and return; the caller will ensure that the next
+ * vcpu runs.
+ */
+void cpu_main(void)
+{
+ /* Do global one-time initialization just once: the first CPU to arrive
+ * claims the flag and runs it.
+ * NOTE(review): CPUs that lose the race do not wait for one_time_init()
+ * to complete before continuing — confirm that is safe. */
+ static atomic_flag inited = ATOMIC_FLAG_INIT;
+ if (!atomic_flag_test_and_set_explicit(&inited, memory_order_acq_rel))
+ one_time_init();
+
+ /* The CPU index is this cpu's offset within the global cpus array. */
+ dlog("Starting up cpu %d\n", cpu() - cpus);
+
+ /* Do per-cpu initialization. */
+ /* TODO: What to do here? */
+ /*
+ irq_init_percpu();
+ timer_init_percpu();
+ */
+}
diff --git a/src/rules.mk b/src/rules.mk
new file mode 100644
index 0000000..1bed7fa
--- /dev/null
+++ b/src/rules.mk
@@ -0,0 +1,12 @@
+# Architecture-independent hypervisor sources; picked up by the
+# include_module machinery in the top-level Makefile.
+SRCS += alloc.c
+SRCS += cpio.c
+SRCS += cpu.c
+SRCS += fdt.c
+SRCS += irq.c
+SRCS += main.c
+SRCS += std.c
+SRCS += vm.c
+
+# The debug log implementation is only compiled into debug builds.
+ifeq ($(DEBUG),1)
+ SRCS += dlog.c
+endif
diff --git a/src/std.c b/src/std.c
new file mode 100644
index 0000000..39ba972
--- /dev/null
+++ b/src/std.c
@@ -0,0 +1,84 @@
+#include "std.h"
+
+/*
+ * Fills the first n bytes of the region at s with the byte value c.
+ * Returns s, as required of the standard memset.
+ */
+void *memset(void *s, int c, size_t n)
+{
+	char *it = s;
+	char *end = it + n;
+
+	for (; it != end; it++)
+		*it = c;
+
+	return s;
+}
+
+/*
+ * Returns the number of characters in str before its NUL terminator.
+ */
+size_t strlen(const char *str)
+{
+	size_t n = 0;
+
+	while (str[n] != '\0')
+		n++;
+
+	return n;
+}
+
+/*
+ * Copies n bytes from src to dst and returns dst. The regions must not
+ * overlap; use memmove for overlapping copies.
+ */
+void *memcpy(void *dst, const void *src, size_t n)
+{
+	char *to = dst;
+	const char *from = src;
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		to[i] = from[i];
+
+	return dst;
+}
+
+/*
+ * Copies n bytes from src to dst, handling overlapping regions correctly,
+ * and returns dst.
+ */
+void *memmove(void *dst, const void *src, size_t n)
+{
+	char *to = dst;
+	const char *from = src;
+
+	if (dst < src) {
+		/* Destination precedes source: a forward copy is safe. */
+		while (n--)
+			*to++ = *from++;
+	} else {
+		/* Copy backwards so an overlapping tail isn't clobbered
+		 * before it is read. */
+		while (n--)
+			to[n] = from[n];
+	}
+
+	return dst;
+}
+
+/*
+ * Compares the first n bytes of a and b, returning <0, 0 or >0. Bytes are
+ * compared as unsigned char as required by C11 7.24.4; the previous code
+ * compared plain char (signed on AArch64 GCC only by flag, signed on most
+ * ABIs), giving the wrong ordering for bytes >= 0x80. Equality results
+ * are unchanged.
+ */
+int memcmp(const void *a, const void *b, size_t n)
+{
+	const unsigned char *x = a;
+	const unsigned char *y = b;
+
+	while (n--) {
+		if (*x != *y)
+			return *x - *y;
+		x++;
+		y++;
+	}
+
+	return 0;
+}
+
+/*
+ * Lexicographically compares the NUL-terminated strings a and b, returning
+ * <0, 0 or >0. Characters are compared as unsigned char as required by
+ * C11 7.24.4.2; the previous code compared plain char, giving the wrong
+ * ordering for non-ASCII bytes where char is signed. Equality results are
+ * unchanged.
+ */
+int strcmp(const char *a, const char *b)
+{
+	const unsigned char *x = (const unsigned char *)a;
+	const unsigned char *y = (const unsigned char *)b;
+
+	/* Advance while both strings continue to match. */
+	while (*x != 0 && *x == *y) {
+		x++;
+		y++;
+	}
+
+	return *x - *y;
+}
diff --git a/src/vm.c b/src/vm.c
new file mode 100644
index 0000000..ee6a5cd
--- /dev/null
+++ b/src/vm.c
@@ -0,0 +1,24 @@
+#include "vm.h"
+
+#include "cpu.h"
+
+/*
+ * Initializes a vm: performs basic initialization of its vcpus — one per
+ * physical CPU, each bound to the corresponding entry of `cpus` — and sets
+ * up its page table via the arch layer.
+ * NOTE(review): assumes vm->vcpus has at least MAX_CPUS entries — confirm
+ * against the struct vm definition.
+ */
+void vm_init(struct vm *vm, struct cpu *cpus)
+{
+ size_t i;
+
+ /* Do basic initialization of vcpus. */
+ for (i = 0; i < MAX_CPUS; i++) {
+ vcpu_init(vm->vcpus + i, cpus + i, vm);
+ }
+
+ arch_vptable_init(&vm->page_table);
+}
+
+/* TODO: Shall we use index or id here? */
+void vm_start_vcpu(struct vm *vm, size_t index, size_t entry, size_t arg)
+{
+ struct vcpu *vcpu = vm->vcpus + index;
+ arch_regs_init(&vcpu->regs, entry, arg);
+ vcpu_ready(vcpu);
+ cpu_on(vcpu->cpu);
+}