Enable MMU in EL2.
diff --git a/inc/alloc.h b/inc/alloc.h
index b3fc110..4277495 100644
--- a/inc/alloc.h
+++ b/inc/alloc.h
@@ -7,5 +7,6 @@
void *halloc(size_t size);
void hfree(void *ptr);
void *halloc_aligned(size_t size, size_t align);
+void *halloc_aligned_nosync(size_t size, size_t align);
#endif /* _ALLOC_H */
diff --git a/inc/fdt.h b/inc/fdt.h
index 740d345..fcc1716 100644
--- a/inc/fdt.h
+++ b/inc/fdt.h
@@ -2,6 +2,7 @@
#define _FDT_H
#include <stdbool.h>
+#include <stddef.h>
#include <stdint.h>
struct fdt_node {
@@ -12,6 +13,8 @@
const char *strs;
};
+size_t fdt_header_size(void);
+size_t fdt_total_size(struct fdt_header *hdr);
void fdt_dump(struct fdt_header *hdr);
void fdt_root_node(struct fdt_node *node, const struct fdt_header *hdr);
bool fdt_find_child(struct fdt_node *node, const char *child);
diff --git a/inc/mm.h b/inc/mm.h
new file mode 100644
index 0000000..1af7496
--- /dev/null
+++ b/inc/mm.h
@@ -0,0 +1,43 @@
+#ifndef _MM_H
+#define _MM_H
+
+#include <stdbool.h>
+
+#include "arch_mm.h"
+
+struct mm_ptable {
+ struct arch_mm_ptable arch;
+ pte_t *table;
+};
+
+#define PAGE_SIZE (1 << PAGE_BITS)
+
+/* The following are arch-independent page mapping modes. */
+#define MM_MODE_R 0x01 /* read */
+#define MM_MODE_W 0x02 /* write */
+#define MM_MODE_X 0x04 /* execute */
+#define MM_MODE_D 0x08 /* device */
+
+/*
+ * This flag indicates that memory allocation must not use locks. This is
+ * relevant in systems where interlocked operations are only available after
+ * virtual memory is enabled.
+ */
+#define MM_MODE_NOSYNC 0x10
+
+/*
+ * This flag indicates that the mapping is intended to be used in a first
+ * stage translation table, which might have different encodings for the
+ * attribute bits than the second stage table.
+ */
+#define MM_MODE_STAGE1 0x20
+
+bool mm_ptable_init(struct mm_ptable *t, int mode);
+void mm_ptable_dump(struct mm_ptable *t);
+bool mm_ptable_map(struct mm_ptable *t, vaddr_t vaddr_begin, vaddr_t vaddr_end,
+ paddr_t paddr, int mode);
+bool mm_ptable_map_page(struct mm_ptable *t, vaddr_t va, paddr_t pa, int mode);
+bool mm_ptable_unmap(struct mm_ptable *t, vaddr_t begin, vaddr_t end, int mode);
+void mm_ptable_defrag(struct mm_ptable *t);
+
+#endif /* _MM_H */
diff --git a/src/alloc.c b/src/alloc.c
index cd7c614..b9dc585 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -7,7 +7,7 @@
static size_t alloc_limit;
static struct spinlock alloc_lock = SPINLOCK_INIT;
-/*
+/**
* Initializes the allocator.
*/
void halloc_init(size_t base, size_t size)
@@ -16,16 +16,16 @@
alloc_limit = base + size;
}
-/*
- * Allocates the requested amount of memory. Return NULL when there isn't enough
- * free memory.
+/**
+ * Allocates the requested amount of memory. Returns NULL when there isn't
+ * enough free memory.
*/
void *halloc(size_t size)
{
return halloc_aligned(size, 2 * sizeof(size_t));
}
-/*
+/**
* Frees the provided memory.
*
* Currently unimplemented.
@@ -35,7 +35,7 @@
dlog("Attempted to free pointer %p\n", ptr);
}
-/*
+/**
* Allocates the requested amount of memory, with the requested alignment.
*
* Alignment must be a power of two. Returns NULL when there isn't enough free
@@ -43,10 +43,27 @@
*/
void *halloc_aligned(size_t size, size_t align)
{
- size_t begin;
- size_t end;
+ void *ret;
sl_lock(&alloc_lock);
+ ret = halloc_aligned_nosync(size, align);
+ sl_unlock(&alloc_lock);
+
+ return ret;
+}
+
+/**
+ * Allocates the requested amount of memory, with the requested alignment, but
+ * no synchronisation with other CPUs. The caller is responsible for serialising
+ * all such calls.
+ *
+ * Alignment must be a power of two. Returns NULL when there isn't enough free
+ * memory.
+ */
+void *halloc_aligned_nosync(size_t size, size_t align)
+{
+ size_t begin;
+ size_t end;
begin = (alloc_base + align - 1) & ~(align - 1);
end = begin + size;
@@ -57,7 +74,5 @@
else
begin = 0;
- sl_unlock(&alloc_lock);
-
return (void *)begin;
}
diff --git a/src/arch/aarch64/entry.S b/src/arch/aarch64/entry.S
index 3c8f860..7f8a4e1 100644
--- a/src/arch/aarch64/entry.S
+++ b/src/arch/aarch64/entry.S
@@ -1,14 +1,11 @@
#include "offsets.h"
-#define PECOFF_FILE_ALIGNMENT 0x200
-
.section .init.entry, "ax"
.global entry
entry:
-
- add x13, x18, #0x16
b 0f
+ .word 0
.quad 4096 /* text_offset */
.quad file_size /* image_size */
.quad 0 /* flags */
diff --git a/src/arch/aarch64/handler.c b/src/arch/aarch64/handler.c
index 48de85d..27ceacf 100644
--- a/src/arch/aarch64/handler.c
+++ b/src/arch/aarch64/handler.c
@@ -19,7 +19,21 @@
void sync_current_exception(uint64_t esr, uint64_t elr)
{
- dlog("Exception: esr=%#x, elr=%#x\n", esr, elr);
+ switch (esr >> 26) {
+ case 0x25: /* EC = 100101, Data abort. */
+ dlog("Data abort: pc=0x%x, esr=0x%x, ec=0x%x", elr, esr, esr >> 26);
+ if (!(esr & (1u << 10))) /* Check FnV bit. */
+ dlog(", far=0x%x, hpfar=0x%x", read_msr(far_el2), read_msr(hpfar_el2) << 8);
+ else
+ dlog(", far=invalid");
+
+ dlog("\n");
+ for (;;);
+
+ default:
+ dlog("Unknown sync exception pc=0x%x, esr=0x%x, ec=0x%x\n", elr, esr, esr >> 26);
+ for (;;);
+ }
for (;;);
}
diff --git a/src/arch/aarch64/inc/arch_mm.h b/src/arch/aarch64/inc/arch_mm.h
new file mode 100644
index 0000000..c65c488
--- /dev/null
+++ b/src/arch/aarch64/inc/arch_mm.h
@@ -0,0 +1,191 @@
+#ifndef _ARCH_MM_H
+#define _ARCH_MM_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* A physical address. */
+typedef size_t paddr_t;
+
+/* A virtual address. */
+typedef size_t vaddr_t;
+
+/* A page table entry. */
+typedef size_t pte_t;
+
+#define PAGE_LEVEL_BITS 9
+#define PAGE_BITS 12
+
+struct arch_mm_ptable {
+ int max_level;
+};
+
+/**
+ * Initialises the architecture-dependent aspects of the page table.
+ */
+static inline void arch_mm_ptable_init(struct arch_mm_ptable *t)
+{
+ t->max_level = 2;
+}
+
+/**
+ * Determines the maximum level supported by the given page table.
+ */
+static inline int arch_mm_max_level(struct arch_mm_ptable *t)
+{
+ return t->max_level;
+}
+
+/**
+ * Converts a physical address to a table PTE.
+ *
+ * The spec says that 'Table descriptors for stage 2 translations do not
+ * include any attribute field', so we don't take any attributes as arguments.
+ */
+static inline pte_t arch_mm_pa_to_table_pte(paddr_t pa)
+{
+ return pa | 0x3;
+}
+
+/**
+ * Converts a physical address to a block PTE.
+ */
+static inline pte_t arch_mm_pa_to_block_pte(paddr_t pa, uint64_t attrs)
+{
+ return pa | attrs;
+}
+
+/**
+ * Converts a physical address to a page PTE.
+ */
+static inline pte_t arch_mm_pa_to_page_pte(paddr_t pa, uint64_t attrs)
+{
+ return pa | attrs | ((attrs & 1) << 1);
+}
+
+/**
+ * Converts a block PTE to a page PTE.
+ */
+static inline pte_t arch_mm_block_to_page_pte(pte_t pte)
+{
+ return pte | 2;
+}
+
+/**
+ * Specifies whether block mappings are acceptable at the given level.
+ */
+static inline bool arch_mm_is_block_allowed(int level)
+{
+ return level == 1 || level == 2;
+}
+
+/**
+ * Returns the encoding of a page table entry that isn't present.
+ */
+static inline pte_t arch_mm_absent_pte(void)
+{
+ return 0;
+}
+
+/**
+ * Determines if the given pte is present, i.e., if it points to another table,
+ * to a page, or a block of pages.
+ */
+static inline bool arch_mm_pte_is_present(pte_t pte)
+{
+ return (pte & 1) != 0;
+}
+
+/**
+ * Determines if the given pte references another table.
+ */
+static inline bool arch_mm_pte_is_table(pte_t pte)
+{
+ return (pte & 3) == 3;
+}
+
+/**
+ * Determines if the given pte references a block of pages.
+ */
+static inline bool arch_mm_pte_is_block(pte_t pte)
+{
+ return (pte & 3) == 1;
+}
+
+/**
+ * Clears the given virtual address, i.e., sets the ignored bits (from a page
+ * table perspective) to zero.
+ */
+static inline vaddr_t arch_mm_clear_va(vaddr_t addr)
+{
+ return addr & ~((1ull << PAGE_BITS) - 1) & ((1ull << 48) - 1);
+}
+
+/**
+ * Clears the given physical address, i.e., sets the ignored bits (from a page
+ * table perspective) to zero.
+ */
+static inline paddr_t arch_mm_clear_pa(paddr_t addr)
+{
+ return addr & ~((1ull << PAGE_BITS) - 1) & ((1ull << 48) - 1);
+}
+
+/**
+ * Extracts the physical address from a page table entry.
+ */
+static inline paddr_t arch_mm_pte_to_paddr(pte_t pte)
+{
+ return arch_mm_clear_pa(pte);
+}
+
+/**
+ * Extracts a page table pointer from the given page table entry.
+ */
+static inline pte_t *arch_mm_pte_to_table(pte_t pte)
+{
+ return (pte_t *)arch_mm_pte_to_paddr(pte);
+}
+
+/**
+ * Invalidates stage-1 TLB entries referring to the given virtual address range.
+ */
+static inline void arch_mm_invalidate_stage1_range(vaddr_t begin, vaddr_t end)
+{
+ vaddr_t it;
+
+ begin >>= 12;
+ end >>= 12;
+
+ __asm__ volatile("dsb ishst");
+
+ for (it = begin; it < end; it += (1ull << (PAGE_BITS - 12)))
+ __asm__("tlbi vae2is, %0" : : "r"(it));
+
+ __asm__ volatile("dsb ish");
+}
+
+/**
+ * Invalidates stage-2 TLB entries referring to the given virtual address range.
+ */
+static inline void arch_mm_invalidate_stage2_range(vaddr_t begin, vaddr_t end)
+{
+ vaddr_t it;
+
+ begin >>= 12;
+ end >>= 12;
+
+ __asm__ volatile("dsb ishst");
+
+ for (it = begin; it < end; it += (1ull << (PAGE_BITS - 12)))
+ __asm__("tlbi ipas2e1, %0" : : "r"(it));
+
+ __asm__ volatile("dsb ish\n"
+ "tlbi vmalle1is\n"
+ "dsb ish\n");
+}
+
+uint64_t arch_mm_mode_to_attrs(int mode);
+void arch_mm_init(paddr_t table);
+
+#endif /* _ARCH_MM_H */
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index ef446f3..225f1ae 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -1,10 +1,59 @@
#include "arch_cpu.h"
+#include "mm.h"
#include "msr.h"
+#define NON_SHAREABLE 0ull
+#define OUTER_SHAREABLE 2ull
+#define INNER_SHAREABLE 3ull
+
+#define STAGE1_XN (1ull << 54)
+#define STAGE1_CONTIGUOUS (1ull << 52)
+#define STAGE1_DBM (1ull << 51)
+#define STAGE1_NG (1ull << 11)
+#define STAGE1_AF (1ull << 10)
+#define STAGE1_SH(x) ((x) << 8)
+#define STAGE1_AP(x) ((x) << 6)
+#define STAGE1_NS (1ull << 5)
+#define STAGE1_ATTRINDX(x) ((x) << 2)
+
+#define STAGE1_READONLY 2ull
+#define STAGE1_READWRITE 0ull
+
+#define STAGE1_DEVICEINDX 0ull
+#define STAGE1_NORMALINDX 1ull
+
+#define STAGE2_XN(x) ((x) << 53)
+#define STAGE2_CONTIGUOUS (1ull << 52)
+#define STAGE2_DBM (1ull << 51)
+#define STAGE2_AF (1ull << 10)
+#define STAGE2_SH(x) ((x) << 8)
+#define STAGE2_S2AP(x) ((x) << 6)
+#define STAGE2_MEMATTR(x) ((x) << 2)
+
+#define STAGE2_EXECUTE_ALL 0ull
+#define STAGE2_EXECUTE_EL0 1ull
+#define STAGE2_EXECUTE_NONE 2ull
+#define STAGE2_EXECUTE_EL1 3ull
+
+/* The following are stage-2 memory attributes for normal memory. */
+#define STAGE2_NONCACHEABLE 1ull
+#define STAGE2_WRITETHROUGH 2ull
+#define STAGE2_WRITEBACK 3ull
+
+#define STAGE2_MEMATTR_NORMAL(outer, inner) ((outer << 2) | (inner))
+
+/* The following are stage-2 memory attributes for device memory. */
+#define STAGE2_MEMATTR_DEVICE_nGnRnE 0ull
+#define STAGE2_MEMATTR_DEVICE_nGnRE 1ull
+#define STAGE2_MEMATTR_DEVICE_nGRE 2ull
+#define STAGE2_MEMATTR_DEVICE_GRE 3ull
+
+#define STAGE2_ACCESS_READ 1ull
+#define STAGE2_ACCESS_WRITE 2ull
+
void arch_vptable_init(struct arch_page_table *table)
{
uint64_t i;
- uint64_t v;
/* TODO: Check each bit. */
for (i = 0; i < 512; i++) {
@@ -25,9 +74,64 @@
table->first[0] = (uint64_t)&table->entry0[0] | 3;
table->first[1] = (uint64_t)&table->entry1[0] | 3;
+}
- /* TODO: Where should this go? */
- v =
+uint64_t arch_mm_mode_to_attrs(int mode)
+{
+ uint64_t attrs = 1; /* Present bit. */
+
+ if (mode & MM_MODE_STAGE1) {
+ attrs |= STAGE1_AF | STAGE1_SH(OUTER_SHAREABLE);
+
+ /* Define the execute bits. */
+ if (!(mode & MM_MODE_X))
+ attrs |= STAGE1_XN;
+
+ /* Define the read/write bits. */
+ if (mode & MM_MODE_W)
+ attrs |= STAGE1_AP(STAGE1_READWRITE);
+ else
+ attrs |= STAGE1_AP(STAGE1_READONLY);
+
+ /* Define the memory attribute bits. */
+ if (mode & MM_MODE_D)
+ attrs |= STAGE1_ATTRINDX(STAGE1_DEVICEINDX);
+ else
+ attrs |= STAGE1_ATTRINDX(STAGE1_NORMALINDX);
+ } else {
+ uint64_t access = 0;
+
+ attrs |= STAGE2_AF | STAGE2_SH(OUTER_SHAREABLE);
+
+ /* Define the read/write bits. */
+ if (mode & MM_MODE_R)
+ access |= STAGE2_ACCESS_READ;
+
+ if (mode & MM_MODE_W)
+ access |= STAGE2_ACCESS_WRITE;
+
+ attrs |= STAGE2_S2AP(access);
+
+ /* Define the execute bits. */
+ if (mode & MM_MODE_X)
+ attrs |= STAGE2_XN(STAGE2_EXECUTE_ALL);
+ else
+ attrs |= STAGE2_XN(STAGE2_EXECUTE_NONE);
+
+ /* Define the memory attribute bits. */
+ if (mode & MM_MODE_D)
+ attrs |= STAGE2_MEMATTR_DEVICE_nGnRnE;
+ else
+ attrs |= STAGE2_MEMATTR_NORMAL(STAGE2_WRITEBACK,
+ STAGE2_WRITEBACK);
+ }
+
+ return attrs;
+}
+
+void arch_mm_init(paddr_t table)
+{
+ uint64_t v =
(1u << 31) | /* RES1. */
(4 << 16) | /* PS: 44 bits. */
(0 << 14) | /* TG0: 4 KB granule. */
@@ -37,258 +141,16 @@
(2 << 6) | /* SL0: Start at level 0. */
(20 << 0); /* T0SZ: 44-bit input address size. */
write_msr(vtcr_el2, v);
-}
-#if 0
-#include "arch.h"
-
-#include <stdint.h>
-
-#include "alloc.h"
-#include "log.h"
-#include "msr.h"
-
-#define PAGE_BITS 12
-#define PAGE_SIZE (1 << PAGE_BITS)
-#define ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(uint64_t))
-#define INITIAL_LEVEL 1
-
-extern char text_begin[];
-extern char text_end[];
-extern char rodata_begin[];
-extern char rodata_end[];
-extern char data_begin[];
-extern char data_end[];
-extern char bin_end[];
-
-static uint64_t *ttbr;
-
-static inline size_t mm_entry_size(int level)
-{
- return 1ull << (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
-}
-
-static inline size_t mm_level_end(size_t va, int level)
-{
- size_t offset = (PAGE_BITS + (4 - level) * (PAGE_BITS - 3));
- return ((va >> offset) + 1) << offset;
-}
-
-static inline size_t mm_index(size_t va, int level)
-{
- size_t v = va >> (PAGE_BITS + (3 - level) * (PAGE_BITS - 3));
- return v & ((1 << (PAGE_BITS - 3)) - 1);
-}
-
-static inline uint64_t mm_clear_attrs(uint64_t v)
-{
- /* Clean bottom bits. */
- v &= ~((1 << PAGE_BITS) - 1);
-
- /* Clean top bits. */
- v &= ((1ull << 59) - 1);
-
- return v;
-}
-
-static inline uint64_t *mm_table_ptr(uint64_t pa)
-{
- return (uint64_t *)mm_clear_attrs(pa);
-}
-
-static inline uint64_t mm_mode_to_attrs(uint64_t mode)
-{
- uint64_t attrs =
- (1 << 10) | /* Access flag. */
- (2 << 8); /* sh -> outer shareable. */
-
- /* TODO: This is different in s2. */
- if (!(mode & MM_X)) {
- attrs |= (1ull << 54); /* XN or UXN, [user] execute never. */
-
- /* TODO: This is only ok in EL1, it is RES0 in EL2. */
- attrs |= (1ull << 53); /* PXN, privileged execute never. */
- }
-
- /* TODO: This is different in s2. */
- if (mode & MM_W)
- attrs |= (0 << 6); /* rw, no EL0 access. */
- else
- attrs |= (2 << 6); /* read-only, no EL0 access. */
-
- if (mode & MM_D)
- attrs |= (0 << 2); /* device memory in MAIR_ELx. */
- else
- attrs |= (1 << 2); /* normal memory in MAIR_ELx. */
-
- return attrs;
-}
-
-static uint64_t *mm_populate_table(uint64_t *table, uint64_t index)
-{
- uint64_t *ntable;
- uint64_t v = table[index];
- uint64_t i;
-
- /* Check if table entry already exists. */
- if (v & 1) {
- /* Fail if it's a block one. */
- if (!(v & 2))
- return NULL;
- return mm_table_ptr(v);
- }
-
- /* Allocate a new table entry and initialize it. */
- ntable = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
- if (!ntable)
- return NULL;
-
- for (i = 0; i < ENTRIES_PER_LEVEL; i++)
- ntable[i] = 0;
-
- /* Fill in the new entry. */
- table[index] = (size_t)ntable | 0x3;
-
- return ntable;
-}
-
-static bool mm_map_level(size_t va, size_t va_end, size_t pa,
- uint64_t attrs, uint64_t *table, int level)
-{
- size_t i = mm_index(va, level);
- size_t va_level_end = mm_level_end(va, level);
- size_t entry_size = mm_entry_size(level);
-
- /* Cap va_end so that we don't go over of the current level max. */
- if (va_end > va_level_end)
- va_end = va_level_end;
-
- /* Fill each entry in the table. */
- while (va < va_end) {
- if (level == 3) {
- table[i] = pa | 0x3 | attrs;
- } else {
- uint64_t *nt = mm_populate_table(table, i);
- if (!nt) {
- /* TODO: Undo all the work so far? */
- return false;
- }
-
- if (!mm_map_level(va, va_end, pa, attrs, nt, level+1)) {
- /* TODO: Undo all the work so far? */
- return false;
- }
- }
-
- va += entry_size;
- pa += entry_size;
- i++;
- }
-
- return true;
-}
-
-bool mm_map_range(size_t va, size_t size, uint64_t pa, uint64_t mode)
-{
- uint64_t attrs = mm_mode_to_attrs(mode);
- uint64_t end = mm_clear_attrs(va + size + PAGE_SIZE - 1);
-
- va = mm_clear_attrs(va);
- pa = mm_clear_attrs(pa);
-
- return mm_map_level(va, end, pa, attrs, ttbr, INITIAL_LEVEL);
-}
-
-bool mm_map_page(size_t va, size_t pa, uint64_t mode)
-{
- size_t i;
- uint64_t attrs = mm_mode_to_attrs(mode);
- uint64_t *table = ttbr;
-
- va = mm_clear_attrs(va);
- pa = mm_clear_attrs(pa);
- for (i = INITIAL_LEVEL; i < 3; i++) {
- table = mm_populate_table(table, mm_index(va, i));
- if (!table)
- return false;
- }
-
- /* We reached level 3. */
- i = mm_index(va, 3);
- table[i] = pa | 0x3 | attrs;
- return true;
-}
-
-bool arch_init_mm(void)
-{
-#if 0
- size_t i;
-
- /* Allocate the first level, then zero it out. */
- ttbr = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
- if (!ttbr)
- return false;
-
- for (i = 0; i < ENTRIES_PER_LEVEL; i++)
- ttbr[i] = 0;
-
- /* Map page for uart. */
- mm_map_page(PL011_BASE, PL011_BASE, MM_R | MM_W | MM_D);
-
- /* Map page for gic. */
- mm_map_page(GICD_BASE, GICD_BASE, MM_R | MM_W | MM_D);
- mm_map_page(GICC_BASE, GICC_BASE, MM_R | MM_W | MM_D);
-
- /* Map each section. */
- mm_map_range((size_t)text_begin, text_end - text_begin,
- (size_t)text_begin, MM_X);
-
- mm_map_range((size_t)rodata_begin, rodata_end - rodata_begin,
- (size_t)rodata_begin, MM_R);
-
- mm_map_range((size_t)data_begin, data_end - data_begin,
- (size_t)data_begin, MM_R | MM_W);
-
- mm_map_range((size_t)bin_end, 20 * 1024 * 1024, (size_t)bin_end,
- MM_R | MM_W);
-#endif
- log(INFO, "About to enable mmu.\n");
- enable_mmu(ttbr);
- log(INFO, "mmu is on.\n");
-
- return true;
-}
-
-static void arch_mm_dump_table(uint64_t *table, int level)
-{
- uint64_t i, j;
- for (i = 0; i < ENTRIES_PER_LEVEL; i++) {
- if ((table[i] & 1) == 0)
- continue;
-
- for (j = 1 * (level - INITIAL_LEVEL + 1); j; j--)
- log(INFO, "\t");
- log(INFO, "%x: %x\n", i, table[i]);
- if (level >= 3)
- continue;
-
- if ((table[i] & 3) == 3)
- arch_mm_dump_table(mm_table_ptr(table[i]), level + 1);
- }
-}
-
-void enable_mmu(uint64_t *table)
-{
- //uint32_t v;
-
- enable_s2();
-#if 0
/*
- * 0 -> Device-nGnRnE memory
- * 1 -> Normal memory, Inner/Outer Write-Back Non-transient,
- * Write-Alloc, Read-Alloc.
+ * 0 -> Device-nGnRnE memory
+ * 0xff -> Normal memory, Inner/Outer Write-Back Non-transient,
+ * Write-Alloc, Read-Alloc.
*/
- write_msr(mair_el2, 0xff00);
+ write_msr(mair_el2,
+ (0 << (8 * STAGE1_DEVICEINDX)) |
+ (0xff << (8 * STAGE1_NORMALINDX)));
+
write_msr(ttbr0_el2, table);
/*
@@ -308,8 +170,7 @@
v =
(1 << 0) | /* M, enable stage 1 EL2 MMU. */
(1 << 1) | /* A, enable alignment check faults. */
- // TODO: Enable this.
-// (1 << 2) | /* C, data cache enable. */
+ (1 << 2) | /* C, data cache enable. */
(1 << 3) | /* SA, enable stack alignment check. */
(3 << 4) | /* RES1 bits. */
(1 << 11) | /* RES1 bit. */
@@ -325,6 +186,4 @@
__asm volatile("isb");
write_msr(sctlr_el2, v);
__asm volatile("isb");
-#endif
}
-#endif
diff --git a/src/fdt.c b/src/fdt.c
index d5b1e16..d20b82e 100644
--- a/src/fdt.c
+++ b/src/fdt.c
@@ -358,3 +358,13 @@
e->address = htobe64(addr);
e->size = htobe64(len);
}
+
+size_t fdt_header_size(void)
+{
+ return sizeof(struct fdt_header);
+}
+
+size_t fdt_total_size(struct fdt_header *hdr)
+{
+ return be32toh(hdr->totalsize);
+}
diff --git a/src/main.c b/src/main.c
index d5c2c22..0c18d79 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,15 +1,19 @@
#include <stdalign.h>
-#include <stdatomic.h>
#include <stddef.h>
+#include "alloc.h"
+#include "api.h"
#include "cpio.h"
#include "cpu.h"
#include "dlog.h"
#include "fdt.h"
+#include "mm.h"
#include "std.h"
#include "vm.h"
void *fdt;
+char ptable_buf[PAGE_SIZE * 20];
+struct mm_ptable ptable;
bool fdt_find_node(struct fdt_node *node, const char *path)
{
@@ -91,19 +95,32 @@
return true;
}
-static void relocate(const char *from, size_t size)
+/**
+ * Copies data to an unmapped location by mapping it for write, copying the
+ * data, then unmapping it.
+ */
+static bool copy_to_unmaped(paddr_t to, const void *from, size_t size)
{
- extern char bin_end[];
- size_t tmp = (size_t)&bin_end[0];
- char *dest = (char *)((tmp + 0x80000 - 1) & ~(0x80000 - 1));
- dlog("bin_end is at %p, copying to %p\n", &bin_end[0], dest);
- memcpy(dest, from, size);
+ if (!mm_ptable_map(&ptable, (vaddr_t)to, (vaddr_t)to + size, to,
+ MM_MODE_W | MM_MODE_STAGE1))
+ return false;
+
+ memcpy((void *)to, from, size);
+
+ mm_ptable_unmap(&ptable, to, to + size, MM_MODE_STAGE1);
+
+ return true;
}
-/* TODO: Remove this. */
-struct vm primary_vm;
-struct vm secondary_vm[MAX_VMS];
-uint32_t secondary_vm_count = 0;
+static bool relocate(const char *from, size_t size)
+{
+ /* TODO: This is a hack. We must read the alignment from the binary. */
+ extern char bin_end[];
+ size_t tmp = (size_t)&bin_end[0];
+ paddr_t dest = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
+ dlog("bin_end is at %p, copying to %p\n", &bin_end[0], dest);
+ return copy_to_unmaped(dest, from, size);
+}
static void find_memory_range(const struct fdt_node *root,
uint64_t *block_start, uint64_t *block_size)
@@ -341,8 +358,11 @@
}
*mem_size -= mem;
- memcpy((void *)(mem_start + *mem_size), kernel.next,
- kernel.limit - kernel.next);
+ if (!copy_to_unmaped(mem_start + *mem_size, kernel.next,
+ kernel.limit - kernel.next)) {
+ dlog("Unable to copy kernel for vm %u\n", count);
+ continue;
+ }
dlog("Loaded VM%u with %u vcpus, entry at 0x%x\n", count, cpu,
mem_start + *mem_size);
@@ -365,7 +385,10 @@
return false;
}
- relocate(it.next, it.limit - it.next);
+ if (!relocate(it.next, it.limit - it.next)) {
+ dlog("Unable to relocate kernel for primary vm.\n");
+ return false;
+ }
if (!find_file(c, "initrd.img", &it)) {
dlog("Unable to find initrd.img\n");
@@ -389,7 +412,6 @@
{
size_t tmp = (size_t)&relocate;
tmp = (tmp + 0x80000 - 1) & ~(0x80000 - 1);
-
fdt_add_mem_reservation(fdt, tmp & ~0xfffff, 0x80000);
vm_init(&primary_vm, MAX_CPUS);
vm_start_vcpu(&primary_vm, 0, tmp, (size_t)fdt, true);
@@ -398,24 +420,83 @@
return true;
}
+/**
+ * Performs one-time initialisation of the hypervisor.
+ */
static void one_time_init(void)
{
+ extern char text_begin[];
+ extern char text_end[];
+ extern char rodata_begin[];
+ extern char rodata_end[];
+ extern char data_begin[];
+ extern char data_end[];
+
dlog("Initializing hafnium\n");
cpu_module_init();
+ halloc_init((size_t)ptable_buf, sizeof(ptable_buf));
+
+ if (!mm_ptable_init(&ptable, MM_MODE_NOSYNC | MM_MODE_STAGE1)) {
+ dlog("Unable to allocate memory for page table.\n");
+ for (;;);
+ }
+
+ dlog("text: 0x%x - 0x%x\n", text_begin, text_end);
+ dlog("rodata: 0x%x - 0x%x\n", rodata_begin, rodata_end);
+ dlog("data: 0x%x - 0x%x\n", data_begin, data_end);
+
+ /* Map page for uart. */
+ mm_ptable_map_page(&ptable, PL011_BASE, PL011_BASE,
+ MM_MODE_R | MM_MODE_W | MM_MODE_D | MM_MODE_NOSYNC |
+ MM_MODE_STAGE1);
+
+ /* Map each section. */
+ mm_ptable_map(&ptable, (vaddr_t)text_begin, (vaddr_t)text_end,
+ (paddr_t)text_begin,
+ MM_MODE_X | MM_MODE_NOSYNC | MM_MODE_STAGE1);
+
+ mm_ptable_map(&ptable, (vaddr_t)rodata_begin, (vaddr_t)rodata_end,
+ (paddr_t)rodata_begin,
+ MM_MODE_R | MM_MODE_NOSYNC | MM_MODE_STAGE1);
+
+ mm_ptable_map(&ptable, (vaddr_t)data_begin, (vaddr_t)data_end,
+ (paddr_t)data_begin,
+ MM_MODE_R | MM_MODE_W | MM_MODE_NOSYNC | MM_MODE_STAGE1);
+
+ arch_mm_init((paddr_t)ptable.table);
/* TODO: Code below this point should be removed from this function. */
- /* TODO: Remove this. */
-
do {
struct fdt_node n;
uint64_t mem_start = 0;
uint64_t mem_size = 0;
+ uint64_t new_mem_size;
+
+ /* Map in the fdt header. */
+ if (!mm_ptable_map(&ptable, (vaddr_t)fdt,
+ (vaddr_t)fdt + fdt_header_size(),
+ (paddr_t)fdt,
+ MM_MODE_R | MM_MODE_STAGE1)) {
+ dlog("Unable to map FDT header.\n");
+ break;
+ }
+
+		/*
+		 * Map the rest of the fdt; an extra page is mapped later,
+		 * in r/w mode, when extending it to add new memory
+		 * reservations.
+		 */
+ if (!mm_ptable_map(&ptable, (vaddr_t)fdt,
+ (vaddr_t)fdt + fdt_total_size(fdt),
+ (paddr_t)fdt,
+ MM_MODE_R | MM_MODE_STAGE1)) {
+ dlog("Unable to map FDT.\n");
+ break;
+ }
fdt_root_node(&n, fdt);
fdt_find_child(&n, "");
- /* TODO: Use this. */
find_memory_range(&n, &mem_start, &mem_size);
dlog("Memory range: 0x%x - 0x%x\n", mem_start,
mem_start + mem_size - 1);
@@ -427,18 +508,44 @@
break;
dlog("Ramdisk range: 0x%x - 0x%x\n", begin, end - 1);
+ mm_ptable_map(&ptable, begin, end, begin,
+ MM_MODE_R | MM_MODE_STAGE1);
struct cpio c;
cpio_init(&c, (void *)begin, end - begin);
- load_secondary(&c, mem_start, &mem_size);
+ /* Map the fdt in r/w mode in preparation for extending it. */
+ if (!mm_ptable_map(&ptable, (vaddr_t)fdt,
+ (vaddr_t)fdt + fdt_total_size(fdt) +
+ PAGE_SIZE,
+ (paddr_t)fdt,
+ MM_MODE_R | MM_MODE_W | MM_MODE_STAGE1)) {
+ dlog("Unable to map FDT in r/w mode.\n");
+ break;
+ }
+ new_mem_size = mem_size;
+ load_secondary(&c, mem_start, &new_mem_size);
load_primary(&c, &n);
+
+ /* Patch fdt to reserve memory for secondary VMs. */
+ fdt_add_mem_reservation(fdt, mem_start + new_mem_size,
+ mem_size - new_mem_size);
+
+ /* Unmap FDT. */
+ if (!mm_ptable_unmap(&ptable, (vaddr_t)fdt,
+ (vaddr_t)fdt + fdt_total_size(fdt) +
+ PAGE_SIZE, MM_MODE_STAGE1)) {
+ dlog("Unable to unmap the FDT.\n");
+ break;
+ }
} while (0);
+ mm_ptable_defrag(&ptable);
+
arch_set_vm_mm(&primary_vm.page_table);
}
-/*
+/**
* The entry point of CPUs when they are turned on. It is supposed to initialise
* all state and return the first vCPU to run.
*/
@@ -446,10 +553,13 @@
{
struct cpu *c = cpu();
- /* Do global one-time initialization just once. */
- static atomic_flag inited = ATOMIC_FLAG_INIT;
- if (!atomic_flag_test_and_set_explicit(&inited, memory_order_acq_rel))
+ /* Do global one-time initialization just once. We avoid using atomics
+ * by only touching the variable from cpu 0. */
+ static volatile bool inited = false;
+ if (cpu_index(c) == 0 && !inited) {
+ inited = true;
one_time_init();
+ }
dlog("Starting up cpu %d\n", cpu_index(c));
diff --git a/src/mm.c b/src/mm.c
new file mode 100644
index 0000000..4a72d56
--- /dev/null
+++ b/src/mm.c
@@ -0,0 +1,320 @@
+#include "mm.h"
+
+#include <stdatomic.h>
+#include <stdint.h>
+
+#include "alloc.h"
+#include "dlog.h"
+
+#define MAP_FLAG_SYNC 0x01
+#define MAP_FLAG_COMMIT 0x02
+
+/**
+ * Calculates the size of the address space represented by a page table entry at
+ * the given level.
+ */
+static inline size_t mm_entry_size(int level)
+{
+ return 1ull << (PAGE_BITS + level * PAGE_LEVEL_BITS);
+}
+
+/**
+ * For a given virtual address, calculates the maximum (plus one) address that
+ * can be represented by the same table at the given level.
+ */
+static inline vaddr_t mm_level_end(vaddr_t va, int level)
+{
+ size_t offset = PAGE_BITS + (level + 1) * PAGE_LEVEL_BITS;
+ return ((va >> offset) + 1) << offset;
+}
+
+/**
+ * For a given virtual address, calculates the index at which its entry is
+ * stored in a table at the given level.
+ */
+static inline size_t mm_index(vaddr_t va, int level)
+{
+ vaddr_t v = va >> (PAGE_BITS + level * PAGE_LEVEL_BITS);
+ return v & ((1ull << PAGE_LEVEL_BITS) - 1);
+}
+
+/**
+ * Populates the provided page table entry with a reference to another table if
+ * needed, that is, if it does not yet point to another table.
+ *
+ * Returns a pointer to the table the entry now points to.
+ */
+static pte_t *mm_populate_table_pte(pte_t *pte, int level, bool sync_alloc)
+{
+ pte_t *ntable;
+ pte_t v = *pte;
+ pte_t new_pte;
+ size_t i;
+ size_t inc;
+
+ /* Just return pointer to table if it's already populated. */
+ if (arch_mm_pte_is_table(v))
+ return arch_mm_pte_to_table(v);
+
+ /* Allocate a new table. */
+ ntable = (sync_alloc ? halloc_aligned : halloc_aligned_nosync)(
+ PAGE_SIZE, PAGE_SIZE);
+ if (!ntable) {
+ dlog("Failed to allocate memory for page table\n");
+ return NULL;
+ }
+
+ /* Determine template for new pte and its increment. */
+ if (!arch_mm_pte_is_block(v)) {
+ inc = 0;
+ new_pte = arch_mm_absent_pte();
+ } else {
+ inc = mm_entry_size(level - 1);
+ if (level == 1)
+ new_pte = arch_mm_block_to_page_pte(v);
+ else
+ new_pte = v;
+ }
+
+ /* Initialise entries in the new table. */
+ for (i = 0; i < PAGE_SIZE / sizeof(paddr_t); i++) {
+ ntable[i] = new_pte;
+ new_pte += inc;
+ }
+
+ /*
+ * Ensure initialisation is visible before updating the actual pte, then
+ * update it.
+ */
+ atomic_thread_fence(memory_order_release);
+ *pte = arch_mm_pa_to_table_pte((paddr_t)ntable);
+
+ return ntable;
+}
+
+/**
+ * Frees all page-table-related memory associated with the given pte at the
+ * given level.
+ */
+static void mm_free_page_pte(pte_t pte, int level, bool sync)
+{
+ /* TODO: Implement.
+ if (!arch_mm_pte_is_present(pte) || level < 1)
+ return;
+ */
+}
+
+/**
+ * Updates the page table at the given level to map the given virtual address
+ * range to a physical range using the provided (architecture-specific)
+ * attributes.
+ *
+ * This function calls itself recursively if it needs to update additional
+ * levels, but the recursion is bound by the maximum number of levels in a page
+ * table.
+ */
+static bool mm_map_level(vaddr_t va, vaddr_t va_end, paddr_t pa, uint64_t attrs,
+ pte_t *table, int level, int flags)
+{
+ size_t i = mm_index(va, level);
+ vaddr_t va_level_end = mm_level_end(va, level);
+ size_t entry_size = mm_entry_size(level);
+ bool commit = flags & MAP_FLAG_COMMIT;
+ bool sync = flags & MAP_FLAG_SYNC;
+
+ /* Cap va_end so that we don't go over the current level max. */
+ if (va_end > va_level_end)
+ va_end = va_level_end;
+
+ /* Fill each entry in the table. */
+ while (va < va_end) {
+ if (level == 0) {
+ if (commit)
+ table[i] = arch_mm_pa_to_page_pte(pa, attrs);
+ } else if ((va_end - va) >= entry_size &&
+ arch_mm_is_block_allowed(level) &&
+ (va & (entry_size - 1)) == 0) {
+ if (commit) {
+ pte_t pte = table[i];
+ table[i] = arch_mm_pa_to_block_pte(pa, attrs);
+ /* TODO: Add barrier. How do we ensure this
+ * isn't in use by another CPU? Send IPI? */
+ mm_free_page_pte(pte, level, sync);
+ }
+ } else {
+ pte_t *nt = mm_populate_table_pte(table + i, level,
+ sync);
+ if (!nt)
+ return false;
+
+ if (!mm_map_level(va, va_end, pa, attrs, nt, level-1,
+ flags))
+ return false;
+ }
+
+ va = (va + entry_size) & ~(entry_size - 1);
+ pa = (pa + entry_size) & ~(entry_size - 1);
+ i++;
+ }
+
+ return true;
+}
+
+/**
+ * Invalidates the TLB for the given virtual address range.
+ */
+static void mm_invalidate_tlb(vaddr_t begin, vaddr_t end, bool stage1)
+{
+ if (stage1)
+ arch_mm_invalidate_stage1_range(begin, end);
+ else
+ arch_mm_invalidate_stage2_range(begin, end);
+}
+
+/**
+ * Updates the given table such that the given virtual address range is mapped
+ * to the given physical address range in the architecture-agnostic mode
+ * provided.
+ */
+bool mm_ptable_map(struct mm_ptable *t, vaddr_t begin, vaddr_t end,
+ paddr_t paddr, int mode)
+{
+ uint64_t attrs = arch_mm_mode_to_attrs(mode);
+ int flags = (mode & MM_MODE_NOSYNC) ? 0 : MAP_FLAG_SYNC;
+ int level = arch_mm_max_level(&t->arch);
+
+ begin = arch_mm_clear_va(begin);
+ end = arch_mm_clear_va(end + PAGE_SIZE - 1);
+ paddr = arch_mm_clear_pa(paddr);
+
+ /*
+ * Do it in two steps to prevent leaving the table in a halfway updated
+ * state. In such a two-step implementation, the table may be left with
+ * extra internal tables, but no different mapping on failure.
+ */
+ if (!mm_map_level(begin, end, paddr, attrs, t->table, level, flags))
+ return false;
+
+ mm_map_level(begin, end, paddr, attrs, t->table, level,
+ flags | MAP_FLAG_COMMIT);
+
+ /* Invalidate the tlb. */
+ mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+
+ return true;
+}
+
+/**
+ * Updates the given table such that the given virtual address range is not
+ * mapped to any physical address.
+ */
+bool mm_ptable_unmap(struct mm_ptable *t, vaddr_t begin, vaddr_t end, int mode)
+{
+ int flags = (mode & MM_MODE_NOSYNC) ? 0 : MAP_FLAG_SYNC;
+ int level = arch_mm_max_level(&t->arch);
+
+ begin = arch_mm_clear_va(begin);
+ end = arch_mm_clear_va(end + PAGE_SIZE - 1);
+
+ /* Also do updates in two steps, similarly to mm_ptable_map. */
+ if (!mm_map_level(begin, end, begin, 0, t->table, level, flags))
+ return false;
+
+ mm_map_level(begin, end, begin, 0, t->table, level,
+ flags | MAP_FLAG_COMMIT);
+
+ /* Invalidate the tlb. */
+ mm_invalidate_tlb(begin, end, (mode & MM_MODE_STAGE1) != 0);
+
+ return true;
+}
+
+/**
+ * Updates the given table such that a single virtual address page is mapped
+ * to a single physical address page in the provided architecture-agnostic mode.
+ */
+bool mm_ptable_map_page(struct mm_ptable *t, vaddr_t va, paddr_t pa, int mode)
+{
+ size_t i;
+ uint64_t attrs = arch_mm_mode_to_attrs(mode);
+ pte_t *table = t->table;
+ bool sync = !(mode & MM_MODE_NOSYNC);
+
+ va = arch_mm_clear_va(va);
+ pa = arch_mm_clear_pa(pa);
+
+ for (i = arch_mm_max_level(&t->arch); i > 0; i--) {
+ table = mm_populate_table_pte(table + mm_index(va, i), i, sync);
+ if (!table)
+ return false;
+ }
+
+ i = mm_index(va, 0);
+ table[i] = arch_mm_pa_to_page_pte(pa, attrs);
+ return true;
+}
+
+/**
+ * Writes the given table to the debug log, calling itself recursively to
+ * write sub-tables.
+ */
+static void mm_dump_table_recursive(pte_t *table, int level, int max_level)
+{
+ uint64_t i;
+ for (i = 0; i < PAGE_SIZE / sizeof(pte_t); i++) {
+ if (!arch_mm_pte_is_present(table[i]))
+ continue;
+
+ dlog("%*s%x: %x\n", 4 * (max_level - level), "", i, table[i]);
+ if (!level)
+ continue;
+
+ if (arch_mm_pte_is_table(table[i])) {
+ mm_dump_table_recursive(arch_mm_pte_to_table(table[i]),
+ level - 1, max_level);
+ }
+ }
+}
+
+/**
+ * Write the given table to the debug log.
+ */
+void mm_ptable_dump(struct mm_ptable *t)
+{
+ int max_level = arch_mm_max_level(&t->arch);
+ mm_dump_table_recursive(t->table, max_level, max_level);
+}
+
+/**
+ * Defragments the given page table by converting page table references to
+ * blocks whenever possible.
+ */
+void mm_ptable_defrag(struct mm_ptable *t)
+{
+ /* TODO: Implement. */
+}
+
+/**
+ * Initialises the given page table.
+ */
+bool mm_ptable_init(struct mm_ptable *t, int mode)
+{
+ size_t i;
+ pte_t *table;
+
+ if (mode & MM_MODE_NOSYNC)
+ table = halloc_aligned_nosync(PAGE_SIZE, PAGE_SIZE);
+ else
+ table = halloc_aligned(PAGE_SIZE, PAGE_SIZE);
+
+ if (!table)
+ return false;
+
+ for (i = 0; i < PAGE_SIZE / sizeof(pte_t); i++)
+ table[i] = arch_mm_absent_pte();
+
+ t->table = table;
+ arch_mm_ptable_init(&t->arch);
+
+ return true;
+}
diff --git a/src/rules.mk b/src/rules.mk
index 313748d..0426fba 100644
--- a/src/rules.mk
+++ b/src/rules.mk
@@ -4,6 +4,7 @@
SRCS += cpu.c
SRCS += fdt.c
SRCS += main.c
+SRCS += mm.c
SRCS += std.c
SRCS += vm.c