Flush all TLB entries if range too large
Revisions of the ISA prior to ARMv8.4 do not support invalidation of
a range of addresses, only looping per page or invalidating all. Hafnium
currently only supports the former, this patch adds the latter for
performance reasons.
Change-Id: I2d07c66eaea0dabfdec1b63673103c1f43866784
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index 34d1dbd..191368e 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -88,6 +88,13 @@
#define STAGE2_ACCESS_READ UINT64_C(1)
#define STAGE2_ACCESS_WRITE UINT64_C(2)
+/**
+ * Threshold number of pages in TLB to invalidate after which we invalidate all
+ * TLB entries on a given level.
+ * Constant is the number of pointers per page table entry, also used by Linux.
+ */
+#define MAX_TLBI_OPS MM_PTE_PER_PAGE
+
/* clang-format on */
#define tlbi(op) \
@@ -250,15 +257,24 @@
uintvaddr_t end = va_addr(va_end);
uintvaddr_t it;
- begin >>= 12;
- end >>= 12;
-
/* Sync with page table updates. */
dsb(ishst);
- /* Invalidate stage-1 TLB, one page from the range at a time. */
- for (it = begin; it < end; it += (UINT64_C(1) << (PAGE_BITS - 12))) {
- tlbi_reg(vae2is, it);
+ /*
+ * Revisions prior to ARMv8.4 do not support invalidating a range of
+ * addresses, which means we have to loop over individual pages. If
+ * there are too many, it is quicker to invalidate all TLB entries.
+ */
+ if ((end - begin) > (MAX_TLBI_OPS * PAGE_SIZE)) {
+ tlbi(alle2);
+ } else {
+ begin >>= 12;
+ end >>= 12;
+ /* Invalidate stage-1 TLB, one page from the range at a time. */
+ for (it = begin; it < end;
+ it += (UINT64_C(1) << (PAGE_BITS - 12))) {
+ tlbi_reg(vae2is, it);
+ }
}
/* Sync data accesses with TLB invalidation completion. */
@@ -280,34 +296,49 @@
/* TODO: This only applies to the current VMID. */
- begin >>= 12;
- end >>= 12;
-
/* Sync with page table updates. */
dsb(ishst);
/*
- * Invalidate stage-2 TLB, one page from the range at a time.
- * Note that this has no effect if the CPU has a TLB with combined
- * stage-1/stage-2 translation.
+ * Revisions prior to ARMv8.4 do not support invalidating a range of
+ * addresses, which means we have to loop over individual pages. If
+ * there are too many, it is quicker to invalidate all TLB entries.
*/
- for (it = begin; it < end; it += (UINT64_C(1) << (PAGE_BITS - 12))) {
- tlbi_reg(ipas2e1, it);
+ if ((end - begin) > (MAX_TLBI_OPS * PAGE_SIZE)) {
+ /*
+ * Invalidate all stage-1 and stage-2 entries of the TLB for
+ * the current VMID.
+ */
+ tlbi(vmalls12e1);
+ } else {
+ begin >>= 12;
+ end >>= 12;
+
+ /*
+ * Invalidate stage-2 TLB, one page from the range at a time.
+ * Note that this has no effect if the CPU has a TLB with
+ * combined stage-1/stage-2 translation.
+ */
+ for (it = begin; it < end;
+ it += (UINT64_C(1) << (PAGE_BITS - 12))) {
+ tlbi_reg(ipas2e1, it);
+ }
+
+ /*
+ * Ensure completion of stage-2 invalidation in case a page
+ * table walk on another CPU refilled the TLB with a complete
+ * stage-1 + stage-2 walk based on the old stage-2 mapping.
+ */
+ dsb(ish);
+
+ /*
+ * Invalidate all stage-1 TLB entries. If the CPU has a combined
+ * TLB for stage-1 and stage-2, this will invalidate stage-2 as
+ * well.
+ */
+ tlbi(vmalle1is);
}
- /*
- * Ensure completion of stage-2 invalidation in case a page table walk
- * on another CPU refilled the TLB with a complete stage-1 + stage-2
- * walk based on the old stage-2 mapping.
- */
- dsb(ish);
-
- /*
- * Invalidate all stage-1 TLB entries. If the CPU has a combined TLB for
- * stage-1 and stage-2, this will invalidate stage-2 as well.
- */
- tlbi(vmalle1is);
-
/* Sync data accesses with TLB invalidation completion. */
dsb(ish);