Refactor aarch64 barriers and TLBI commands

Use macros instead of function calls so that the barriers are always
inlined and correctness does not depend on LTO inlining.
Make the macros take an op-kind argument (shareability domain / access
type, e.g. sy, ish, nsh) so they generalize to all barrier variants.

Test: ./kokoro/ubuntu/build.sh
Change-Id: I8a5553d47cf3a0965fbf35d93c3c925f5f02ac4e
diff --git a/Makefile b/Makefile
index 4d7b384..b37d946 100644
--- a/Makefile
+++ b/Makefile
@@ -39,7 +39,7 @@
 export PATH := $(PREBUILTS)/clang/bin:$(PATH)
 
 CHECKPATCH := $(PWD)/third_party/linux/scripts/checkpatch.pl \
-	--ignore BRACES,SPDX_LICENSE_TAG,VOLATILE,SPLIT_STRING,AVOID_EXTERNS,USE_SPINLOCK_T,NEW_TYPEDEFS,INITIALISED_STATIC,FILE_PATH_CHANGES,EMBEDDED_FUNCTION_NAME --quiet
+	--ignore BRACES,SPDX_LICENSE_TAG,VOLATILE,SPLIT_STRING,AVOID_EXTERNS,USE_SPINLOCK_T,NEW_TYPEDEFS,INITIALISED_STATIC,FILE_PATH_CHANGES,EMBEDDED_FUNCTION_NAME,SINGLE_STATEMENT_DO_WHILE_MACRO --quiet
 
 # Select the project to build.
 PROJECT ?= reference
diff --git a/inc/hf/arch/barriers.h b/inc/hf/arch/barriers.h
index 1ff43e6..85d4e7d 100644
--- a/inc/hf/arch/barriers.h
+++ b/inc/hf/arch/barriers.h
@@ -16,20 +16,55 @@
 
 #pragma once
 
-/**
- * Ensures all explicit memory accesses before this point are completed before
- * any later memory accesses are performed.
- */
-void dmb(void);
+/** AArch64-specific API */
 
 /**
- * Ensures all explicit memory access and management instructions have completed
- * before continuing.
+ * Ensures explicit memory accesses before this point are completed before any
+ * later memory accesses are performed. The instruction argument specifies:
+ *   - the shareability domain over which the instruction must operate,
+ *   - the accesses for which the instruction operates.
  */
-void dsb(void);
+/*
+ * The "memory" clobber keeps the compiler from reordering or caching
+ * memory accesses across the barrier; the old out-of-line function
+ * call provided this implicitly.
+ */
+#define dmb(arg)                                              \
+	do {                                                  \
+		__asm__ volatile("dmb " #arg : : : "memory"); \
+	} while (0)
+
+/**
+ * Ensures explicit memory access and management instructions have completed
+ * before continuing. The instruction argument specifies:
+ *   - the shareability domain over which the instruction must operate,
+ *   - the accesses for which the instruction operates.
+ */
+/* "memory" clobber: act as a compiler barrier as well as a hardware one. */
+#define dsb(arg)                                              \
+	do {                                                  \
+		__asm__ volatile("dsb " #arg : : : "memory"); \
+	} while (0)
 
 /**
  * Flushes the instruction pipeline so that instructions are fetched from
  * memory.
  */
-void isb(void);
+/* "memory" clobber: prevent the compiler sinking stores past the barrier. */
+#define isb()                                           \
+	do {                                            \
+		__asm__ volatile("isb" : : : "memory"); \
+	} while (0)
+
+/** Platform-agnostic API */
+
+/**
+ * Ensures all explicit memory accesses before this point are completed before
+ * any later memory accesses are performed.
+ */
+#define memory_ordering_barrier() dmb(sy)
+
+/**
+ * Ensures all explicit memory access and management instructions have completed
+ * before continuing.
+ */
+#define data_sync_barrier() dsb(sy)
+
+/**
+ * Flushes the instruction pipeline so that instructions are fetched from
+ * memory.
+ */
+#define insn_sync_barrier() isb()
diff --git a/inc/hf/io.h b/inc/hf/io.h
index fbafaf8..f4a80a4 100644
--- a/inc/hf/io.h
+++ b/inc/hf/io.h
@@ -133,7 +133,7 @@
 {
 	uint8_t v = io_read8(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -141,7 +141,7 @@
 {
 	uint16_t v = io_read16(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -149,7 +149,7 @@
 {
 	uint32_t v = io_read32(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -157,7 +157,7 @@
 {
 	uint64_t v = io_read64(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -165,7 +165,7 @@
 {
 	uint8_t v = io_read8_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -173,7 +173,7 @@
 {
 	uint16_t v = io_read16_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -181,7 +181,7 @@
 {
 	uint32_t v = io_read32_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -189,7 +189,7 @@
 {
 	uint64_t v = io_read64_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -247,48 +247,48 @@
 
 static inline void io_write8_mb(io8_t io, uint8_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write8(io, v);
 }
 
 static inline void io_write16_mb(io16_t io, uint16_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write16(io, v);
 }
 
 static inline void io_write32_mb(io32_t io, uint32_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write32(io, v);
 }
 
 static inline void io_write64_mb(io64_t io, uint64_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write64(io, v);
 }
 
 static inline void io_write8_array_mb(io8_array_t io, size_t n, uint8_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write8_array(io, n, v);
 }
 
 static inline void io_write16_array_mb(io16_array_t io, size_t n, uint16_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write16_array(io, n, v);
 }
 
 static inline void io_write32_array_mb(io32_array_t io, size_t n, uint32_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write32_array(io, n, v);
 }
 
 static inline void io_write64_array_mb(io64_array_t io, size_t n, uint64_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write64_array(io, n, v);
 }
diff --git a/src/arch/aarch64/BUILD.gn b/src/arch/aarch64/BUILD.gn
index 9f29b72..8abc3d0 100644
--- a/src/arch/aarch64/BUILD.gn
+++ b/src/arch/aarch64/BUILD.gn
@@ -19,7 +19,6 @@
 # Implementation of the arch interface for aarch64.
 source_set("arch") {
   sources = [
-    "barriers.c",
     "cpu.c",
     "mm.c",
     "timer.c",
diff --git a/src/arch/aarch64/barriers.c b/src/arch/aarch64/barriers.c
deleted file mode 100644
index 8642b05..0000000
--- a/src/arch/aarch64/barriers.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2018 The Hafnium Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "hf/arch/barriers.h"
-
-void dmb(void)
-{
-	__asm__ volatile("dmb sy");
-}
-
-void dsb(void)
-{
-	__asm__ volatile("dsb sy");
-}
-
-void isb(void)
-{
-	__asm__ volatile("isb");
-}
diff --git a/src/arch/aarch64/hypervisor/handler.c b/src/arch/aarch64/hypervisor/handler.c
index 7ee74d1..621b847 100644
--- a/src/arch/aarch64/hypervisor/handler.c
+++ b/src/arch/aarch64/hypervisor/handler.c
@@ -18,6 +18,7 @@
 
 #include "hf/arch/barriers.h"
 #include "hf/arch/init.h"
+#include "hf/arch/mm.h"
 
 #include "hf/api.h"
 #include "hf/cpu.h"
@@ -102,15 +103,6 @@
 }
 
 /**
- * Ensures all explicit memory access and management instructions for
- * non-shareable normal memory have completed before continuing.
- */
-static void dsb_nsh(void)
-{
-	__asm__ volatile("dsb nsh");
-}
-
-/**
  * Invalidate all stage 1 TLB entries on the current (physical) CPU for the
  * current VMID.
  */
@@ -135,7 +127,7 @@
 	 * TLB invalidation has taken effect. Non-sharable is enough because the
 	 * TLB is local to the CPU.
 	 */
-	dsb_nsh();
+	dsb(nsh);
 }
 
 /**
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index c2a1840..623ba0c 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -16,6 +16,7 @@
 
 #include "hf/mm.h"
 
+#include "hf/arch/barriers.h"
 #include "hf/arch/cpu.h"
 
 #include "hf/dlog.h"
@@ -89,6 +90,15 @@
 
 /* clang-format on */
 
+#define tlbi(op)                               \
+	do {                                   \
+		__asm__ volatile("tlbi " #op); \
+	} while (0)
+#define tlbi_reg(op, reg)                                              \
+	do {                                                           \
+		__asm__ __volatile__("tlbi " #op ", %0" : : "r"(reg)); \
+	} while (0)
+
 /** Mask for the address bits of the pte. */
 #define PTE_ADDR_MASK \
 	(((UINT64_C(1) << 48) - 1) & ~((UINT64_C(1) << PAGE_BITS) - 1))
@@ -243,13 +253,13 @@
 	begin >>= 12;
 	end >>= 12;
 
-	__asm__ volatile("dsb ishst");
+	dsb(ishst);
 
 	for (it = begin; it < end; it += (UINT64_C(1) << (PAGE_BITS - 12))) {
-		__asm__("tlbi vae2is, %0" : : "r"(it));
+		tlbi_reg(vae2is, it);
 	}
 
-	__asm__ volatile("dsb ish");
+	dsb(ish);
 }
 
 /**
@@ -267,16 +277,15 @@
 	begin >>= 12;
 	end >>= 12;
 
-	__asm__ volatile("dsb ishst");
+	dsb(ishst);
 
 	for (it = begin; it < end; it += (UINT64_C(1) << (PAGE_BITS - 12))) {
-		__asm__("tlbi ipas2e1, %0" : : "r"(it));
+		tlbi_reg(ipas2e1, it);
 	}
 
-	__asm__ volatile(
-		"dsb ish\n"
-		"tlbi vmalle1is\n"
-		"dsb ish\n");
+	dsb(ish);
+	tlbi(vmalle1is);
+	dsb(ish);
 }
 
 /**
@@ -294,8 +303,7 @@
 		__asm__ volatile("dc cvac, %0" : : "r"(line_begin));
 		line_begin += line_size;
 	}
-
-	__asm__ volatile("dsb sy");
+	dsb(sy);
 }
 
 uint64_t arch_mm_mode_to_stage1_attrs(int mode)
@@ -555,10 +563,10 @@
 	    (3 << 28) | /* RES1 bits. */
 	    0;
 
-	__asm__ volatile("dsb sy");
-	__asm__ volatile("isb");
+	dsb(sy);
+	isb();
 	write_msr(sctlr_el2, v);
-	__asm__ volatile("isb");
+	isb();
 
 	return true;
 }
diff --git a/src/arch/aarch64/pl011/pl011.c b/src/arch/aarch64/pl011/pl011.c
index 9afc3b9..f98f36e 100644
--- a/src/arch/aarch64/pl011/pl011.c
+++ b/src/arch/aarch64/pl011/pl011.c
@@ -57,12 +57,10 @@
 		/* do nothing */
 	}
 
-	dmb();
-
-	/* Write the character out. */
+	/* Write the character out, force memory access ordering. */
+	memory_ordering_barrier();
 	io_write32(UARTDR, c);
-
-	dmb();
+	memory_ordering_barrier();
 
 	/* Wait until the UART is no longer busy. */
 	while (io_read32_mb(UARTFR) & UARTFR_BUSY) {