Emit speculation barriers after ERETs

According to Linux commit 679db70801da9fda91d26caf13bf5b5ccc74e8e8,
some ARM64 CPUs may speculate past an ERET. This could be used as part
of a side-channel attack.

To mitigate the issue, emit DSB/ISB barriers after every ERET.

Add a build step which dumps the generated ELF and checks that this holds
for every ERET in the binary, to prevent regressions.

Bug: 146490856
Change-Id: Idf1c2690637a7edb4a366d30fec26ed444069f5e
diff --git a/build/image/check_elf.py b/build/image/check_elf.py
new file mode 100644
index 0000000..cbc6454
--- /dev/null
+++ b/build/image/check_elf.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#
+# Copyright 2019 The Hafnium Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/usr/bin/env python
+"""Check ELF file for assembly-level regressions.
+
+Objdumps the given ELF file and detects known assembly patterns, checking for
+regressions on bugs such as CPU errata. Raises an exception if a broken pattern
+is detected.
+"""
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+HF_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+CLANG_ROOT = os.path.join(HF_ROOT, "prebuilts", "linux-x64", "clang")
+OBJDUMP = os.path.join(CLANG_ROOT, "bin", "llvm-objdump")
+
+def check_eret_speculation_barrier(objdump_stdout):
+	"""
+	Some ARM64 CPUs speculatively execute instructions after ERET.
+	Check that every ERET is followed by DSB NSH and ISB.
+	"""
+	found_eret = False
+
+	STATE_DEFAULT = 1
+	STATE_EXPECT_DSB_NSH = 2
+	STATE_EXPECT_ISB = 3
+
+	REGEX_ERET = re.compile(r"^\s*[0-9a-f]+:\s*e0 03 9f d6\s+eret$")
+	REGEX_DSB_NSH = re.compile(r"^\s*[0-9a-f]+:\s*9f 37 03 d5\s*dsb\s+nsh$")
+	REGEX_ISB = re.compile(r"^\s*[0-9a-f]+:\s*df 3f 03 d5\s+isb$")
+
+	state = STATE_DEFAULT
+	for line in objdump_stdout:
+		if state == STATE_DEFAULT:
+			if re.match(REGEX_ERET, line):
+				found_eret = True
+				state = STATE_EXPECT_DSB_NSH
+		elif state == STATE_EXPECT_DSB_NSH:
+			if re.match(REGEX_DSB_NSH, line):
+				state = STATE_EXPECT_ISB
+			else:
+				raise Exception("ERET not followed by DSB NSH")
+		elif state == STATE_EXPECT_ISB:
+			if re.match(REGEX_ISB, line):
+				state = STATE_DEFAULT
+			else:
+				raise Exception("ERET not followed by ISB")
+
+	# Ensure that at least one instance was found, otherwise the regexes are
+	# probably wrong.
+	if not found_eret:
+		raise Exception("Could not find any ERET instructions")
+
+def Main():
+	parser = argparse.ArgumentParser()
+	parser.add_argument("input_elf",
+		help="ELF file to analyze")
+	parser.add_argument("stamp_file",
+		help="file to be touched if successful")
+	args = parser.parse_args()
+
+	objdump_stdout = subprocess.check_output([
+		OBJDUMP, "-d", args.input_elf ])
+	objdump_stdout = objdump_stdout.splitlines()
+
+	check_eret_speculation_barrier(objdump_stdout)
+
+	# Touch `stamp_file`.
+	with open(args.stamp_file, "w"):
+		pass
+
+	return 0
+
+if __name__ == "__main__":
+	sys.exit(Main())
diff --git a/build/image/image.gni b/build/image/image.gni
index f116cb6..7860784 100644
--- a/build/image/image.gni
+++ b/build/image/image.gni
@@ -25,8 +25,15 @@
   }
   output_root += invoker.image_name
 
+  file_root = "${root_out_dir}/${output_root}"
+  elf_file = "${file_root}.elf"
+  bin_file = "${file_root}.bin"
+
+  elf_target = "${target_name}__elf"
+  checked_elf_target = "${target_name}__checked_elf"
+
   # Link objects together
-  executable("${target_name}__elf") {
+  executable(elf_target) {
     forward_variables_from(invoker,
                            [
                              "cflags",
@@ -47,20 +54,46 @@
       "-T",
       rebase_path("//build/image/image.ld"),
     ]
+    visibility = [
+      ":${checked_elf_target}",
+      ":${invoker.target_name}",
+    ]
+  }
+
+  # Analyze the generated ELF file and check that assembly-level fixes, e.g.
+  # for CPU errata, have been properly applied.
+  action(checked_elf_target) {
+    forward_variables_from(invoker, [ "testonly" ])
+    stamp_file = elf_file + "_check.stamp"
+
+    script = "//build/image/check_elf.py"
+    deps = [
+      ":${elf_target}",
+    ]
+    args = [
+      rebase_path(elf_file),
+      rebase_path(stamp_file),
+    ]
+    outputs = [
+      stamp_file,
+    ]
     visibility = [ ":${invoker.target_name}" ]
   }
 
   action(target_name) {
     forward_variables_from(invoker, [ "testonly" ])
 
-    file_root = "${root_out_dir}/${output_root}"
-    elf_file = "${file_root}.elf"
-    bin_file = "${file_root}.bin"
-
     script = "//build/image/convert_to_binary.py"
-    deps = [
-      ":${target_name}__elf",
-    ]
+
+    if (defined(invoker.check_binary) && invoker.check_binary == true) {
+      deps = [
+        ":${checked_elf_target}",
+      ]
+    } else {
+      deps = [
+        ":${elf_target}",
+      ]
+    }
     args = [
       "--input",
       rebase_path(elf_file),
@@ -87,6 +120,10 @@
                              "testonly",
                            ])
     image_name = target_name
+
+    # Perform checks on the generated binary to prevent regressing on some
+    # classes of bugs, typically CPU errata.
+    check_binary = true
   }
 }
 
diff --git a/src/arch/aarch64/exception_macros.S b/src/arch/aarch64/exception_macros.S
index ef820b7..fee454a 100644
--- a/src/arch/aarch64/exception_macros.S
+++ b/src/arch/aarch64/exception_macros.S
@@ -15,6 +15,26 @@
  */
 
 /**
+ * From Linux commit 679db70801da9fda91d26caf13bf5b5ccc74e8e8:
+ * "Some CPUs can speculate past an ERET instruction and potentially perform
+ * speculative accesses to memory before processing the exception return.
+ * Since the register state is often controlled by a lower privilege level
+ * at the point of an ERET, this could potentially be used as part of a
+ * side-channel attack."
+ *
+ * This macro emits a speculation barrier after the ERET to prevent the CPU
+ * from speculating past the exception return.
+ *
+ * ARMv8.5 introduces a dedicated SB speculation barrier instruction.
+ * Use a DSB/ISB pair on older platforms.
+ */
+.macro eret_with_sb
+	eret
+	dsb	nsh
+	isb
+.endm
+
+/**
  * Saves the volatile registers onto the stack. This currently takes 14
  * instructions, so it can be used in exception handlers with 18 instructions
  * left, 2 of which in the same cache line (assuming a 16-byte cache line).
@@ -81,16 +101,18 @@
  * Switching to SPx and calling the C handler takes 16 instructions, so it's not
  * possible to add a branch to a common exit path without going into the next
  * cache line (assuming 16-byte cache lines). Additionally, to restore and
- * return we need an additional 16 instructions, so we implement the whole
- * handler within the allotted 32 instructions.
+ * return we need an additional 16 instructions, so we could implement the whole
+ * handler within the allotted 32 instructions. However, since we want to emit
+ * a speculation barrier after each ERET, we are forced to move the ERET to
+ * a shared exit path.
  */
-.macro current_exception_sp0 elx:req handler:req
+.macro current_exception_sp0 elx:req handler:req eret_label:req
 	msr spsel, #1
 	save_volatile_to_stack \elx
 	bl \handler
 	restore_volatile_from_stack \elx
 	msr spsel, #0
-	eret
+	b \eret_label
 .endm
 
 /**
diff --git a/src/arch/aarch64/hftest/exceptions.S b/src/arch/aarch64/hftest/exceptions.S
index b54e4d2..0203110 100644
--- a/src/arch/aarch64/hftest/exceptions.S
+++ b/src/arch/aarch64/hftest/exceptions.S
@@ -25,7 +25,7 @@
 
 .balign 0x80
 irq_cur_sp0:
-	current_exception_sp0 el1 irq_current
+	current_exception_sp0 el1 irq_current exception_handler_return
 
 .balign 0x80
 fiq_cur_sp0:
@@ -113,8 +113,10 @@
 skip_elr:
 	/* Restore register spsr_el1 using x1 as scratch. */
 	ldr x1, [sp, #8 * 23]
-        msr spsr_el1, x1
+	msr spsr_el1, x1
 
 	/* Restore x0 & x1, and release stack space. */
 	ldp x0, x1, [sp], #8 * 24
-	eret
+
+exception_handler_return:
+	eret_with_sb
diff --git a/src/arch/aarch64/hypervisor/exceptions.S b/src/arch/aarch64/hypervisor/exceptions.S
index fe89da8..17a5b63 100644
--- a/src/arch/aarch64/hypervisor/exceptions.S
+++ b/src/arch/aarch64/hypervisor/exceptions.S
@@ -518,7 +518,7 @@
 	/* Restore x0..x3, which we have used as scratch before. */
 	ldp x2, x3, [x0, #VCPU_REGS + 8 * 2]
 	ldp x0, x1, [x0, #VCPU_REGS + 8 * 0]
-	eret
+	eret_with_sb
 
 .balign 0x40
 /**
@@ -526,4 +526,4 @@
  */
 restore_from_stack_and_return:
 	restore_volatile_from_stack el2
-	eret
+	eret_with_sb