Implement saving and restoring of FP registers.

TFP temporarily disabled until exception handling code is in place.

Bug: 129131324
Change-Id: I0cffa8011e2613f801bfa6891f758c0f817619de
diff --git a/project/reference b/project/reference
index 6582198..9264cb7 160000
--- a/project/reference
+++ b/project/reference
@@ -1 +1 @@
-Subproject commit 6582198e313bf1b7965eb53ad966bdc47639e3a2
+Subproject commit 9264cb73132a3c0541097c92335753685fb041b2
diff --git a/src/arch/aarch64/cpu.c b/src/arch/aarch64/cpu.c
index a1a2c87..b7b5b99 100644
--- a/src/arch/aarch64/cpu.c
+++ b/src/arch/aarch64/cpu.c
@@ -68,7 +68,9 @@
 		       (1u << 10) | /* BSU bits set to inner-sh. */
 		       (3u << 13);  /* TWI, TWE bits. */
 
-		cptr |= (1u << 10); /* TFP, trap fp access. */
+		/* TODO: Trap fp access once handler logic is in place. */
+
+		/* TODO: Investigate fpexc32_el2 for 32bit EL0 support. */
 	}
 
 	r->lazy.hcr_el2 = hcr;
diff --git a/src/arch/aarch64/exceptions.S b/src/arch/aarch64/exceptions.S
index 031b82d..bca781d 100644
--- a/src/arch/aarch64/exceptions.S
+++ b/src/arch/aarch64/exceptions.S
@@ -312,6 +312,33 @@
 	mrs x28, vttbr_el2
 	str x28, [x1, #VCPU_LAZY + 16 * 14]
 
+	/*
+	 * Save floating point registers.
+	 *
+	 * Offset is too large, so start from a new base.
+	 */
+	add x2, x1, #VCPU_FREGS
+	stp q0, q1, [x2, #32 * 0]
+	stp q2, q3, [x2, #32 * 1]
+	stp q4, q5, [x2, #32 * 2]
+	stp q6, q7, [x2, #32 * 3]
+	stp q8, q9, [x2, #32 * 4]
+	stp q10, q11, [x2, #32 * 5]
+	stp q12, q13, [x2, #32 * 6]
+	stp q14, q15, [x2, #32 * 7]
+	stp q16, q17, [x2, #32 * 8]
+	stp q18, q19, [x2, #32 * 9]
+	stp q20, q21, [x2, #32 * 10]
+	stp q22, q23, [x2, #32 * 11]
+	stp q24, q25, [x2, #32 * 12]
+	stp q26, q27, [x2, #32 * 13]
+	stp q28, q29, [x2, #32 * 14]
+	/* Offset becomes too large, so move the base. */
+	stp q30, q31, [x2, #32 * 15]!
+	mrs x3, fpsr
+	mrs x4, fpcr
+	stp x3, x4, [x2, #32 * 1]
+
 	/* Save new vcpu pointer in non-volatile register. */
 	mov x19, x0
 
@@ -334,6 +361,34 @@
 	bl begin_restoring_state
 	mov x0, x19
 
+	/*
+	 * Restore floating point registers.
+	 *
+	 * Offset is too large, so start from a new base.
+	 */
+	add x2, x0, #VCPU_FREGS
+	ldp q0, q1, [x2, #32 * 0]
+	ldp q2, q3, [x2, #32 * 1]
+	ldp q4, q5, [x2, #32 * 2]
+	ldp q6, q7, [x2, #32 * 3]
+	ldp q8, q9, [x2, #32 * 4]
+	ldp q10, q11, [x2, #32 * 5]
+	ldp q12, q13, [x2, #32 * 6]
+	ldp q14, q15, [x2, #32 * 7]
+	ldp q16, q17, [x2, #32 * 8]
+	ldp q18, q19, [x2, #32 * 9]
+	ldp q20, q21, [x2, #32 * 10]
+	ldp q22, q23, [x2, #32 * 11]
+	ldp q24, q25, [x2, #32 * 12]
+	ldp q26, q27, [x2, #32 * 13]
+	ldp q28, q29, [x2, #32 * 14]
+	/* Offset becomes too large, so move the base. */
+	ldp q30, q31, [x2, #32 * 15]!
+	ldp x3, x4, [x2, #32 * 1]
+	msr fpsr, x3
+	/* TODO: Optimise by only performing expensive restore if changed. */
+	msr fpcr, x4
+
 	/* Restore lazy registers. */
 	ldp x24, x25, [x0, #VCPU_LAZY + 16 * 0]
 	msr vmpidr_el2, x24
diff --git a/src/arch/aarch64/hftest/BUILD.gn b/src/arch/aarch64/hftest/BUILD.gn
index 37e923d..952e2ae 100644
--- a/src/arch/aarch64/hftest/BUILD.gn
+++ b/src/arch/aarch64/hftest/BUILD.gn
@@ -58,3 +58,11 @@
     "state.c",
   ]
 }
+
+# Interact directly with registers.
+source_set("registers") {
+  testonly = true
+  sources = [
+    "registers.c",
+  ]
+}
diff --git a/src/arch/aarch64/hftest/cpu_entry.S b/src/arch/aarch64/hftest/cpu_entry.S
index 77b3af2..6461377 100644
--- a/src/arch/aarch64/hftest/cpu_entry.S
+++ b/src/arch/aarch64/hftest/cpu_entry.S
@@ -20,5 +20,10 @@
 	ldr x1, [x0]
 	mov sp, x1
 
+	/* Disable trapping floating point access in EL1. */
+	mov x1, #(0x3 << 20)
+	msr cpacr_el1, x1
+	isb
+
 	/* Jump to C entry point. */
 	b vm_cpu_entry
diff --git a/src/arch/aarch64/hftest/entry.S b/src/arch/aarch64/hftest/entry.S
index 99c086c..5fbe110 100644
--- a/src/arch/aarch64/hftest/entry.S
+++ b/src/arch/aarch64/hftest/entry.S
@@ -21,6 +21,11 @@
 	adr x30, kstack + 4096
 	mov sp, x30
 
+	/* Disable trapping floating point access in EL1. */
+	mov x30, #(0x3 << 20)
+	msr cpacr_el1, x30
+	isb
+
 	/* Call into C code. */
 	bl kmain
 
diff --git a/src/arch/aarch64/hftest/registers.c b/src/arch/aarch64/hftest/registers.c
new file mode 100644
index 0000000..15b3409
--- /dev/null
+++ b/src/arch/aarch64/hftest/registers.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2019 The Hafnium Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hf/arch/vm/registers.h"
+
+#define read_fp_register(name)                                   \
+	__extension__({                                          \
+		double __v;                                      \
+		__asm__ volatile("fmov %0, " #name : "=r"(__v)); \
+		__v;                                             \
+	})
+
+#define write_fp_register(name, value)                  \
+	__extension__({                                 \
+		__asm__ volatile("fmov " #name ", %0"   \
+				 :                      \
+				 : "r"((double)(value)) \
+				 : #name);              \
+	})
+
+#define move_fp_register(dest, source)                                    \
+	__extension__({                                                   \
+		__asm__ volatile("fmov " #dest ", " #source : : : #dest); \
+	})
+
+void fill_fp_registers(double value)
+{
+	write_fp_register(d0, value);
+	move_fp_register(d1, d0);
+	move_fp_register(d2, d0);
+	move_fp_register(d3, d0);
+	move_fp_register(d4, d0);
+	move_fp_register(d5, d0);
+	move_fp_register(d6, d0);
+	move_fp_register(d7, d0);
+	move_fp_register(d8, d0);
+	move_fp_register(d9, d0);
+	move_fp_register(d10, d0);
+	move_fp_register(d11, d0);
+	move_fp_register(d12, d0);
+	move_fp_register(d13, d0);
+	move_fp_register(d14, d0);
+	move_fp_register(d15, d0);
+	move_fp_register(d16, d0);
+	move_fp_register(d17, d0);
+	move_fp_register(d18, d0);
+	move_fp_register(d19, d0);
+	move_fp_register(d20, d0);
+	move_fp_register(d21, d0);
+	move_fp_register(d22, d0);
+	move_fp_register(d23, d0);
+	move_fp_register(d24, d0);
+	move_fp_register(d25, d0);
+	move_fp_register(d26, d0);
+	move_fp_register(d27, d0);
+	move_fp_register(d28, d0);
+	move_fp_register(d29, d0);
+	move_fp_register(d30, d0);
+	move_fp_register(d31, d0);
+}
+
+/** Returns true iff every one of d0-d31 compares equal to `value`. */
+bool check_fp_register(double value)
+{
+	bool result = true;
+	result = result && (read_fp_register(d0) == value);
+	result = result && (read_fp_register(d1) == value);
+	result = result && (read_fp_register(d2) == value);
+	result = result && (read_fp_register(d3) == value);
+	result = result && (read_fp_register(d4) == value);
+	result = result && (read_fp_register(d5) == value);
+	result = result && (read_fp_register(d6) == value);
+	result = result && (read_fp_register(d7) == value);
+	result = result && (read_fp_register(d8) == value);
+	result = result && (read_fp_register(d9) == value);
+	result = result && (read_fp_register(d10) == value);
+	result = result && (read_fp_register(d11) == value);
+	result = result && (read_fp_register(d12) == value);
+	result = result && (read_fp_register(d13) == value);
+	result = result && (read_fp_register(d14) == value);
+	result = result && (read_fp_register(d15) == value);
+	result = result && (read_fp_register(d16) == value);
+	result = result && (read_fp_register(d17) == value);
+	result = result && (read_fp_register(d18) == value);
+	result = result && (read_fp_register(d19) == value);
+	result = result && (read_fp_register(d20) == value);
+	result = result && (read_fp_register(d21) == value);
+	result = result && (read_fp_register(d22) == value);
+	result = result && (read_fp_register(d23) == value);
+	result = result && (read_fp_register(d24) == value);
+	result = result && (read_fp_register(d25) == value);
+	result = result && (read_fp_register(d26) == value);
+	result = result && (read_fp_register(d27) == value);
+	result = result && (read_fp_register(d28) == value);
+	result = result && (read_fp_register(d29) == value);
+	result = result && (read_fp_register(d30) == value);
+	result = result && (read_fp_register(d31) == value);
+	return result;
+}
diff --git a/src/arch/aarch64/inc/hf/arch/types.h b/src/arch/aarch64/inc/hf/arch/types.h
index 25b4c39..71e91e0 100644
--- a/src/arch/aarch64/inc/hf/arch/types.h
+++ b/src/arch/aarch64/inc/hf/arch/types.h
@@ -16,10 +16,14 @@
 
 #pragma once
 
+#include <stdalign.h>
 #include <stdint.h>
 
+#include "hf/assert.h"
+
 #define PAGE_BITS 12
 #define PAGE_LEVEL_BITS 9
+#define FLOAT_REG_BYTES 16
 
 /** The type of a page table entry (PTE). */
 typedef uint64_t pte_t;
@@ -30,16 +34,28 @@
 /** Integer type large enough to hold a virtual address. */
 typedef uintptr_t uintvaddr_t;
 
-/** The integer large corresponding to the native register size. */
+/** The integer type corresponding to the native register size. */
 typedef uint64_t uintreg_t;
 
+/**
+ * The struct for storing a floating point register.
+ *
+ * 2 64-bit integers used to avoid need for FP support at this level.
+ */
+struct float_reg {
+	alignas(FLOAT_REG_BYTES) uint64_t low;
+	uint64_t high;
+};
+
+static_assert(sizeof(struct float_reg) == FLOAT_REG_BYTES,
+	      "Ensure float register type is 128 bits.");
+
 /** Type to represent the register state of a VM.  */
 struct arch_regs {
 	/* General purpose registers. */
 	uintreg_t r[31];
 	uintreg_t pc;
 	uintreg_t spsr;
-
 	struct {
 		uintreg_t vmpidr_el2;
 		uintreg_t csselr_el1;
@@ -73,4 +89,8 @@
 		uintreg_t cntv_cval_el0;
 		uintreg_t cntv_ctl_el0;
 	} lazy;
+	/* Floating point registers. */
+	struct float_reg fp[32];
+	uintreg_t fpsr;
+	uintreg_t fpcr;
 };
diff --git a/src/arch/aarch64/inc/hf/arch/vm/registers.h b/src/arch/aarch64/inc/hf/arch/vm/registers.h
new file mode 100644
index 0000000..f6305af
--- /dev/null
+++ b/src/arch/aarch64/inc/hf/arch/vm/registers.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2019 The Hafnium Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+
+void fill_fp_registers(double value);
+bool check_fp_register(double value);
diff --git a/src/arch/aarch64/offsets.c b/src/arch/aarch64/offsets.c
index abb8adf..f85cf05 100644
--- a/src/arch/aarch64/offsets.c
+++ b/src/arch/aarch64/offsets.c
@@ -30,3 +30,4 @@
 CHECK_OFFSET(CPU_STACK_BOTTOM, struct cpu, stack_bottom);
 CHECK_OFFSET(VCPU_REGS, struct vcpu, regs);
 CHECK_OFFSET(VCPU_LAZY, struct vcpu, regs.lazy);
+CHECK_OFFSET(VCPU_FREGS, struct vcpu, regs.fp);
diff --git a/src/arch/aarch64/offsets.h b/src/arch/aarch64/offsets.h
index a63d75b..d872e64 100644
--- a/src/arch/aarch64/offsets.h
+++ b/src/arch/aarch64/offsets.h
@@ -19,5 +19,6 @@
 /* These are checked in offset.c. */
 #define CPU_ID 0
 #define CPU_STACK_BOTTOM 8
-#define VCPU_REGS 24
+#define VCPU_REGS 32
 #define VCPU_LAZY (VCPU_REGS + 264)
+#define VCPU_FREGS (VCPU_LAZY + 248)
diff --git a/test/vmapi/primary_with_secondaries/BUILD.gn b/test/vmapi/primary_with_secondaries/BUILD.gn
index 868487e..15b190c 100644
--- a/test/vmapi/primary_with_secondaries/BUILD.gn
+++ b/test/vmapi/primary_with_secondaries/BUILD.gn
@@ -26,6 +26,7 @@
   sources = [
     "abort.c",
     "boot.c",
+    "floating_point.c",
     "interrupts.c",
     "mailbox.c",
     "memory_sharing.c",
@@ -37,6 +38,7 @@
   sources += [ "util.c" ]
 
   deps = [
+    "//src/arch/aarch64/hftest:registers",
     "//test/hftest:hftest_primary_vm",
   ]
 }
diff --git a/test/vmapi/primary_with_secondaries/floating_point.c b/test/vmapi/primary_with_secondaries/floating_point.c
new file mode 100644
index 0000000..9581a19
--- /dev/null
+++ b/test/vmapi/primary_with_secondaries/floating_point.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2019 The Hafnium Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hf/arch/std.h"
+#include "hf/arch/vm/registers.h"
+
+#include "hf/spci.h"
+
+#include "vmapi/hf/call.h"
+
+#include "hftest.h"
+#include "primary_with_secondary.h"
+#include "util.h"
+
+/**
+ * Test that floating point registers are saved and restored by
+ * filling them with one value here and a different value in the
+ * service.
+ */
+TEST(floating_point, fp_fill)
+{
+	const double first = 1.2;
+	const double second = -2.3;
+	struct hf_vcpu_run_return run_res;
+	struct mailbox_buffers mb = set_up_mailbox();
+
+	fill_fp_registers(first);
+	SERVICE_SELECT(SERVICE_VM0, "fp_fill", mb.send);
+	run_res = hf_vcpu_run(SERVICE_VM0, 0);
+	EXPECT_EQ(run_res.code, HF_VCPU_RUN_YIELD);
+	EXPECT_EQ(check_fp_register(first), true);
+
+	fill_fp_registers(second);
+	run_res = hf_vcpu_run(SERVICE_VM0, 0);
+	EXPECT_EQ(run_res.code, HF_VCPU_RUN_YIELD);
+	EXPECT_EQ(check_fp_register(second), true);
+}
diff --git a/test/vmapi/primary_with_secondaries/services/BUILD.gn b/test/vmapi/primary_with_secondaries/services/BUILD.gn
index 377ac03..62d4e1b 100644
--- a/test/vmapi/primary_with_secondaries/services/BUILD.gn
+++ b/test/vmapi/primary_with_secondaries/services/BUILD.gn
@@ -52,6 +52,20 @@
   ]
 }
 
+# Service for floating point register save/restore checks.
+source_set("floating_point") {
+  testonly = true
+  public_configs = [ "//test/hftest:hftest_config" ]
+
+  sources = [
+    "floating_point.c",
+  ]
+
+  deps = [
+    "//src/arch/aarch64/hftest:registers",
+  ]
+}
+
 # Services related to memory sharing.
 source_set("memory") {
   testonly = true
@@ -159,6 +173,7 @@
     ":check_state",
     ":echo",
     ":echo_with_notification",
+    ":floating_point",
     ":interruptible",
     ":memory",
     ":receive_block",
diff --git a/test/vmapi/primary_with_secondaries/services/floating_point.c b/test/vmapi/primary_with_secondaries/services/floating_point.c
new file mode 100644
index 0000000..2ba57c9
--- /dev/null
+++ b/test/vmapi/primary_with_secondaries/services/floating_point.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2019 The Hafnium Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hf/arch/std.h"
+#include "hf/arch/vm/registers.h"
+
+#include "hf/spci.h"
+
+#include "vmapi/hf/call.h"
+
+#include "hftest.h"
+
+TEST_SERVICE(fp_fill)
+{
+	const double value = 0.75;
+	fill_fp_registers(value);
+	hf_vcpu_yield();
+
+	ASSERT_TRUE(check_fp_register(value));
+	hf_vcpu_yield();
+}