hftest: Write-back cpu_start_state before starting a new vCPU

A new vCPU is started with caching disabled, but it is meant to read data
provided by the vCPU which started it while that vCPU had caching enabled.
Because the data is needed before the new vCPU can set up its MMU and
enable data caching, the original vCPU must ensure that the data is
written back to memory prior to spawning the new vCPU.

This patch restructures the way hftests start new CPUs to accommodate
this requirement. A new architecture-agnostic power_mgmt build target is
created which handles the high-level logic of this process. The
power_mgmt target in src/arch is reduced to the thinnest layer possible,
merely performing an SMC call to spawn a new vCPU and then setting up
the stack pointer and jumping to the high-level entry point when the
vCPU is powered up.

It is now the task of the high-level power_mgmt module to ensure the
state struct is properly written back to memory and that it does not get
freed until the new vCPU is finished both low- and high-level
initialization.

Bug: 141103913
Test: smp.two_vcpus on RPi4
Change-Id: Ie77c6b5ad0aeecaceb0b56623908353843d03c3e
diff --git a/src/arch/aarch64/hftest/cpu_entry.S b/src/arch/aarch64/hftest/cpu_entry.S
index 702e84b..726b042 100644
--- a/src/arch/aarch64/hftest/cpu_entry.S
+++ b/src/arch/aarch64/hftest/cpu_entry.S
@@ -14,16 +14,23 @@
  * limitations under the License.
  */
 
-.global vm_cpu_entry_raw
-vm_cpu_entry_raw:
-	/* Initialise stack from the cpu_start_state struct. */
-	ldr x1, [x0]
-	mov sp, x1
-
+.global vm_cpu_entry
+vm_cpu_entry:
 	/* Disable trapping floating point access in EL1. */
 	mov x1, #(0x3 << 20)
 	msr cpacr_el1, x1
 	isb
 
-	/* Jump to C entry point. */
-	b vm_cpu_entry
+	/* Initialise stack from the cpu_start_state struct. */
+	ldr x1, [x0]
+	mov sp, x1
+
+	/* Load entry function pointer and its argument. */
+	ldr x1, [x0, 8]
+	ldr x0, [x0, 16]
+
+	/* Branch to entry function. */
+	blr x1
+
+	/* Entry function should not return, but if it does, spin. */
+	b .
diff --git a/src/arch/aarch64/hftest/power_mgmt.c b/src/arch/aarch64/hftest/power_mgmt.c
index 98977f5..ab2332e 100644
--- a/src/arch/aarch64/hftest/power_mgmt.c
+++ b/src/arch/aarch64/hftest/power_mgmt.c
@@ -16,7 +16,6 @@
 
 #include "hf/arch/vm/power_mgmt.h"
 
-#include "hf/spinlock.h"
 #include "hf/static_assert.h"
 
 #include "vmapi/hf/call.h"
@@ -25,73 +24,31 @@
 #include "smc.h"
 
 /**
- * Holds temporary state used to set up the environment on which CPUs will
- * start executing.
+ * Starts the CPU with the given ID. It will set the stack pointer according to
+ * the provided `state` and jump to the entry point with the given argument
+ * specified in it.
  *
- * vm_cpu_entry_raw requires that the first field of cpu_start_state be the
- * initial stack pointer.
+ * Note: The caller of this function must guarantee that the contents of `state`
+ * do not change until the new CPU has branched to the given entry point, and
+ * that they have been written back to memory (not merely held in a data cache),
+ * because the new CPU is started with caching disabled.
  */
-struct cpu_start_state {
-	uintptr_t initial_sp;
-	void (*entry)(uintptr_t arg);
-	uintreg_t arg;
-	struct spinlock lock;
-};
-
-/**
- * Releases the given cpu_start_state struct by releasing its lock, then calls
- * the entry point specified by the caller of cpu_start.
- */
-void vm_cpu_entry(struct cpu_start_state *s)
+bool arch_cpu_start(uintptr_t id, struct arch_cpu_start_state *state)
 {
-	struct cpu_start_state local = *(volatile struct cpu_start_state *)s;
-
-	sl_unlock(&s->lock);
-
-	local.entry(local.arg);
-
-	/* Turn off CPU if the entry point ever returns. */
-	cpu_stop();
-}
-
-/**
- * Starts the CPU with the given ID. It will start at the provided entry point
- * with the provided argument.
- */
-bool cpu_start(uintptr_t id, void *stack, size_t stack_size,
-	       void (*entry)(uintptr_t arg), uintptr_t arg)
-{
-	void vm_cpu_entry_raw(uintptr_t arg);
-	struct cpu_start_state s;
+	void vm_cpu_entry(uintptr_t arg);
 	smc_res_t smc_res;
 
-	/* Initialise the temporary state we'll hold on the stack. */
-	sl_init(&s.lock);
-	sl_lock(&s.lock);
-	s.initial_sp = (uintptr_t)stack + stack_size;
-	s.entry = entry;
-	s.arg = arg;
-
 	/* Try to start the CPU. */
-	smc_res = smc32(PSCI_CPU_ON, id, (size_t)&vm_cpu_entry_raw, (size_t)&s,
-			0, 0, 0, SMCCC_CALLER_HYPERVISOR);
-	if (smc_res.res0 != PSCI_RETURN_SUCCESS) {
-		return false;
-	}
+	smc_res = smc64(PSCI_CPU_ON, id, (uintptr_t)&vm_cpu_entry,
+			(uintptr_t)state, 0, 0, 0, SMCCC_CALLER_HYPERVISOR);
 
-	/*
-	 * Wait for the starting cpu to release the spin lock, which indicates
-	 * that it won't touch the state we hold on the stack anymore.
-	 */
-	sl_lock(&s.lock);
-
-	return true;
+	return smc_res.res0 == PSCI_RETURN_SUCCESS;
 }
 
 /**
  * Stops the current CPU.
  */
-noreturn void cpu_stop(void)
+noreturn void arch_cpu_stop(void)
 {
 	smc32(PSCI_CPU_OFF, 0, 0, 0, 0, 0, 0, SMCCC_CALLER_HYPERVISOR);
 	for (;;) {
@@ -109,7 +66,7 @@
 /**
  * Returns the power status of the given CPU.
  */
-enum power_status cpu_status(cpu_id_t cpu_id)
+enum power_status arch_cpu_status(cpu_id_t cpu_id)
 {
 	uint32_t lowest_affinity_level = 0;
 	smc_res_t smc_res;
diff --git a/src/arch/aarch64/inc/hf/arch/vm/power_mgmt.h b/src/arch/aarch64/inc/hf/arch/vm/power_mgmt.h
index be8b9b4..41ebe49 100644
--- a/src/arch/aarch64/inc/hf/arch/vm/power_mgmt.h
+++ b/src/arch/aarch64/inc/hf/arch/vm/power_mgmt.h
@@ -29,10 +29,21 @@
 	POWER_STATUS_ON_PENDING,
 };
 
+/**
+ * Holds temporary state used to set up the environment on which CPUs will
+ * start executing.
+ *
+ * vm_cpu_entry() depends on the layout of this struct.
+ */
+struct arch_cpu_start_state {
+	uintptr_t initial_sp;
+	void (*entry)(uintreg_t arg);
+	uintreg_t arg;
+};
+
+bool arch_cpu_start(uintptr_t id, struct arch_cpu_start_state *s);
+
+noreturn void arch_cpu_stop(void);
+enum power_status arch_cpu_status(cpu_id_t cpu_id);
+
 noreturn void arch_power_off(void);
-
-bool cpu_start(uintptr_t id, void *stack, size_t stack_size,
-	       void (*entry)(uintptr_t arg), uintptr_t arg);
-
-noreturn void cpu_stop(void);
-enum power_status cpu_status(cpu_id_t cpu_id);
diff --git a/test/hftest/BUILD.gn b/test/hftest/BUILD.gn
index 9a5b452..e23e817 100644
--- a/test/hftest/BUILD.gn
+++ b/test/hftest/BUILD.gn
@@ -38,11 +38,12 @@
   public_configs = [ ":hftest_config" ]
 
   sources = [
-    "hftest_service.c",
+    "service.c",
   ]
 
   deps = [
     ":mm",
+    ":power_mgmt",
     "//src:dlog",
     "//src:memiter",
     "//src:panic",
@@ -93,6 +94,7 @@
   deps = [
     ":common",
     ":mm",
+    ":power_mgmt",
     "//src:dlog",
     "//src:fdt",
     "//src:memiter",
@@ -109,7 +111,7 @@
   testonly = true
   public_configs = [ ":hftest_config" ]
   sources = [
-    "hftest_common.c",
+    "common.c",
   ]
   deps = [
     "//src:fdt_handler",
@@ -125,7 +127,7 @@
   public_configs = [ ":hftest_config" ]
 
   sources = [
-    "hftest_mm.c",
+    "mm.c",
   ]
 
   deps = [
@@ -135,3 +137,18 @@
     "//src/arch/${plat_arch}/hftest:mm",
   ]
 }
+
+source_set("power_mgmt") {
+  testonly = true
+
+  public_configs = [ ":hftest_config" ]
+
+  sources = [
+    "power_mgmt.c",
+  ]
+
+  deps = [
+    ":mm",
+    "//src/arch/${plat_arch}/hftest:power_mgmt",
+  ]
+}
diff --git a/test/hftest/hftest_common.c b/test/hftest/common.c
similarity index 99%
rename from test/hftest/hftest_common.c
rename to test/hftest/common.c
index 8c8d334..81fc516 100644
--- a/test/hftest/hftest_common.c
+++ b/test/hftest/common.c
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include "hftest_common.h"
-
 #include "hf/arch/vm/power_mgmt.h"
 
 #include "hf/boot_params.h"
@@ -24,6 +22,7 @@
 #include "hf/std.h"
 
 #include "hftest.h"
+#include "hftest_common.h"
 
 HFTEST_ENABLE();
 
diff --git a/test/hftest/inc/hftest.h b/test/hftest/inc/hftest.h
index 62eb84c..f8d388a 100644
--- a/test/hftest/inc/hftest.h
+++ b/test/hftest/inc/hftest.h
@@ -96,6 +96,8 @@
 /** Adds stage-1 identity mapping for pages covering bytes [base, base+size). */
 void hftest_mm_identity_map(const void *base, size_t size, int mode);
 
+void hftest_mm_vcpu_init(void);
+
 /**
  * Starts the CPU with the given ID. It will start at the provided entry point
  * with the provided argument. It is a wrapper around the generic cpu_start()
diff --git a/test/hftest/hftest_mm.c b/test/hftest/mm.c
similarity index 73%
rename from test/hftest/hftest_mm.c
rename to test/hftest/mm.c
index c4f908d..988c26e 100644
--- a/test/hftest/hftest_mm.c
+++ b/test/hftest/mm.c
@@ -15,7 +15,6 @@
  */
 
 #include "hf/arch/vm/mm.h"
-#include "hf/arch/vm/power_mgmt.h"
 
 #include "hftest.h"
 
@@ -72,35 +71,7 @@
 	}
 }
 
-struct cpu_start_state {
-	void (*entry)(uintptr_t arg);
-	uintreg_t arg;
-	struct spinlock lock;
-};
-
-static void cpu_entry(uintptr_t arg)
+void hftest_mm_vcpu_init(void)
 {
-	struct cpu_start_state *s = (struct cpu_start_state *)arg;
-	struct cpu_start_state local = *s;
-
-	sl_unlock(&s->lock);
 	arch_vm_mm_enable(ptable.root);
-	local.entry(local.arg);
-}
-
-bool hftest_cpu_start(uintptr_t id, void *stack, size_t stack_size,
-		      void (*entry)(uintptr_t arg), uintptr_t arg)
-{
-	struct cpu_start_state s =
-		(struct cpu_start_state){.entry = entry, .arg = arg};
-
-	sl_init(&s.lock);
-	sl_lock(&s.lock);
-	if (!cpu_start(id, stack, stack_size, &cpu_entry, (uintptr_t)&s)) {
-		return false;
-	}
-
-	/* Wait until cpu_entry unlocks the lock before freeing stack memory. */
-	sl_lock(&s.lock);
-	return true;
 }
diff --git a/test/hftest/power_mgmt.c b/test/hftest/power_mgmt.c
new file mode 100644
index 0000000..576c6bd
--- /dev/null
+++ b/test/hftest/power_mgmt.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2019 The Hafnium Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hf/arch/vm/power_mgmt.h"
+
+#include "hf/arch/mm.h"
+
+#include "hf/spinlock.h"
+
+#include "hftest.h"
+
+struct cpu_start_state {
+	void (*entry)(uintptr_t arg);
+	uintreg_t arg;
+	struct spinlock lock;
+};
+
+static noreturn void cpu_entry(uintptr_t arg)
+{
+	struct cpu_start_state *s = (struct cpu_start_state *)arg;
+	struct cpu_start_state s_copy;
+
+	/*
+	 * Initialize memory and enable caching. Must be the first thing we do.
+	 */
+	hftest_mm_vcpu_init();
+
+	/* Make a copy of the cpu_start_state struct. */
+	s_copy = *s;
+
+	/* Inform cpu_start() that the state struct memory can now be freed. */
+	sl_unlock(&s->lock);
+
+	/* Call the given entry function with the given argument. */
+	s_copy.entry(s_copy.arg);
+
+	/* If the entry function returns, turn off the CPU. */
+	arch_cpu_stop();
+}
+
+bool hftest_cpu_start(uintptr_t id, void *stack, size_t stack_size,
+		      void (*entry)(uintptr_t arg), uintptr_t arg)
+{
+	struct cpu_start_state s;
+	struct arch_cpu_start_state s_arch;
+
+	/*
+	 * Config for arch_cpu_start() which will start a new CPU and
+	 * immediately jump to cpu_entry(). This function must guarantee that
+	 * the state struct is not freed until cpu_entry() is called.
+	 */
+	s_arch.initial_sp = (uintptr_t)stack + stack_size;
+	s_arch.entry = cpu_entry;
+	s_arch.arg = (uintptr_t)&s;
+
+	/*
+	 * Write back the `arch_cpu_start_state` struct because the new CPU will be
+	 * started without caching enabled and will need the data early on.
+	 */
+	arch_mm_write_back_dcache(&s_arch, sizeof(s_arch));
+
+	if ((s_arch.initial_sp % STACK_ALIGN) != 0) {
+		HFTEST_FAIL(true,
+			    "Stack pointer of new vCPU not properly aligned.");
+	}
+
+	/*
+	 * Config for cpu_entry(). Its job is to initialize memory and call the
+	 * provided entry point with the provided argument.
+	 */
+	s.entry = entry;
+	s.arg = arg;
+	sl_init(&s.lock);
+
+	/*
+	 * Lock the cpu_start_state struct which will be unlocked once
+	 * cpu_entry() does not need its content anymore. This simultaneously
+	 * protects the arch_cpu_start_state struct which must not be freed
+	 * before cpu_entry() is called.
+	 */
+	sl_lock(&s.lock);
+
+	/* Try to start the given CPU. */
+	if (!arch_cpu_start(id, &s_arch)) {
+		return false;
+	}
+
+	/*
+	 * Wait until cpu_entry() unlocks the cpu_start_state lock before
+	 * freeing stack memory.
+	 */
+	sl_lock(&s.lock);
+	return true;
+}
diff --git a/test/hftest/hftest_service.c b/test/hftest/service.c
similarity index 100%
rename from test/hftest/hftest_service.c
rename to test/hftest/service.c
diff --git a/test/vmapi/primary_with_secondaries/services/smp.c b/test/vmapi/primary_with_secondaries/services/smp.c
index 44dd505..5ad31d0 100644
--- a/test/vmapi/primary_with_secondaries/services/smp.c
+++ b/test/vmapi/primary_with_secondaries/services/smp.c
@@ -57,8 +57,8 @@
 	ASSERT_EQ(arg, ARG_VALUE);
 
 	/* Check that vCPU statuses are as expected. */
-	ASSERT_EQ(cpu_status(0), POWER_STATUS_ON);
-	ASSERT_EQ(cpu_status(1), POWER_STATUS_ON);
+	ASSERT_EQ(arch_cpu_status(0), POWER_STATUS_ON);
+	ASSERT_EQ(arch_cpu_status(1), POWER_STATUS_ON);
 
 	dlog("Secondary second vCPU started.\n");
 	send_message("vCPU 1", sizeof("vCPU 1"));
@@ -68,8 +68,8 @@
 TEST_SERVICE(smp)
 {
 	/* Check that vCPU statuses are as expected. */
-	ASSERT_EQ(cpu_status(0), POWER_STATUS_ON);
-	ASSERT_EQ(cpu_status(1), POWER_STATUS_OFF);
+	ASSERT_EQ(arch_cpu_status(0), POWER_STATUS_ON);
+	ASSERT_EQ(arch_cpu_status(1), POWER_STATUS_OFF);
 
 	/* Start second vCPU. */
 	dlog("Secondary starting second vCPU.\n");
@@ -78,8 +78,8 @@
 	dlog("Secondary started second vCPU.\n");
 
 	/* Check that vCPU statuses are as expected. */
-	ASSERT_EQ(cpu_status(0), POWER_STATUS_ON);
-	ASSERT_EQ(cpu_status(1), POWER_STATUS_ON);
+	ASSERT_EQ(arch_cpu_status(0), POWER_STATUS_ON);
+	ASSERT_EQ(arch_cpu_status(1), POWER_STATUS_ON);
 
 	send_message("vCPU 0", sizeof("vCPU 0"));
 }