Ensure that register state is saved before it's used.
Without this there is a race where a physical CPU is still saving the
contents of registers into a vCPU buffer while another physical CPU
is already trying to restore the state.
Also adding test case that fails without this fix and succeeds with it.
Change-Id: I0badfd61f12385a3d8dc8d09eb298cfe56f2a415
diff --git a/inc/hf/cpu.h b/inc/hf/cpu.h
index 76d71d6..b477dec 100644
--- a/inc/hf/cpu.h
+++ b/inc/hf/cpu.h
@@ -54,6 +54,11 @@
uint32_t interrupt_pending[HF_NUM_INTIDS / INTERRUPT_REGISTER_BITS];
};
+struct retval_state {
+ uintptr_t value;
+ bool force;
+};
+
struct vcpu {
struct spinlock lock;
enum vcpu_state state;
@@ -62,6 +67,20 @@
struct vcpu *mailbox_next;
struct arch_regs regs;
struct interrupts interrupts;
+
+ /*
+ * The following field is used to force a return value to be set the
+ * next time a vCPU belonging to a secondary VM runs. For primary VMs,
+ * 'regs' can be set directly.
+ */
+ struct retval_state retval;
+
+ /*
+ * Determine whether the 'regs' field is available for use. This is set
+ * to false when a vCPU is about to run on a physical CPU, and is set
+ * back to true when it is descheduled.
+ */
+ bool regs_available;
};
/* TODO: Update alignment such that cpus are in different cache lines. */
diff --git a/kokoro/ubuntu/test.sh b/kokoro/ubuntu/test.sh
index be49777..14f67e4 100755
--- a/kokoro/ubuntu/test.sh
+++ b/kokoro/ubuntu/test.sh
@@ -44,3 +44,4 @@
$HFTEST hafnium --initrd gicv3_test
$HFTEST hafnium --initrd primary_only_test
$HFTEST hafnium --initrd primary_with_secondaries_test
+$HFTEST hafnium --initrd primary_with_state_secondary_test
diff --git a/src/api.c b/src/api.c
index 4ab7416..6be8702 100644
--- a/src/api.c
+++ b/src/api.c
@@ -129,6 +129,66 @@
}
/**
+ * This function is called by the architecture-specific context switching
+ * function to indicate that register state for the given vcpu has been saved
+ * and can therefore be used by other pcpus.
+ */
+void api_regs_state_saved(struct vcpu *vcpu)
+{
+ sl_lock(&vcpu->lock);
+ vcpu->regs_available = true;
+ sl_unlock(&vcpu->lock);
+}
+
+/**
+ * Prepares the vcpu to run by updating its state and fetching whether a return
+ * value needs to be forced onto the vCPU.
+ */
+static bool api_vcpu_prepare_run(const struct vcpu *current, struct vcpu *vcpu,
+ struct retval_state *vcpu_retval)
+{
+ bool ret;
+
+ sl_lock(&vcpu->lock);
+ if (vcpu->state != vcpu_state_ready) {
+ ret = false;
+ goto out;
+ }
+
+ vcpu->cpu = current->cpu;
+ vcpu->state = vcpu_state_running;
+
+ /* Fetch return value to inject into vCPU if there is one. */
+ *vcpu_retval = vcpu->retval;
+ if (vcpu_retval->force) {
+ vcpu->retval.force = false;
+ }
+
+ /*
+ * Wait until the registers become available. Care must be taken when
+ * looping on this: it shouldn't be done while holding other locks
+ * to avoid deadlocks.
+ */
+ while (!vcpu->regs_available) {
+ sl_unlock(&vcpu->lock);
+ sl_lock(&vcpu->lock);
+ }
+
+ /*
+ * Mark the registers as unavailable now that we're about to reflect
+ * them onto the real registers. This will also prevent another physical
+ * CPU from trying to read these registers.
+ */
+ vcpu->regs_available = false;
+
+ ret = true;
+
+out:
+ sl_unlock(&vcpu->lock);
+ return ret;
+}
+
+/**
* Runs the given vcpu of the given vm.
*/
struct hf_vcpu_run_return api_vcpu_run(uint32_t vm_id, uint32_t vcpu_idx,
@@ -137,6 +197,7 @@
{
struct vm *vm;
struct vcpu *vcpu;
+ struct retval_state vcpu_retval;
struct hf_vcpu_run_return ret = {
.code = HF_VCPU_RUN_WAIT_FOR_INTERRUPT,
};
@@ -162,18 +223,20 @@
goto out;
}
+ /* Update state if allowed. */
vcpu = &vm->vcpus[vcpu_idx];
-
- sl_lock(&vcpu->lock);
- if (vcpu->state != vcpu_state_ready) {
+ if (!api_vcpu_prepare_run(current, vcpu, &vcpu_retval)) {
ret.code = HF_VCPU_RUN_WAIT_FOR_INTERRUPT;
- } else {
- vcpu->cpu = current->cpu;
- vcpu->state = vcpu_state_running;
- *next = vcpu;
- ret.code = HF_VCPU_RUN_YIELD;
+ goto out;
}
- sl_unlock(&vcpu->lock);
+
+ *next = vcpu;
+ ret.code = HF_VCPU_RUN_YIELD;
+
+ /* Update return value if one was injected. */
+ if (vcpu_retval.force) {
+ arch_regs_set_retval(&vcpu->regs, vcpu_retval.value);
+ }
out:
return ret;
@@ -353,12 +416,12 @@
to_vcpu->state = vcpu_state_ready;
/* Return from HF_MAILBOX_RECEIVE. */
- arch_regs_set_retval(&to_vcpu->regs,
- hf_mailbox_receive_return_encode((
- struct hf_mailbox_receive_return){
- .vm_id = to->mailbox.recv_from_id,
- .size = size,
- }));
+ to_vcpu->retval.force = true;
+ to_vcpu->retval.value = hf_mailbox_receive_return_encode(
+ (struct hf_mailbox_receive_return){
+ .vm_id = to->mailbox.recv_from_id,
+ .size = size,
+ });
sl_unlock(&to_vcpu->lock);
diff --git a/src/arch/aarch64/exceptions.S b/src/arch/aarch64/exceptions.S
index db0cec0..0f06bdd 100644
--- a/src/arch/aarch64/exceptions.S
+++ b/src/arch/aarch64/exceptions.S
@@ -308,8 +308,15 @@
mrs x26, vttbr_el2
str x26, [x1, #VCPU_LAZY + 16 * 13]
- /* Intentional fallthrough. */
+ /* Save new vcpu pointer in non-volatile register. */
+ mov x19, x0
+ /* Inform the arch-independent sections that regs have been saved. */
+ mov x0, x1
+ bl api_regs_state_saved
+ mov x0, x19
+
+ /* Intentional fallthrough. */
.globl vcpu_restore_all_and_run
vcpu_restore_all_and_run:
/* Update pointer to current vcpu. */
diff --git a/src/arch/aarch64/hftest/BUILD.gn b/src/arch/aarch64/hftest/BUILD.gn
index dca75b0..b8fdc48 100644
--- a/src/arch/aarch64/hftest/BUILD.gn
+++ b/src/arch/aarch64/hftest/BUILD.gn
@@ -30,6 +30,7 @@
# Shutdown the system or exit emulation, start/stop CPUs.
source_set("power_mgmt") {
+ testonly = true
sources = [
"cpu_entry.S",
"power_mgmt.c",
@@ -40,7 +41,7 @@
]
}
-# Exception handlers for interrupts
+# Exception handlers for interrupts and gicv3 el1 driver.
source_set("interrupts_gicv3") {
testonly = true
sources = [
@@ -48,3 +49,11 @@
"interrupts_gicv3.c",
]
}
+
+# Get/set CPU state.
+source_set("state") {
+ testonly = true
+ sources = [
+ "state.c",
+ ]
+}
diff --git a/src/arch/aarch64/hftest/state.c b/src/arch/aarch64/hftest/state.c
new file mode 100644
index 0000000..219869a
--- /dev/null
+++ b/src/arch/aarch64/hftest/state.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hf/arch/vm/state.h"
+
+#include "../msr.h"
+
+void per_cpu_ptr_set(uintptr_t v)
+{
+ write_msr(tpidr_el1, v);
+}
+
+uintptr_t per_cpu_ptr_get(void)
+{
+ return read_msr(tpidr_el1);
+}
diff --git a/src/arch/aarch64/inc/hf/arch/cpu.h b/src/arch/aarch64/inc/hf/arch/cpu.h
index 175551f..11f07d7 100644
--- a/src/arch/aarch64/inc/hf/arch/cpu.h
+++ b/src/arch/aarch64/inc/hf/arch/cpu.h
@@ -73,8 +73,7 @@
}
static inline void arch_regs_init(struct arch_regs *r, bool is_primary,
- uint64_t vmid, paddr_t table, ipaddr_t pc,
- uintreg_t arg)
+ uint64_t vmid, paddr_t table, uint32_t index)
{
uintreg_t hcr;
uintreg_t cptr;
@@ -108,18 +107,32 @@
r->lazy.cptr_el2 = cptr;
r->lazy.cnthctl_el2 = cnthctl;
r->lazy.vttbr_el2 = pa_addr(table) | (vmid << 48);
+ r->lazy.vmpidr_el2 = index;
/* TODO: Use constant here. */
r->spsr = 5 | /* M bits, set to EL1h. */
(0xf << 6); /* DAIF bits set; disable interrupts. */
+}
+
+/**
+ * Updates the given registers so that when a vcpu runs, it starts off at the
+ * given address (pc) with the given argument.
+ *
+ * This function must only be called on an arch_regs that is known not be in use
+ * by any other physical CPU.
+ */
+static inline void arch_regs_set_pc_arg(struct arch_regs *r, ipaddr_t pc,
+ uintreg_t arg)
+{
r->pc = ipa_addr(pc);
r->r[0] = arg;
}
-static inline void arch_regs_set_vcpu_index(struct arch_regs *r, uint32_t index)
-{
- r->lazy.vmpidr_el2 = index;
-}
-
+/**
+ * Updates the register holding the return value of a function.
+ *
+ * This function must only be called on an arch_regs that is known not be in use
+ * by any other physical CPU.
+ */
static inline void arch_regs_set_retval(struct arch_regs *r, uintreg_t v)
{
r->r[0] = v;
diff --git a/src/arch/aarch64/inc/hf/arch/vm/state.h b/src/arch/aarch64/inc/hf/arch/vm/state.h
new file mode 100644
index 0000000..9eb4a51
--- /dev/null
+++ b/src/arch/aarch64/inc/hf/arch/vm/state.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2018 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+void per_cpu_ptr_set(uintptr_t v);
+uintptr_t per_cpu_ptr_get(void);
diff --git a/src/arch/fake/inc/hf/arch/cpu.h b/src/arch/fake/inc/hf/arch/cpu.h
index 8cac590..cd8a47c 100644
--- a/src/arch/fake/inc/hf/arch/cpu.h
+++ b/src/arch/fake/inc/hf/arch/cpu.h
@@ -40,20 +40,20 @@
}
static inline void arch_regs_init(struct arch_regs *r, bool is_primary,
- uint64_t vmid, paddr_t table, ipaddr_t pc,
- uintreg_t arg)
+ uint64_t vmid, paddr_t table, uint64_t index)
{
/* TODO */
(void)is_primary;
(void)vmid;
(void)table;
- (void)pc;
- r->r[0] = arg;
+ r->vcpu_index = index;
}
-static inline void arch_regs_set_vcpu_index(struct arch_regs *r, uint16_t index)
+static inline void arch_regs_set_pc_arg(struct arch_regs *r, ipaddr_t pc,
+ uintreg_t arg)
{
- r->vcpu_index = index;
+ (void)pc;
+ r->r[0] = arg;
}
static inline void arch_regs_set_retval(struct arch_regs *r, uintreg_t v)
diff --git a/src/cpu.c b/src/cpu.c
index 0f2bbba..1ae6cd1 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -96,8 +96,7 @@
if (!prev) {
struct vm *vm = vm_get(HF_PRIMARY_VM_ID);
struct vcpu *vcpu = &vm->vcpus[cpu_index(c)];
- arch_regs_init(&vcpu->regs, true, vm->id, vm->ptable.root,
- entry, arg);
+ arch_regs_set_pc_arg(&vcpu->regs, entry, arg);
vcpu_on(vcpu);
}
@@ -134,9 +133,11 @@
{
memset(vcpu, 0, sizeof(*vcpu));
sl_init(&vcpu->lock);
+ vcpu->regs_available = true;
vcpu->vm = vm;
vcpu->state = vcpu_state_off;
- arch_regs_set_vcpu_index(&vcpu->regs, vcpu - vm->vcpus);
+ arch_regs_init(&vcpu->regs, vm->id == HF_PRIMARY_VM_ID, vm->id,
+ vm->ptable.root, vcpu_index(vcpu));
}
void vcpu_on(struct vcpu *vcpu)
diff --git a/src/vm.c b/src/vm.c
index f1fcbb6..4a1d9d7 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -42,6 +42,10 @@
vm->vcpu_count = vcpu_count;
vm->mailbox.state = mailbox_state_empty;
+ if (!mm_ptable_init(&vm->ptable, 0, ppool)) {
+ return false;
+ }
+
/* Do basic initialization of vcpus. */
for (i = 0; i < vcpu_count; i++) {
vcpu_init(&vm->vcpus[i], vm);
@@ -50,7 +54,7 @@
++vm_count;
*new_vm = vm;
- return mm_ptable_init(&vm->ptable, 0, ppool);
+ return true;
}
uint32_t vm_get_count(void)
@@ -73,8 +77,7 @@
{
struct vcpu *vcpu = &vm->vcpus[index];
if (index < vm->vcpu_count) {
- arch_regs_init(&vcpu->regs, vm->id == HF_PRIMARY_VM_ID, vm->id,
- vm->ptable.root, entry, arg);
+ arch_regs_set_pc_arg(&vcpu->regs, entry, arg);
vcpu_on(vcpu);
}
}
diff --git a/test/vmapi/BUILD.gn b/test/vmapi/BUILD.gn
index da28503..fab748c 100644
--- a/test/vmapi/BUILD.gn
+++ b/test/vmapi/BUILD.gn
@@ -25,6 +25,7 @@
deps = [
":primary_only_test",
":primary_with_secondaries_test",
+ ":primary_with_state_secondary_test",
]
}
@@ -114,3 +115,28 @@
],
]
}
+
+# Tests with state test secondary VM.
+vm_kernel("primary_with_state_secondary_test_vm") {
+ testonly = true
+
+ sources = [
+ "primary_for_state_test.c",
+ ]
+
+ deps = [
+ "//test/hftest:hftest_primary_vm",
+ ]
+}
+
+initrd("primary_with_state_secondary_test") {
+ testonly = true
+
+ primary_vm = ":primary_with_state_secondary_test_vm"
+ secondary_vms = [ [
+ "1048576",
+ "1",
+ "check_state",
+ "//test/vmapi/secondaries:check_state",
+ ] ]
+}
diff --git a/test/vmapi/primary_for_state_test.c b/test/vmapi/primary_for_state_test.c
new file mode 100644
index 0000000..9a3b732
--- /dev/null
+++ b/test/vmapi/primary_for_state_test.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2018 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <stdalign.h>
+#include <stdint.h>
+
+#include "hf/arch/vm/power_mgmt.h"
+
+#include "hf/mm.h"
+#include "hf/std.h"
+
+#include "vmapi/hf/call.h"
+
+#include "hftest.h"
+
+static alignas(PAGE_SIZE) uint8_t send_page[PAGE_SIZE];
+static alignas(PAGE_SIZE) uint8_t recv_page[PAGE_SIZE];
+static_assert(sizeof(send_page) == PAGE_SIZE, "Send page is not a page.");
+static_assert(sizeof(recv_page) == PAGE_SIZE, "Recv page is not a page.");
+
+static hf_ipaddr_t send_page_addr = (hf_ipaddr_t)send_page;
+static hf_ipaddr_t recv_page_addr = (hf_ipaddr_t)recv_page;
+
+#define STATE_VM_ID 1
+
+/**
+ * Iterates trying to run vCPU of the secondary VM. Returns when a message
+ * of non-zero length is received.
+ */
+bool run_loop(void)
+{
+ struct hf_vcpu_run_return run_res;
+ bool ok = false;
+
+ for (;;) {
+ /* Run until it manages to schedule vCPU on this CPU. */
+ do {
+ run_res = hf_vcpu_run(STATE_VM_ID, 0);
+ } while (run_res.code == HF_VCPU_RUN_WAIT_FOR_INTERRUPT);
+
+ /* Break out if we received a message with non-zero length. */
+ if (run_res.code == HF_VCPU_RUN_MESSAGE &&
+ run_res.message.size != 0) {
+ break;
+ }
+
+ /* Clear mailbox so that next message can be received. */
+ hf_mailbox_clear();
+ }
+
+ /* Copies the contents of the received boolean to the return value. */
+ if (run_res.message.size == sizeof(ok)) {
+ memcpy(&ok, recv_page, sizeof(ok));
+ }
+
+ hf_mailbox_clear();
+
+ return ok;
+}
+
+/**
+ * This is the entry point of the additional primary VM vCPU. It just calls
+ * the run loop so that two cpus compete for the chance to run a secondary VM.
+ */
+static void vm_cpu_entry(uintptr_t arg)
+{
+ (void)arg;
+ run_loop();
+}
+
+/**
+ * This test tries to run the same secondary vCPU from two different physical
+ * CPUs concurrently. The vCPU checks that the state is ok while it bounces
+ * between the physical CPUs.
+ */
+TEST(vcpu_state, concurrent_save_restore)
+{
+ alignas(4096) static char stack[4096];
+
+ EXPECT_EQ(hf_vm_configure(send_page_addr, recv_page_addr), 0);
+
+ /* Start second vCPU. */
+ EXPECT_EQ(cpu_start(1, stack, sizeof(stack), vm_cpu_entry, 0), true);
+
+ /* Run on a loop until the secondary VM is done. */
+ EXPECT_EQ(run_loop(), true);
+}
diff --git a/test/vmapi/secondaries/BUILD.gn b/test/vmapi/secondaries/BUILD.gn
index 52dfd57..43fb4cc 100644
--- a/test/vmapi/secondaries/BUILD.gn
+++ b/test/vmapi/secondaries/BUILD.gn
@@ -66,3 +66,16 @@
"//test/hftest:hftest_secondary_vm",
]
}
+
+vm_kernel("check_state") {
+ testonly = true
+
+ sources = [
+ "check_state.c",
+ ]
+
+ deps = [
+ "//src/arch/aarch64/hftest:state",
+ "//test/hftest:hftest_secondary_vm",
+ ]
+}
diff --git a/test/vmapi/secondaries/check_state.c b/test/vmapi/secondaries/check_state.c
new file mode 100644
index 0000000..e05c0a7
--- /dev/null
+++ b/test/vmapi/secondaries/check_state.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2018 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdalign.h>
+#include <stdint.h>
+
+#include "hf/arch/vm/state.h"
+
+#include "hf/mm.h"
+#include "hf/std.h"
+
+#include "vmapi/hf/call.h"
+
+alignas(4096) uint8_t kstack[4096];
+
+static alignas(PAGE_SIZE) uint8_t send_page[PAGE_SIZE];
+static alignas(PAGE_SIZE) uint8_t recv_page[PAGE_SIZE];
+
+static hf_ipaddr_t send_page_addr = (hf_ipaddr_t)send_page;
+static hf_ipaddr_t recv_page_addr = (hf_ipaddr_t)recv_page;
+
+void send_with_retry(uint32_t vm_id, size_t size)
+{
+ int64_t res;
+
+ do {
+ res = hf_mailbox_send(vm_id, size);
+ } while (res == -1);
+}
+
+/**
+ * This VM repeatedly takes the following steps: sets the per-cpu pointer to
+ * some value, makes a hypervisor call, check that the value is still what it
+ * was set to.
+ *
+ * This loop helps detect bugs where the hypervisor inadvertently destroys
+ * state.
+ *
+ * At the end of its iterations, the VM reports the result to the primary VM,
+ * which then fails or succeeds the test.
+ */
+void kmain(void)
+{
+ size_t i;
+ bool ok = true;
+ static volatile uintptr_t expected;
+ static volatile uintptr_t actual;
+
+ hf_vm_configure(send_page_addr, recv_page_addr);
+ for (i = 0; i < 100000; i++) {
+ /*
+ * We store the expected/actual values in volatile static
+ * variables to avoid relying on registers that may have been
+ * modified by the hypervisor.
+ */
+ expected = i;
+ per_cpu_ptr_set(expected);
+ send_with_retry(HF_PRIMARY_VM_ID, 0);
+ actual = per_cpu_ptr_get();
+ ok &= expected == actual;
+ }
+
+ /* Send two replies, one for each physical CPU. */
+ memcpy(send_page, &ok, sizeof(ok));
+ send_with_retry(HF_PRIMARY_VM_ID, sizeof(ok));
+ send_with_retry(HF_PRIMARY_VM_ID, sizeof(ok));
+}