/*
* Copyright 2018 The Hafnium Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <stdnoreturn.h>

#include "hf/arch/barriers.h"
#include "hf/arch/init.h"
#include "hf/arch/mm.h"

#include "hf/api.h"
#include "hf/check.h"
#include "hf/cpu.h"
#include "hf/dlog.h"
#include "hf/panic.h"
#include "hf/spci.h"
#include "hf/vm.h"

#include "vmapi/hf/call.h"

#include "debug_el1.h"
#include "msr.h"
#include "perfmon.h"
#include "psci.h"
#include "psci_handler.h"
#include "smc.h"
#include "sysregs.h"
/**
* Gets the Exception Class from the ESR.
*/
#define GET_EC(esr) ((esr) >> 26)
/**
* Gets the value to increment for the next PC.
* The ESR encodes whether the instruction is 2 bytes or 4 bytes long.
*/
#define GET_NEXT_PC_INC(esr) (((esr) & (1u << 25)) ? 4 : 2)
/**
* The Client ID field within X7 for an SMC64 call.
*/
#define CLIENT_ID_MASK UINT64_C(0xffff)
/**
* Returns a reference to the currently executing vCPU.
*/
static struct vcpu *current(void)
{
return (struct vcpu *)read_msr(tpidr_el2);
}
/**
* Saves the state of per-vCPU peripherals, such as the virtual timer, and
* informs the arch-independent sections that registers have been saved.
*/
void complete_saving_state(struct vcpu *vcpu)
{
vcpu->regs.peripherals.cntv_cval_el0 = read_msr(cntv_cval_el0);
vcpu->regs.peripherals.cntv_ctl_el0 = read_msr(cntv_ctl_el0);
api_regs_state_saved(vcpu);
/*
* If switching away from the primary, copy the current EL0 virtual
* timer registers to the corresponding EL2 physical timer registers.
* This is used to emulate the virtual timer for the primary in case it
* should fire while the secondary is running.
*/
if (vcpu->vm->id == HF_PRIMARY_VM_ID) {
/*
* Clear timer control register before copying compare value, to
* avoid a spurious timer interrupt. This could be a problem if
* the interrupt is configured as edge-triggered, as it would
* then be latched in.
*/
write_msr(cnthp_ctl_el2, 0);
write_msr(cnthp_cval_el2, read_msr(cntv_cval_el0));
write_msr(cnthp_ctl_el2, read_msr(cntv_ctl_el0));
}
}
/**
* Restores the state of per-vCPU peripherals, such as the virtual timer.
*/
void begin_restoring_state(struct vcpu *vcpu)
{
/*
* Clear timer control register before restoring compare value, to avoid
* a spurious timer interrupt. This could be a problem if the interrupt
* is configured as edge-triggered, as it would then be latched in.
*/
write_msr(cntv_ctl_el0, 0);
write_msr(cntv_cval_el0, vcpu->regs.peripherals.cntv_cval_el0);
write_msr(cntv_ctl_el0, vcpu->regs.peripherals.cntv_ctl_el0);
/*
* If we are switching (back) to the primary, disable the EL2 physical
* timer which was being used to emulate the EL0 virtual timer, as the
* virtual timer is now running for the primary again.
*/
if (vcpu->vm->id == HF_PRIMARY_VM_ID) {
write_msr(cnthp_ctl_el2, 0);
write_msr(cnthp_cval_el2, 0);
}
}
/**
* Invalidates all stage 1 TLB entries on the current (physical) CPU for the
* current VMID.
*/
static void invalidate_vm_tlb(void)
{
/*
* Ensure that the last VTTBR write has taken effect so we invalidate
* the right set of TLB entries.
*/
isb();
__asm__ volatile("tlbi vmalle1");
/*
* Ensure that no instructions are fetched for the VM until after the
* TLB invalidation has taken effect.
*/
isb();
/*
* Ensure that no data reads or writes for the VM happen until after the
* TLB invalidation has taken effect. Non-shareable is enough because the
* TLB is local to the CPU.
*/
dsb(nsh);
}
/**
* Invalidates the TLB if a different vCPU is being run than the last vCPU of
* the same VM which was run on the current pCPU.
*
* This is necessary because VMs may (contrary to the architecture
* specification) use inconsistent ASIDs across vCPUs. cf. KVM's similar
* workaround:
* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=94d0e5980d6791b9
*/
void maybe_invalidate_tlb(struct vcpu *vcpu)
{
size_t current_cpu_index = cpu_index(vcpu->cpu);
spci_vcpu_index_t new_vcpu_index = vcpu_index(vcpu);
if (vcpu->vm->arch.last_vcpu_on_cpu[current_cpu_index] !=
new_vcpu_index) {
/*
* The vCPU has changed since the last time this VM was run on
* this pCPU, so we need to invalidate the TLB.
*/
invalidate_vm_tlb();
/* Record the fact that this vCPU is now running on this CPU. */
vcpu->vm->arch.last_vcpu_on_cpu[current_cpu_index] =
new_vcpu_index;
}
}
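/*
 * Exceptions taken at EL2 itself indicate a problem in the hypervisor, so the
 * handlers below log what they can and panic.
 */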
noreturn void irq_current_exception(uintreg_t elr, uintreg_t spsr)
{
(void)elr;
(void)spsr;
panic("IRQ from current");
}
noreturn void fiq_current_exception(uintreg_t elr, uintreg_t spsr)
{
(void)elr;
(void)spsr;
panic("FIQ from current");
}
noreturn void serr_current_exception(uintreg_t elr, uintreg_t spsr)
{
(void)elr;
(void)spsr;
panic("SERR from current");
}
noreturn void sync_current_exception(uintreg_t elr, uintreg_t spsr)
{
uintreg_t esr = read_msr(esr_el2);
uintreg_t ec = GET_EC(esr);
(void)spsr;
switch (ec) {
case 0x25: /* EC = 100101, Data abort. */
dlog("Data abort: pc=%#x, esr=%#x, ec=%#x", elr, esr, ec);
if (!(esr & (1U << 10))) { /* Check FnV bit. */
dlog(", far=%#x", read_msr(far_el2));
} else {
dlog(", far=invalid");
}
dlog("\n");
break;
default:
dlog("Unknown current sync exception pc=%#x, esr=%#x, "
"ec=%#x\n",
elr, esr, ec);
break;
}
panic("EL2 exception");
}
/**
* Sets or clears the VI bit in the HCR_EL2 register saved in the given
* arch_regs.
*/
static void set_virtual_interrupt(struct arch_regs *r, bool enable)
{
if (enable) {
r->lazy.hcr_el2 |= HCR_EL2_VI;
} else {
r->lazy.hcr_el2 &= ~HCR_EL2_VI;
}
}
/**
* Sets or clears the VI bit in the HCR_EL2 register.
*/
static void set_virtual_interrupt_current(bool enable)
{
uintreg_t hcr_el2 = read_msr(hcr_el2);
if (enable) {
hcr_el2 |= HCR_EL2_VI;
} else {
hcr_el2 &= ~HCR_EL2_VI;
}
write_msr(hcr_el2, hcr_el2);
}
/**
* Checks whether to block an SMC being forwarded from a VM.
*/
static bool smc_is_blocked(const struct vm *vm, uint32_t func)
{
bool block_by_default = !vm->smc_whitelist.permissive;
for (size_t i = 0; i < vm->smc_whitelist.smc_count; ++i) {
if (func == vm->smc_whitelist.smcs[i]) {
return false;
}
}
dlog("SMC %#010x attempted from VM %d, blocked=%d\n", func, vm->id,
block_by_default);
/* Access is still allowed in permissive mode. */
return block_by_default;
}
/**
* Applies SMC access control according to manifest and forwards the call if
* access is granted.
*/
static void smc_forwarder(const struct vcpu *vcpu, struct smc_result *ret)
{
uint32_t func = vcpu->regs.r[0];
uint32_t client_id = vcpu->vm->id;
uintreg_t arg7;
if (smc_is_blocked(vcpu->vm, func)) {
ret->res0 = SMCCC_ERROR_UNKNOWN;
return;
}
/*
* Set the Client ID but keep the existing Secure OS ID and anything
* else (currently unspecified) that the client may have passed in the
* upper bits.
*/
arg7 = client_id | (vcpu->regs.r[7] & ~CLIENT_ID_MASK);
*ret = smc_forward(func, vcpu->regs.r[1], vcpu->regs.r[2],
vcpu->regs.r[3], vcpu->regs.r[4], vcpu->regs.r[5],
vcpu->regs.r[6], arg7);
/*
* Preserve the value passed by the caller, rather than the client_id we
* generated. Note that this would also overwrite any return value that
* may be in x7, but the SMCs that we are forwarding are legacy calls
* from before SMCCC 1.2 so won't have more than 4 return values anyway.
*/
ret->res7 = vcpu->regs.r[7];
}
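/**
 * Handles SPCI calls from a VM. Returns true if the function ID was an SPCI
 * call and `args` has been updated with the result (and `*next` with any vCPU
 * to switch to), or false if the call should be handled elsewhere.
 */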
static bool spci_handler(struct spci_value *args, struct vcpu **next)
{
/*
* NOTE: When adding new methods to this handler update
* api_spci_features accordingly.
*/
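/*
 * Mask off the SMCCC calling-convention (SMC32/SMC64) bit so that both
 * variants of each function ID are matched by the same case.
 */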
switch (args->func & ~SMCCC_CONVENTION_MASK) {
case SPCI_VERSION_32:
*args = api_spci_version();
return true;
case SPCI_ID_GET_32:
*args = api_spci_id_get(current());
return true;
case SPCI_FEATURES_32:
*args = api_spci_features(args->arg1);
return true;
case SPCI_YIELD_32:
api_yield(current(), next);
/* SPCI_YIELD always returns SPCI_SUCCESS. */
*args = (struct spci_value){.func = SPCI_SUCCESS_32};
return true;
case SPCI_MSG_SEND_32:
*args = api_spci_msg_send(spci_msg_send_sender(*args),
spci_msg_send_receiver(*args),
spci_msg_send_size(*args),
spci_msg_send_attributes(*args),
current(), next);
return true;
case SPCI_MSG_WAIT_32:
*args = api_spci_msg_recv(true, current(), next);
return true;
case SPCI_MSG_POLL_32:
*args = api_spci_msg_recv(false, current(), next);
return true;
}
return false;
}
/**
* Sets or clears the VI bit according to pending interrupts.
*/
static void update_vi(struct vcpu *next)
{
if (next == NULL) {
/*
* Not switching vCPUs, set the bit for the current vCPU
* directly in the register.
*/
struct vcpu *vcpu = current();
sl_lock(&vcpu->lock);
set_virtual_interrupt_current(
vcpu->interrupts.enabled_and_pending_count > 0);
sl_unlock(&vcpu->lock);
} else {
/*
* About to switch vCPUs, set the bit for the vCPU to which we
* are switching in the saved copy of the register.
*/
sl_lock(&next->lock);
set_virtual_interrupt(
&next->regs,
next->interrupts.enabled_and_pending_count > 0);
sl_unlock(&next->lock);
}
}
/**
* Processes SMC instruction calls.
*/
static void smc_handler(struct vcpu *vcpu, struct smc_result *ret,
struct vcpu **next)
{
uint32_t func = vcpu->regs.r[0];
if (psci_handler(vcpu, func, vcpu->regs.r[1], vcpu->regs.r[2],
vcpu->regs.r[3], &ret->res0, next)) {
return;
}
switch (func & ~SMCCC_CONVENTION_MASK) {
case HF_DEBUG_LOG:
ret->res0 = api_debug_log(vcpu->regs.r[1], vcpu);
return;
}
smc_forwarder(vcpu, ret);
}
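/**
 * Handles HVC calls from a VM: PSCI calls, SPCI calls and Hafnium-specific
 * hypercalls. Returns the vCPU to run next, or NULL to continue running the
 * current vCPU.
 */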
struct vcpu *hvc_handler(struct vcpu *vcpu)
{
struct spci_value args = {
.func = vcpu->regs.r[0],
.arg1 = vcpu->regs.r[1],
.arg2 = vcpu->regs.r[2],
.arg3 = vcpu->regs.r[3],
.arg4 = vcpu->regs.r[4],
.arg5 = vcpu->regs.r[5],
.arg6 = vcpu->regs.r[6],
.arg7 = vcpu->regs.r[7],
};
struct vcpu *next = NULL;
if (psci_handler(vcpu, args.func, args.arg1, args.arg2, args.arg3,
&vcpu->regs.r[0], &next)) {
return next;
}
if (spci_handler(&args, &next)) {
vcpu->regs.r[0] = args.func;
vcpu->regs.r[1] = args.arg1;
vcpu->regs.r[2] = args.arg2;
vcpu->regs.r[3] = args.arg3;
vcpu->regs.r[4] = args.arg4;
vcpu->regs.r[5] = args.arg5;
vcpu->regs.r[6] = args.arg6;
vcpu->regs.r[7] = args.arg7;
update_vi(next);
return next;
}
switch (args.func) {
case HF_VM_GET_COUNT:
vcpu->regs.r[0] = api_vm_get_count();
break;
case HF_VCPU_GET_COUNT:
vcpu->regs.r[0] = api_vcpu_get_count(args.arg1, vcpu);
break;
case HF_VCPU_RUN:
vcpu->regs.r[0] = hf_vcpu_run_return_encode(
api_vcpu_run(args.arg1, args.arg2, vcpu, &next));
break;
case HF_VM_CONFIGURE:
vcpu->regs.r[0] = api_vm_configure(
ipa_init(args.arg1), ipa_init(args.arg2), vcpu, &next);
break;
case HF_MAILBOX_CLEAR:
vcpu->regs.r[0] = api_mailbox_clear(vcpu, &next);
break;
case HF_MAILBOX_WRITABLE_GET:
vcpu->regs.r[0] = api_mailbox_writable_get(vcpu);
break;
case HF_MAILBOX_WAITER_GET:
vcpu->regs.r[0] = api_mailbox_waiter_get(args.arg1, vcpu);
break;
case HF_INTERRUPT_ENABLE:
vcpu->regs.r[0] =
api_interrupt_enable(args.arg1, args.arg2, vcpu);
break;
case HF_INTERRUPT_GET:
vcpu->regs.r[0] = api_interrupt_get(vcpu);
break;
case HF_INTERRUPT_INJECT:
vcpu->regs.r[0] = api_interrupt_inject(args.arg1, args.arg2,
args.arg3, vcpu, &next);
break;
case HF_SHARE_MEMORY:
vcpu->regs.r[0] = api_share_memory(
args.arg1 >> 32, ipa_init(args.arg2), args.arg3,
args.arg1 & 0xffffffff, vcpu);
break;
case HF_DEBUG_LOG:
vcpu->regs.r[0] = api_debug_log(args.arg1, vcpu);
break;
default:
vcpu->regs.r[0] = SMCCC_ERROR_UNKNOWN;
}
update_vi(next);
return next;
}
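/**
 * Handles an IRQ that arrives while a VM is running by switching back to the
 * primary VM, where the interrupt will be handled.
 */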
struct vcpu *irq_lower(void)
{
/*
* Switch back to primary VM, interrupts will be handled there.
*
* If the VM has aborted, this vCPU will be aborted when the scheduler
* tries to run it again. This means the interrupt will not be delayed
* by the aborted VM.
*
* TODO: Only switch when the interrupt isn't for the current VM.
*/
return api_preempt(current());
}
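/**
 * Handles an FIQ from a lower exception level in the same way as an IRQ.
 */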
struct vcpu *fiq_lower(void)
{
return irq_lower();
}
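/**
 * Handles an SError from a lower exception level by aborting the current VM.
 */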
struct vcpu *serr_lower(void)
{
dlog("SERR from lower\n");
return api_abort(current());
}
/**
* Initialises a fault info structure. It assumes that an FnV bit exists at
* bit offset 10 of the ESR, and that it is only valid when the bottom 6 bits of
* the ESR (the fault status code) are 010000; this is the case for both
* instruction and data aborts, but not necessarily for other exception reasons.
*/
static struct vcpu_fault_info fault_info_init(uintreg_t esr,
const struct vcpu *vcpu,
uint32_t mode)
{
uint32_t fsc = esr & 0x3f;
struct vcpu_fault_info r;
r.mode = mode;
r.pc = va_init(vcpu->regs.pc);
/*
* Check the FnV bit, which is only valid if dfsc/ifsc is 010000. It
* indicates that we cannot rely on far_el2.
*/
if (fsc == 0x10 && esr & (1U << 10)) {
r.vaddr = va_init(0);
r.ipaddr = ipa_init(read_msr(hpfar_el2) << 8);
} else {
r.vaddr = va_init(read_msr(far_el2));
r.ipaddr = ipa_init((read_msr(hpfar_el2) << 8) |
(read_msr(far_el2) & (PAGE_SIZE - 1)));
}
return r;
}
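/**
 * Handles a synchronous exception taken from a lower exception level: WFI/WFE,
 * data and instruction aborts, and HVC and SMC calls. Returns the vCPU to run
 * next, or NULL to continue running the current vCPU; any exception that
 * cannot be handled aborts the VM.
 */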
struct vcpu *sync_lower_exception(uintreg_t esr)
{
struct vcpu *vcpu = current();
struct vcpu_fault_info info;
struct vcpu *new_vcpu;
uintreg_t ec = GET_EC(esr);
switch (ec) {
case 0x01: /* EC = 000001, WFI or WFE. */
/* Skip the instruction. */
vcpu->regs.pc += GET_NEXT_PC_INC(esr);
/* Check TI bit of ISS, 0 = WFI, 1 = WFE. */
if (esr & 1) {
/* WFE */
/*
* TODO: consider giving the scheduler more context,
* somehow.
*/
api_yield(vcpu, &new_vcpu);
return new_vcpu;
}
/* WFI */
return api_wait_for_interrupt(vcpu);
case 0x24: /* EC = 100100, Data abort. */
info = fault_info_init(
esr, vcpu, (esr & (1U << 6)) ? MM_MODE_W : MM_MODE_R);
if (vcpu_handle_page_fault(vcpu, &info)) {
return NULL;
}
break;
case 0x20: /* EC = 100000, Instruction abort. */
info = fault_info_init(esr, vcpu, MM_MODE_X);
if (vcpu_handle_page_fault(vcpu, &info)) {
return NULL;
}
break;
case 0x16: /* EC = 010110, HVC instruction */
return hvc_handler(vcpu);
case 0x17: /* EC = 010111, SMC instruction. */ {
uintreg_t smc_pc = vcpu->regs.pc;
struct vcpu *next = NULL;
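/*
 * Preload x4-x7 with the caller's values so that calls handled
 * entirely within Hafnium return them unchanged.
 */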
struct smc_result ret = {.res4 = vcpu->regs.r[4],
.res5 = vcpu->regs.r[5],
.res6 = vcpu->regs.r[6],
.res7 = vcpu->regs.r[7]};
smc_handler(vcpu, &ret, &next);
/* Skip the SMC instruction. */
vcpu->regs.pc = smc_pc + GET_NEXT_PC_INC(esr);
vcpu->regs.r[0] = ret.res0;
vcpu->regs.r[1] = ret.res1;
vcpu->regs.r[2] = ret.res2;
vcpu->regs.r[3] = ret.res3;
vcpu->regs.r[4] = ret.res4;
vcpu->regs.r[5] = ret.res5;
vcpu->regs.r[6] = ret.res6;
vcpu->regs.r[7] = ret.res7;
return next;
}
/*
* EC = 011000, MSR, MRS or System instruction execution that is not
* reported using EC 000000, 000001 or 000111.
*/
case 0x18:
/*
* NOTE: This should never be reached because it goes through a
* separate path handled by handle_system_register_access().
*/
panic("Handled by handle_system_register_access().");
default:
dlog("Unknown lower sync exception pc=%#x, esr=%#x, "
"ec=%#x\n",
vcpu->regs.pc, esr, ec);
break;
}
/* The exception wasn't handled so abort the VM. */
return api_abort(vcpu);
}
/**
* Handles EC = 011000, MSR and MRS instruction traps.
* Returns non-null ONLY if the access failed and the vCPU is changing.
*/
struct vcpu *handle_system_register_access(uintreg_t esr)
{
struct vcpu *vcpu = current();
spci_vm_id_t vm_id = vcpu->vm->id;
uintreg_t ec = GET_EC(esr);
char *direction_str;
CHECK(ec == 0x18);
/*
* Handle accesses to debug and performance monitor registers.
* Abort when encountering unhandled register accesses.
*/
if (debug_el1_is_register_access(esr)) {
if (!debug_el1_process_access(vcpu, vm_id, esr)) {
goto fail;
}
} else if (perfmon_is_register_access(esr)) {
if (!perfmon_process_access(vcpu, vm_id, esr)) {
goto fail;
}
} else {
goto fail;
}
/* Instruction was fulfilled. Skip it and run the next one. */
vcpu->regs.pc += GET_NEXT_PC_INC(esr);
return NULL;
fail:
direction_str = ISS_IS_READ(esr) ? "read" : "write";
dlog("Unhandled system register %s: op0=%d, op1=%d, crn=%d, "
"crm=%d, op2=%d, rt=%d.\n",
direction_str, GET_ISS_OP0(esr), GET_ISS_OP1(esr),
GET_ISS_CRN(esr), GET_ISS_CRM(esr), GET_ISS_OP2(esr),
GET_ISS_RT(esr));
/* Abort if unable to fulfill the register access. */
return api_abort(vcpu);
}