/*
* Copyright 2018 Google LLC
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <hf/call.h>
#define CONFIG_HAFNIUM_MAX_VMS 16
#define CONFIG_HAFNIUM_MAX_VCPUS 32
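/*
 * Per-vCPU state tracked by the driver: the kernel thread that runs the
 * vCPU, a flag used to abort a pending sleep when a wake-up arrives first,
 * and a timer for hypervisor-requested timed sleeps.
 */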
struct hf_vcpu {
spinlock_t lock;
struct hf_vm *vm;
uint32_t vcpu_index;
struct task_struct *task;
atomic_t abort_sleep;
struct hrtimer timer;
};
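/*
 * A secondary VM as seen by this driver: its hypervisor-assigned id and
 * the set of vCPU threads that run it.
 */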
struct hf_vm {
uint32_t id;
uint32_t vcpu_count;
struct hf_vcpu *vcpu;
};
static struct hf_vm *hf_vms;
static uint32_t hf_vm_count;
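/*
 * Pages backing the send/receive buffers shared with the hypervisor. They
 * are registered via hf_vm_configure() and, once registered, may not be
 * freed even on module unload.
 */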
static struct page *hf_send_page;
static struct page *hf_recv_page;
/**
* Wakes up the kernel thread responsible for running the given vcpu.
*
* Returns 0 if the thread was already running, 1 otherwise.
*/
static int hf_vcpu_wake_up(struct hf_vcpu *vcpu)
{
/* Set a flag indicating that the thread should not go to sleep. */
atomic_set(&vcpu->abort_sleep, 1);
/* Set the thread to running state. */
return wake_up_process(vcpu->task);
}
/**
* Puts the current thread to sleep. The current thread must be responsible for
* running the given vcpu.
*
* Going to sleep will fail if hf_vcpu_wake_up() or kthread_stop() was called on
* this vcpu/thread since the last time it [re]started running.
*/
static void hf_vcpu_sleep(struct hf_vcpu *vcpu)
{
int abort;
set_current_state(TASK_INTERRUPTIBLE);
	/*
	 * Check the sleep-abort flag only after making the thread
	 * interruptible: a racing hf_vcpu_wake_up() then either sets the
	 * flag before we read it, or moves the task back to runnable so
	 * that schedule() returns immediately. Either way the wake-up
	 * cannot be lost.
	 */
abort = atomic_read(&vcpu->abort_sleep);
if (!abort && !kthread_should_stop())
schedule();
/* Set state back to running on the way out. */
set_current_state(TASK_RUNNING);
}
/**
* Wakes up the thread associated with the vcpu that owns the given timer. This
* is called when the timer the thread is waiting on expires.
*/
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);
/* TODO: Inject interrupt. */
hf_vcpu_wake_up(vcpu);
return HRTIMER_NORESTART;
}
/**
* This is the main loop of each vcpu.
*/
static int hf_vcpu_thread(void *data)
{
struct hf_vcpu *vcpu = data;
struct hf_vcpu_run_return ret;
hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->timer.function = &hf_vcpu_timer_expired;
while (!kthread_should_stop()) {
/*
* We're about to run the vcpu, so we can reset the abort-sleep
* flag.
*/
atomic_set(&vcpu->abort_sleep, 0);
/* Call into Hafnium to run vcpu. */
ret = hf_vcpu_run(vcpu->vm->id, vcpu->vcpu_index);
switch (ret.code) {
/* Yield (forcibly or voluntarily). */
case HF_VCPU_RUN_YIELD:
break;
/* WFI. */
case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
hf_vcpu_sleep(vcpu);
break;
/* Wake up another vcpu. */
case HF_VCPU_RUN_WAKE_UP:
{
struct hf_vm *vm;
			/*
			 * Only secondary VMs (ids 1..hf_vm_count) are
			 * tracked; reject anything else so the index below
			 * cannot underflow.
			 */
			if (ret.wake_up.vm_id < 1 ||
			    ret.wake_up.vm_id > hf_vm_count)
				break;
			vm = &hf_vms[ret.wake_up.vm_id - 1];
if (ret.wake_up.vcpu < vm->vcpu_count) {
hf_vcpu_wake_up(&vm->vcpu[ret.wake_up.vcpu]);
} else if (ret.wake_up.vcpu == HF_INVALID_VCPU) {
/* TODO: pick one to interrupt. */
pr_warning("No vcpu to wake.");
}
}
break;
/* Response available. */
case HF_VCPU_RUN_MESSAGE:
{
uint32_t i;
const char *buf = page_address(hf_recv_page);
pr_info("Received response from vm %u (%u bytes): ",
vcpu->vm->id, ret.message.size);
for (i = 0; i < ret.message.size; i++)
printk(KERN_CONT "%c", buf[i]);
printk(KERN_CONT "\n");
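			/*
			 * Release the receive buffer back to the hypervisor
			 * so the sender can deliver the next message.
			 */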
hf_mailbox_clear();
}
break;
case HF_VCPU_RUN_SLEEP:
hrtimer_start(&vcpu->timer, ret.sleep.ns, HRTIMER_MODE_REL);
hf_vcpu_sleep(vcpu);
hrtimer_cancel(&vcpu->timer);
break;
}
}
return 0;
}
/**
* Frees all resources, including threads, associated with the Hafnium driver.
*/
static void hf_free_resources(void)
{
uint32_t i, j;
/*
* First stop all worker threads. We need to do this before freeing
* resources because workers may reference each other, so it is only
* safe to free resources after they have all stopped.
*/
for (i = 0; i < hf_vm_count; i++) {
struct hf_vm *vm = &hf_vms[i];
for (j = 0; j < vm->vcpu_count; j++)
kthread_stop(vm->vcpu[j].task);
}
/* Free resources. */
for (i = 0; i < hf_vm_count; i++) {
struct hf_vm *vm = &hf_vms[i];
for (j = 0; j < vm->vcpu_count; j++)
put_task_struct(vm->vcpu[j].task);
kfree(vm->vcpu);
}
kfree(hf_vms);
}
static ssize_t hf_send_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
int64_t ret;
struct hf_vm *vm;
count = min_t(size_t, count, HF_MAILBOX_SIZE);
/* Copy data to send buffer. */
memcpy(page_address(hf_send_page), buf, count);
vm = &hf_vms[0];
ret = hf_mailbox_send(vm->id, count);
if (ret < 0)
return -EAGAIN;
if (ret == HF_INVALID_VCPU) {
/*
* TODO: We need to interrupt some vcpu because none are waiting
* for data.
*/
pr_warning("No vcpu to receive message.");
return -ENOSYS;
}
if (ret >= vm->vcpu_count)
return -EINVAL;
/* Wake up the vcpu that is going to process the data. */
hf_vcpu_wake_up(&vm->vcpu[ret]);
return count;
}
static struct kobject *hf_sysfs_obj;
static struct kobj_attribute send_attr =
__ATTR(send, 0200, NULL, hf_send_store);
/**
* Initializes the Hafnium driver's sysfs interface.
*/
static void __init hf_init_sysfs(void)
{
int ret;
/* Create the sysfs interface to interrupt vcpus. */
hf_sysfs_obj = kobject_create_and_add("hafnium", kernel_kobj);
if (!hf_sysfs_obj) {
pr_err("Unable to create sysfs object");
} else {
ret = sysfs_create_file(hf_sysfs_obj, &send_attr.attr);
if (ret)
pr_err("Unable to create 'send' sysfs file");
}
}
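/*
 * With the sysfs node above, a message can be sent to the first secondary
 * VM from userspace, e.g. (assuming sysfs is mounted at /sys):
 *
 *   echo -n ping > /sys/kernel/hafnium/send
 *
 * hf_send_store() copies at most HF_MAILBOX_SIZE bytes into the send
 * buffer and wakes the vcpu that the hypervisor picks to receive it.
 */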
/**
* Initializes the Hafnium driver by creating a thread for each vCPU of each
* virtual machine.
*/
static int __init hf_init(void)
{
int64_t ret;
uint32_t i, j;
uint32_t total_vm_count;
uint32_t total_vcpu_count;
/* Allocate a page for send and receive buffers. */
hf_send_page = alloc_page(GFP_KERNEL);
if (!hf_send_page) {
pr_err("Unable to allocate send buffer\n");
return -ENOMEM;
}
hf_recv_page = alloc_page(GFP_KERNEL);
if (!hf_recv_page) {
__free_page(hf_send_page);
pr_err("Unable to allocate receive buffer\n");
return -ENOMEM;
}
/*
* Configure both addresses. Once configured, we cannot free these pages
* because the hypervisor will use them, even if the module is
* unloaded.
*/
ret = hf_vm_configure(page_to_phys(hf_send_page),
page_to_phys(hf_recv_page));
if (ret) {
__free_page(hf_send_page);
__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from hypervisor
		 * and go from there.
		 */
pr_err("Unable to configure VM\n");
return -EIO;
}
/* Get the number of VMs. */
ret = hf_vm_get_count();
if (ret < 0) {
pr_err("Unable to retrieve number of VMs: %lld\n", ret);
return -EIO;
}
/* Confirm the maximum number of VMs looks sane. */
BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS < 1);
BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS > U16_MAX);
/* Validate the number of VMs. There must at least be the primary. */
if (ret < 1 || ret > CONFIG_HAFNIUM_MAX_VMS) {
pr_err("Number of VMs is out of range: %lld\n", ret);
return -EDQUOT;
}
/* Only track the secondary VMs. */
total_vm_count = ret - 1;
	hf_vms = kmalloc_array(total_vm_count, sizeof(struct hf_vm),
			       GFP_KERNEL);
if (!hf_vms)
return -ENOMEM;
/* Initialize each VM. */
total_vcpu_count = 0;
for (i = 0; i < total_vm_count; i++) {
struct hf_vm *vm = &hf_vms[i];
/* Adjust the ID as only the secondaries are tracked. */
vm->id = i + 1;
ret = hf_vcpu_get_count(vm->id);
if (ret < 0) {
pr_err("HF_VCPU_GET_COUNT failed for vm=%u: %lld", vm->id,
ret);
ret = -EIO;
goto fail_with_cleanup;
}
/* Avoid overflowing the vcpu count. */
if (ret > (U32_MAX - total_vcpu_count)) {
pr_err("Too many vcpus: %u\n", total_vcpu_count);
ret = -EDQUOT;
goto fail_with_cleanup;
}
/* Confirm the maximum number of VCPUs looks sane. */
BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS < 1);
BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS > U16_MAX);
/* Enforce the limit on vcpus. */
total_vcpu_count += ret;
if (total_vcpu_count > CONFIG_HAFNIUM_MAX_VCPUS) {
pr_err("Too many vcpus: %u\n", total_vcpu_count);
ret = -EDQUOT;
goto fail_with_cleanup;
}
vm->vcpu_count = ret;
		vm->vcpu = kmalloc_array(vm->vcpu_count,
					 sizeof(struct hf_vcpu), GFP_KERNEL);
		if (!vm->vcpu) {
			pr_err("No memory for %u vcpus for vm %u\n",
			       vm->vcpu_count, vm->id);
			ret = -ENOMEM;
			goto fail_with_cleanup;
		}
/* Update the number of initialized VMs. */
hf_vm_count = i + 1;
/* Create a kernel thread for each vcpu. */
for (j = 0; j < vm->vcpu_count; j++) {
struct hf_vcpu *vcpu = &vm->vcpu[j];
vcpu->task = kthread_create(hf_vcpu_thread, vcpu,
"vcpu_thread_%u_%u",
vm->id, j);
if (IS_ERR(vcpu->task)) {
pr_err("Error creating task (vm=%u,vcpu=%u)"
": %ld\n", vm->id, j, PTR_ERR(vcpu->task));
vm->vcpu_count = j;
ret = PTR_ERR(vcpu->task);
goto fail_with_cleanup;
}
get_task_struct(vcpu->task);
spin_lock_init(&vcpu->lock);
vcpu->vm = vm;
vcpu->vcpu_index = j;
atomic_set(&vcpu->abort_sleep, 0);
}
}
/* Start running threads now that all is initialized. */
for (i = 0; i < hf_vm_count; i++) {
struct hf_vm *vm = &hf_vms[i];
for (j = 0; j < vm->vcpu_count; j++)
wake_up_process(vm->vcpu[j].task);
}
/* Dump vm/vcpu count info. */
pr_info("Hafnium successfully loaded with %u VMs:\n", hf_vm_count);
for (i = 0; i < hf_vm_count; i++) {
struct hf_vm *vm = &hf_vms[i];
pr_info("\tVM %u: %u vCPUS\n", vm->id, vm->vcpu_count);
}
hf_init_sysfs();
return 0;
fail_with_cleanup:
hf_free_resources();
return ret;
}
/**
* Frees up all resources used by the Hafnium driver in preparation for
* unloading it.
*/
static void __exit hf_exit(void)
{
if (hf_sysfs_obj)
kobject_put(hf_sysfs_obj);
pr_info("Preparing to unload Hafnium\n");
hf_free_resources();
pr_info("Hafnium ready to unload\n");
}
MODULE_LICENSE("GPL");
module_init(hf_init);
module_exit(hf_exit);