1338 lines
34 KiB
C
1338 lines
34 KiB
C
|
// SPDX-License-Identifier: GPL-2.0-only
|
||
|
/*
|
||
|
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
|
||
|
*/
|
||
|
|
||
|
#include <linux/device.h>
|
||
|
#include <linux/module.h>
|
||
|
#include <linux/mutex.h>
|
||
|
#include <linux/pci.h>
|
||
|
#include <linux/pm_runtime.h>
|
||
|
#include <linux/types.h>
|
||
|
#include <linux/uaccess.h>
|
||
|
#include <linux/vfio.h>
|
||
|
#include <linux/vfio_pci_core.h>
|
||
|
#include <linux/virtio_pci.h>
|
||
|
#include <linux/virtio_net.h>
|
||
|
#include <linux/virtio_pci_admin.h>
|
||
|
#include <linux/anon_inodes.h>
|
||
|
|
||
|
#include "common.h"
|
||
|
|
||
|
/* Device specification max parts size */
|
||
|
#define MAX_LOAD_SIZE (BIT_ULL(BITS_PER_TYPE \
|
||
|
(((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1)
|
||
|
|
||
|
/* Initial target buffer size */
|
||
|
#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
|
||
|
|
||
|
static int
|
||
|
virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
|
||
|
u32 ctx_size);
|
||
|
|
||
|
static struct page *
|
||
|
virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
|
||
|
unsigned long offset)
|
||
|
{
|
||
|
unsigned long cur_offset = 0;
|
||
|
struct scatterlist *sg;
|
||
|
unsigned int i;
|
||
|
|
||
|
/* All accesses are sequential */
|
||
|
if (offset < buf->last_offset || !buf->last_offset_sg) {
|
||
|
buf->last_offset = 0;
|
||
|
buf->last_offset_sg = buf->table.sgt.sgl;
|
||
|
buf->sg_last_entry = 0;
|
||
|
}
|
||
|
|
||
|
cur_offset = buf->last_offset;
|
||
|
|
||
|
for_each_sg(buf->last_offset_sg, sg,
|
||
|
buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
|
||
|
if (offset < sg->length + cur_offset) {
|
||
|
buf->last_offset_sg = sg;
|
||
|
buf->sg_last_entry += i;
|
||
|
buf->last_offset = cur_offset;
|
||
|
return nth_page(sg_page(sg),
|
||
|
(offset - cur_offset) / PAGE_SIZE);
|
||
|
}
|
||
|
cur_offset += sg->length;
|
||
|
}
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
|
||
|
unsigned int npages)
|
||
|
{
|
||
|
unsigned int to_alloc = npages;
|
||
|
struct page **page_list;
|
||
|
unsigned long filled;
|
||
|
unsigned int to_fill;
|
||
|
int ret;
|
||
|
int i;
|
||
|
|
||
|
to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
|
||
|
page_list = kvcalloc(to_fill, sizeof(*page_list), GFP_KERNEL_ACCOUNT);
|
||
|
if (!page_list)
|
||
|
return -ENOMEM;
|
||
|
|
||
|
do {
|
||
|
filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
|
||
|
page_list);
|
||
|
if (!filled) {
|
||
|
ret = -ENOMEM;
|
||
|
goto err;
|
||
|
}
|
||
|
to_alloc -= filled;
|
||
|
ret = sg_alloc_append_table_from_pages(&buf->table, page_list,
|
||
|
filled, 0, filled << PAGE_SHIFT, UINT_MAX,
|
||
|
SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT);
|
||
|
|
||
|
if (ret)
|
||
|
goto err_append;
|
||
|
buf->allocated_length += filled * PAGE_SIZE;
|
||
|
/* clean input for another bulk allocation */
|
||
|
memset(page_list, 0, filled * sizeof(*page_list));
|
||
|
to_fill = min_t(unsigned int, to_alloc,
|
||
|
PAGE_SIZE / sizeof(*page_list));
|
||
|
} while (to_alloc > 0);
|
||
|
|
||
|
kvfree(page_list);
|
||
|
return 0;
|
||
|
|
||
|
err_append:
|
||
|
for (i = filled - 1; i >= 0; i--)
|
||
|
__free_page(page_list[i]);
|
||
|
err:
|
||
|
kvfree(page_list);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf)
|
||
|
{
|
||
|
struct sg_page_iter sg_iter;
|
||
|
|
||
|
/* Undo alloc_pages_bulk_array() */
|
||
|
for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
|
||
|
__free_page(sg_page_iter_page(&sg_iter));
|
||
|
sg_free_append_table(&buf->table);
|
||
|
kfree(buf);
|
||
|
}
|
||
|
|
||
|
static struct virtiovf_data_buffer *
|
||
|
virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length)
|
||
|
{
|
||
|
struct virtiovf_data_buffer *buf;
|
||
|
int ret;
|
||
|
|
||
|
buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
|
||
|
if (!buf)
|
||
|
return ERR_PTR(-ENOMEM);
|
||
|
|
||
|
ret = virtiovf_add_migration_pages(buf,
|
||
|
DIV_ROUND_UP_ULL(length, PAGE_SIZE));
|
||
|
if (ret)
|
||
|
goto end;
|
||
|
|
||
|
buf->migf = migf;
|
||
|
return buf;
|
||
|
end:
|
||
|
virtiovf_free_data_buffer(buf);
|
||
|
return ERR_PTR(ret);
|
||
|
}
|
||
|
|
||
|
static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf)
|
||
|
{
|
||
|
spin_lock_irq(&buf->migf->list_lock);
|
||
|
list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
|
||
|
spin_unlock_irq(&buf->migf->list_lock);
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type,
|
||
|
u32 *obj_id)
|
||
|
{
|
||
|
return virtio_pci_admin_obj_create(virtvdev->core_device.pdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
|
||
|
{
|
||
|
virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
|
||
|
}
|
||
|
|
||
|
static struct virtiovf_data_buffer *
|
||
|
virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
|
||
|
{
|
||
|
struct virtiovf_data_buffer *buf, *temp_buf;
|
||
|
struct list_head free_list;
|
||
|
|
||
|
INIT_LIST_HEAD(&free_list);
|
||
|
|
||
|
spin_lock_irq(&migf->list_lock);
|
||
|
list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
|
||
|
list_del_init(&buf->buf_elm);
|
||
|
if (buf->allocated_length >= length) {
|
||
|
spin_unlock_irq(&migf->list_lock);
|
||
|
goto found;
|
||
|
}
|
||
|
/*
|
||
|
* Prevent holding redundant buffers. Put in a free
|
||
|
* list and call at the end not under the spin lock
|
||
|
* (&migf->list_lock) to minimize its scope usage.
|
||
|
*/
|
||
|
list_add(&buf->buf_elm, &free_list);
|
||
|
}
|
||
|
spin_unlock_irq(&migf->list_lock);
|
||
|
buf = virtiovf_alloc_data_buffer(migf, length);
|
||
|
|
||
|
found:
|
||
|
while ((temp_buf = list_first_entry_or_null(&free_list,
|
||
|
struct virtiovf_data_buffer, buf_elm))) {
|
||
|
list_del(&temp_buf->buf_elm);
|
||
|
virtiovf_free_data_buffer(temp_buf);
|
||
|
}
|
||
|
|
||
|
return buf;
|
||
|
}
|
||
|
|
||
|
static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
|
||
|
{
|
||
|
struct virtiovf_data_buffer *entry;
|
||
|
|
||
|
if (migf->buf) {
|
||
|
virtiovf_free_data_buffer(migf->buf);
|
||
|
migf->buf = NULL;
|
||
|
}
|
||
|
|
||
|
if (migf->buf_header) {
|
||
|
virtiovf_free_data_buffer(migf->buf_header);
|
||
|
migf->buf_header = NULL;
|
||
|
}
|
||
|
|
||
|
list_splice(&migf->avail_list, &migf->buf_list);
|
||
|
|
||
|
while ((entry = list_first_entry_or_null(&migf->buf_list,
|
||
|
struct virtiovf_data_buffer, buf_elm))) {
|
||
|
list_del(&entry->buf_elm);
|
||
|
virtiovf_free_data_buffer(entry);
|
||
|
}
|
||
|
|
||
|
if (migf->has_obj_id)
|
||
|
virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id);
|
||
|
}
|
||
|
|
||
|
static void virtiovf_disable_fd(struct virtiovf_migration_file *migf)
|
||
|
{
|
||
|
mutex_lock(&migf->lock);
|
||
|
migf->state = VIRTIOVF_MIGF_STATE_ERROR;
|
||
|
migf->filp->f_pos = 0;
|
||
|
mutex_unlock(&migf->lock);
|
||
|
}
|
||
|
|
||
|
static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev)
|
||
|
{
|
||
|
if (virtvdev->resuming_migf) {
|
||
|
virtiovf_disable_fd(virtvdev->resuming_migf);
|
||
|
virtiovf_clean_migf_resources(virtvdev->resuming_migf);
|
||
|
fput(virtvdev->resuming_migf->filp);
|
||
|
virtvdev->resuming_migf = NULL;
|
||
|
}
|
||
|
if (virtvdev->saving_migf) {
|
||
|
virtiovf_disable_fd(virtvdev->saving_migf);
|
||
|
virtiovf_clean_migf_resources(virtvdev->saving_migf);
|
||
|
fput(virtvdev->saving_migf->filp);
|
||
|
virtvdev->saving_migf = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* This function is called in all state_mutex unlock cases to
|
||
|
* handle a 'deferred_reset' if exists.
|
||
|
*/
|
||
|
static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev)
|
||
|
{
|
||
|
again:
|
||
|
spin_lock(&virtvdev->reset_lock);
|
||
|
if (virtvdev->deferred_reset) {
|
||
|
virtvdev->deferred_reset = false;
|
||
|
spin_unlock(&virtvdev->reset_lock);
|
||
|
virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
|
||
|
virtiovf_disable_fds(virtvdev);
|
||
|
goto again;
|
||
|
}
|
||
|
mutex_unlock(&virtvdev->state_mutex);
|
||
|
spin_unlock(&virtvdev->reset_lock);
|
||
|
}
|
||
|
|
||
|
void virtiovf_migration_reset_done(struct pci_dev *pdev)
|
||
|
{
|
||
|
struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);
|
||
|
|
||
|
if (!virtvdev->migrate_cap)
|
||
|
return;
|
||
|
|
||
|
/*
|
||
|
* As the higher VFIO layers are holding locks across reset and using
|
||
|
* those same locks with the mm_lock we need to prevent ABBA deadlock
|
||
|
* with the state_mutex and mm_lock.
|
||
|
* In case the state_mutex was taken already we defer the cleanup work
|
||
|
* to the unlock flow of the other running context.
|
||
|
*/
|
||
|
spin_lock(&virtvdev->reset_lock);
|
||
|
virtvdev->deferred_reset = true;
|
||
|
if (!mutex_trylock(&virtvdev->state_mutex)) {
|
||
|
spin_unlock(&virtvdev->reset_lock);
|
||
|
return;
|
||
|
}
|
||
|
spin_unlock(&virtvdev->reset_lock);
|
||
|
virtiovf_state_mutex_unlock(virtvdev);
|
||
|
}
|
||
|
|
||
|
static int virtiovf_release_file(struct inode *inode, struct file *filp)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf = filp->private_data;
|
||
|
|
||
|
virtiovf_disable_fd(migf);
|
||
|
mutex_destroy(&migf->lock);
|
||
|
kfree(migf);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static struct virtiovf_data_buffer *
|
||
|
virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf,
|
||
|
loff_t pos, bool *end_of_data)
|
||
|
{
|
||
|
struct virtiovf_data_buffer *buf;
|
||
|
bool found = false;
|
||
|
|
||
|
*end_of_data = false;
|
||
|
spin_lock_irq(&migf->list_lock);
|
||
|
if (list_empty(&migf->buf_list)) {
|
||
|
*end_of_data = true;
|
||
|
goto end;
|
||
|
}
|
||
|
|
||
|
buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer,
|
||
|
buf_elm);
|
||
|
if (pos >= buf->start_pos &&
|
||
|
pos < buf->start_pos + buf->length) {
|
||
|
found = true;
|
||
|
goto end;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* As we use a stream based FD we may expect having the data always
|
||
|
* on first chunk
|
||
|
*/
|
||
|
migf->state = VIRTIOVF_MIGF_STATE_ERROR;
|
||
|
|
||
|
end:
|
||
|
spin_unlock_irq(&migf->list_lock);
|
||
|
return found ? buf : NULL;
|
||
|
}
|
||
|
|
||
|
static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf,
|
||
|
char __user **buf, size_t *len, loff_t *pos)
|
||
|
{
|
||
|
unsigned long offset;
|
||
|
ssize_t done = 0;
|
||
|
size_t copy_len;
|
||
|
|
||
|
copy_len = min_t(size_t,
|
||
|
vhca_buf->start_pos + vhca_buf->length - *pos, *len);
|
||
|
while (copy_len) {
|
||
|
size_t page_offset;
|
||
|
struct page *page;
|
||
|
size_t page_len;
|
||
|
u8 *from_buff;
|
||
|
int ret;
|
||
|
|
||
|
offset = *pos - vhca_buf->start_pos;
|
||
|
page_offset = offset % PAGE_SIZE;
|
||
|
offset -= page_offset;
|
||
|
page = virtiovf_get_migration_page(vhca_buf, offset);
|
||
|
if (!page)
|
||
|
return -EINVAL;
|
||
|
page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
|
||
|
from_buff = kmap_local_page(page);
|
||
|
ret = copy_to_user(*buf, from_buff + page_offset, page_len);
|
||
|
kunmap_local(from_buff);
|
||
|
if (ret)
|
||
|
return -EFAULT;
|
||
|
*pos += page_len;
|
||
|
*len -= page_len;
|
||
|
*buf += page_len;
|
||
|
done += page_len;
|
||
|
copy_len -= page_len;
|
||
|
}
|
||
|
|
||
|
if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
|
||
|
spin_lock_irq(&vhca_buf->migf->list_lock);
|
||
|
list_del_init(&vhca_buf->buf_elm);
|
||
|
list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
|
||
|
spin_unlock_irq(&vhca_buf->migf->list_lock);
|
||
|
}
|
||
|
|
||
|
return done;
|
||
|
}
|
||
|
|
||
|
static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len,
|
||
|
loff_t *pos)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf = filp->private_data;
|
||
|
struct virtiovf_data_buffer *vhca_buf;
|
||
|
bool first_loop_call = true;
|
||
|
bool end_of_data;
|
||
|
ssize_t done = 0;
|
||
|
|
||
|
if (pos)
|
||
|
return -ESPIPE;
|
||
|
pos = &filp->f_pos;
|
||
|
|
||
|
mutex_lock(&migf->lock);
|
||
|
if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
|
||
|
done = -ENODEV;
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
|
||
|
while (len) {
|
||
|
ssize_t count;
|
||
|
|
||
|
vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
|
||
|
if (first_loop_call) {
|
||
|
first_loop_call = false;
|
||
|
/* Temporary end of file as part of PRE_COPY */
|
||
|
if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) {
|
||
|
done = -ENOMSG;
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) {
|
||
|
done = -EINVAL;
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (end_of_data)
|
||
|
goto out_unlock;
|
||
|
|
||
|
if (!vhca_buf) {
|
||
|
done = -EINVAL;
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
|
||
|
count = virtiovf_buf_read(vhca_buf, &buf, &len, pos);
|
||
|
if (count < 0) {
|
||
|
done = count;
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
done += count;
|
||
|
}
|
||
|
|
||
|
out_unlock:
|
||
|
mutex_unlock(&migf->lock);
|
||
|
return done;
|
||
|
}
|
||
|
|
||
|
static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
|
||
|
unsigned long arg)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf = filp->private_data;
|
||
|
struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
|
||
|
struct vfio_precopy_info info = {};
|
||
|
loff_t *pos = &filp->f_pos;
|
||
|
bool end_of_data = false;
|
||
|
unsigned long minsz;
|
||
|
u32 ctx_size = 0;
|
||
|
int ret;
|
||
|
|
||
|
if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
|
||
|
return -ENOTTY;
|
||
|
|
||
|
minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
|
||
|
if (copy_from_user(&info, (void __user *)arg, minsz))
|
||
|
return -EFAULT;
|
||
|
|
||
|
if (info.argsz < minsz)
|
||
|
return -EINVAL;
|
||
|
|
||
|
mutex_lock(&virtvdev->state_mutex);
|
||
|
if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
|
||
|
virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
|
||
|
ret = -EINVAL;
|
||
|
goto err_state_unlock;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* The virtio specification does not include a PRE_COPY concept.
|
||
|
* Since we can expect the data to remain the same for a certain period,
|
||
|
* we use a rate limiter mechanism before making a call to the device.
|
||
|
*/
|
||
|
if (__ratelimit(&migf->pre_copy_rl_state)) {
|
||
|
|
||
|
ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
|
||
|
VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
|
||
|
&ctx_size);
|
||
|
if (ret)
|
||
|
goto err_state_unlock;
|
||
|
}
|
||
|
|
||
|
mutex_lock(&migf->lock);
|
||
|
if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
|
||
|
ret = -ENODEV;
|
||
|
goto err_migf_unlock;
|
||
|
}
|
||
|
|
||
|
if (migf->pre_copy_initial_bytes > *pos) {
|
||
|
info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
|
||
|
} else {
|
||
|
info.dirty_bytes = migf->max_pos - *pos;
|
||
|
if (!info.dirty_bytes)
|
||
|
end_of_data = true;
|
||
|
info.dirty_bytes += ctx_size;
|
||
|
}
|
||
|
|
||
|
if (!end_of_data || !ctx_size) {
|
||
|
mutex_unlock(&migf->lock);
|
||
|
goto done;
|
||
|
}
|
||
|
|
||
|
mutex_unlock(&migf->lock);
|
||
|
/*
|
||
|
* We finished transferring the current state and the device has a
|
||
|
* dirty state, read a new state.
|
||
|
*/
|
||
|
ret = virtiovf_read_device_context_chunk(migf, ctx_size);
|
||
|
if (ret)
|
||
|
/*
|
||
|
* The machine is running, and context size could be grow, so no reason to mark
|
||
|
* the device state as VIRTIOVF_MIGF_STATE_ERROR.
|
||
|
*/
|
||
|
goto err_state_unlock;
|
||
|
|
||
|
done:
|
||
|
virtiovf_state_mutex_unlock(virtvdev);
|
||
|
if (copy_to_user((void __user *)arg, &info, minsz))
|
||
|
return -EFAULT;
|
||
|
return 0;
|
||
|
|
||
|
err_migf_unlock:
|
||
|
mutex_unlock(&migf->lock);
|
||
|
err_state_unlock:
|
||
|
virtiovf_state_mutex_unlock(virtvdev);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static const struct file_operations virtiovf_save_fops = {
|
||
|
.owner = THIS_MODULE,
|
||
|
.read = virtiovf_save_read,
|
||
|
.unlocked_ioctl = virtiovf_precopy_ioctl,
|
||
|
.compat_ioctl = compat_ptr_ioctl,
|
||
|
.release = virtiovf_release_file,
|
||
|
};
|
||
|
|
||
|
static int
|
||
|
virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf,
|
||
|
u32 data_size)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf = header_buf->migf;
|
||
|
struct virtiovf_migration_header header = {};
|
||
|
struct page *page;
|
||
|
u8 *to_buff;
|
||
|
|
||
|
header.record_size = cpu_to_le64(data_size);
|
||
|
header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY);
|
||
|
header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA);
|
||
|
page = virtiovf_get_migration_page(header_buf, 0);
|
||
|
if (!page)
|
||
|
return -EINVAL;
|
||
|
to_buff = kmap_local_page(page);
|
||
|
memcpy(to_buff, &header, sizeof(header));
|
||
|
kunmap_local(to_buff);
|
||
|
header_buf->length = sizeof(header);
|
||
|
header_buf->start_pos = header_buf->migf->max_pos;
|
||
|
migf->max_pos += header_buf->length;
|
||
|
spin_lock_irq(&migf->list_lock);
|
||
|
list_add_tail(&header_buf->buf_elm, &migf->buf_list);
|
||
|
spin_unlock_irq(&migf->list_lock);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
|
||
|
u32 ctx_size)
|
||
|
{
|
||
|
struct virtiovf_data_buffer *header_buf;
|
||
|
struct virtiovf_data_buffer *buf;
|
||
|
bool unmark_end = false;
|
||
|
struct scatterlist *sg;
|
||
|
unsigned int i;
|
||
|
u32 res_size;
|
||
|
int nent;
|
||
|
int ret;
|
||
|
|
||
|
buf = virtiovf_get_data_buffer(migf, ctx_size);
|
||
|
if (IS_ERR(buf))
|
||
|
return PTR_ERR(buf);
|
||
|
|
||
|
/* Find the total count of SG entries which satisfies the size */
|
||
|
nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size);
|
||
|
if (nent <= 0) {
|
||
|
ret = -EINVAL;
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Iterate to that SG entry and mark it as last (if it's not already)
|
||
|
* to let underlay layers iterate only till that entry.
|
||
|
*/
|
||
|
for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i)
|
||
|
;
|
||
|
|
||
|
if (!sg_is_last(sg)) {
|
||
|
unmark_end = true;
|
||
|
sg_mark_end(sg);
|
||
|
}
|
||
|
|
||
|
ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS,
|
||
|
migf->obj_id,
|
||
|
VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL,
|
||
|
buf->table.sgt.sgl, &res_size);
|
||
|
/* Restore the original SG mark end */
|
||
|
if (unmark_end)
|
||
|
sg_unmark_end(sg);
|
||
|
if (ret)
|
||
|
goto out;
|
||
|
|
||
|
buf->length = res_size;
|
||
|
header_buf = virtiovf_get_data_buffer(migf,
|
||
|
sizeof(struct virtiovf_migration_header));
|
||
|
if (IS_ERR(header_buf)) {
|
||
|
ret = PTR_ERR(header_buf);
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
ret = virtiovf_add_buf_header(header_buf, res_size);
|
||
|
if (ret)
|
||
|
goto out_header;
|
||
|
|
||
|
buf->start_pos = buf->migf->max_pos;
|
||
|
migf->max_pos += buf->length;
|
||
|
spin_lock(&migf->list_lock);
|
||
|
list_add_tail(&buf->buf_elm, &migf->buf_list);
|
||
|
spin_unlock_irq(&migf->list_lock);
|
||
|
return 0;
|
||
|
|
||
|
out_header:
|
||
|
virtiovf_put_data_buffer(header_buf);
|
||
|
out:
|
||
|
virtiovf_put_data_buffer(buf);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf = virtvdev->saving_migf;
|
||
|
u32 ctx_size;
|
||
|
int ret;
|
||
|
|
||
|
if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
|
||
|
return -ENODEV;
|
||
|
|
||
|
ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
|
||
|
VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
|
||
|
&ctx_size);
|
||
|
if (ret)
|
||
|
goto err;
|
||
|
|
||
|
if (!ctx_size) {
|
||
|
ret = -EINVAL;
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
ret = virtiovf_read_device_context_chunk(migf, ctx_size);
|
||
|
if (ret)
|
||
|
goto err;
|
||
|
|
||
|
migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
|
||
|
return 0;
|
||
|
|
||
|
err:
|
||
|
migf->state = VIRTIOVF_MIGF_STATE_ERROR;
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static struct virtiovf_migration_file *
|
||
|
virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
|
||
|
bool pre_copy)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf;
|
||
|
u32 ctx_size;
|
||
|
u32 obj_id;
|
||
|
int ret;
|
||
|
|
||
|
migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
|
||
|
if (!migf)
|
||
|
return ERR_PTR(-ENOMEM);
|
||
|
|
||
|
migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf,
|
||
|
O_RDONLY);
|
||
|
if (IS_ERR(migf->filp)) {
|
||
|
ret = PTR_ERR(migf->filp);
|
||
|
kfree(migf);
|
||
|
return ERR_PTR(ret);
|
||
|
}
|
||
|
|
||
|
stream_open(migf->filp->f_inode, migf->filp);
|
||
|
mutex_init(&migf->lock);
|
||
|
INIT_LIST_HEAD(&migf->buf_list);
|
||
|
INIT_LIST_HEAD(&migf->avail_list);
|
||
|
spin_lock_init(&migf->list_lock);
|
||
|
migf->virtvdev = virtvdev;
|
||
|
|
||
|
lockdep_assert_held(&virtvdev->state_mutex);
|
||
|
ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
|
||
|
&obj_id);
|
||
|
if (ret)
|
||
|
goto out;
|
||
|
|
||
|
migf->obj_id = obj_id;
|
||
|
/* Mark as having a valid obj id which can be even 0 */
|
||
|
migf->has_obj_id = true;
|
||
|
ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
|
||
|
VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
|
||
|
&ctx_size);
|
||
|
if (ret)
|
||
|
goto out_clean;
|
||
|
|
||
|
if (!ctx_size) {
|
||
|
ret = -EINVAL;
|
||
|
goto out_clean;
|
||
|
}
|
||
|
|
||
|
ret = virtiovf_read_device_context_chunk(migf, ctx_size);
|
||
|
if (ret)
|
||
|
goto out_clean;
|
||
|
|
||
|
if (pre_copy) {
|
||
|
migf->pre_copy_initial_bytes = migf->max_pos;
|
||
|
/* Arbitrarily set the pre-copy rate limit to 1-second intervals */
|
||
|
ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
|
||
|
/* Prevent any rate messages upon its usage */
|
||
|
ratelimit_set_flags(&migf->pre_copy_rl_state,
|
||
|
RATELIMIT_MSG_ON_RELEASE);
|
||
|
migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
|
||
|
} else {
|
||
|
migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
|
||
|
}
|
||
|
|
||
|
return migf;
|
||
|
|
||
|
out_clean:
|
||
|
virtiovf_clean_migf_resources(migf);
|
||
|
out:
|
||
|
fput(migf->filp);
|
||
|
return ERR_PTR(ret);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Set the required object header at the beginning of the buffer.
|
||
|
* The actual device parts data will be written post of the header offset.
|
||
|
*/
|
||
|
static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf)
|
||
|
{
|
||
|
struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {};
|
||
|
struct page *page;
|
||
|
u8 *to_buff;
|
||
|
|
||
|
obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS);
|
||
|
obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id);
|
||
|
page = virtiovf_get_migration_page(vhca_buf, 0);
|
||
|
if (!page)
|
||
|
return -EINVAL;
|
||
|
to_buff = kmap_local_page(page);
|
||
|
memcpy(to_buff, &obj_hdr, sizeof(obj_hdr));
|
||
|
kunmap_local(to_buff);
|
||
|
|
||
|
/* Mark the buffer as including the header object data */
|
||
|
vhca_buf->include_header_object = 1;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf,
|
||
|
const char __user **buf, size_t *len,
|
||
|
loff_t *pos, ssize_t *done)
|
||
|
{
|
||
|
unsigned long offset;
|
||
|
size_t page_offset;
|
||
|
struct page *page;
|
||
|
size_t page_len;
|
||
|
u8 *to_buff;
|
||
|
int ret;
|
||
|
|
||
|
offset = *pos - vhca_buf->start_pos;
|
||
|
|
||
|
if (vhca_buf->include_header_object)
|
||
|
/* The buffer holds the object header, update the offset accordingly */
|
||
|
offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
|
||
|
|
||
|
page_offset = offset % PAGE_SIZE;
|
||
|
|
||
|
page = virtiovf_get_migration_page(vhca_buf, offset - page_offset);
|
||
|
if (!page)
|
||
|
return -EINVAL;
|
||
|
|
||
|
page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
|
||
|
to_buff = kmap_local_page(page);
|
||
|
ret = copy_from_user(to_buff + page_offset, *buf, page_len);
|
||
|
kunmap_local(to_buff);
|
||
|
if (ret)
|
||
|
return -EFAULT;
|
||
|
|
||
|
*pos += page_len;
|
||
|
*done += page_len;
|
||
|
*buf += page_len;
|
||
|
*len -= page_len;
|
||
|
vhca_buf->length += page_len;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static ssize_t
|
||
|
virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf,
|
||
|
struct virtiovf_data_buffer *vhca_buf,
|
||
|
size_t chunk_size, const char __user **buf,
|
||
|
size_t *len, loff_t *pos, ssize_t *done,
|
||
|
bool *has_work)
|
||
|
{
|
||
|
size_t copy_len, to_copy;
|
||
|
int ret;
|
||
|
|
||
|
to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length);
|
||
|
copy_len = to_copy;
|
||
|
while (to_copy) {
|
||
|
ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
|
||
|
pos, done);
|
||
|
if (ret)
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
*len -= copy_len;
|
||
|
if (vhca_buf->length == chunk_size) {
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK;
|
||
|
migf->max_pos += chunk_size;
|
||
|
*has_work = true;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf,
|
||
|
struct virtiovf_data_buffer *vhca_buf,
|
||
|
const char __user **buf, size_t *len,
|
||
|
loff_t *pos, ssize_t *done)
|
||
|
{
|
||
|
size_t copy_len, to_copy;
|
||
|
size_t required_data;
|
||
|
int ret;
|
||
|
|
||
|
required_data = migf->record_size - vhca_buf->length;
|
||
|
to_copy = min_t(size_t, *len, required_data);
|
||
|
copy_len = to_copy;
|
||
|
while (to_copy) {
|
||
|
ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
|
||
|
pos, done);
|
||
|
if (ret)
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
*len -= copy_len;
|
||
|
if (vhca_buf->length == migf->record_size) {
|
||
|
switch (migf->record_tag) {
|
||
|
default:
|
||
|
/* Optional tag */
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
|
||
|
migf->max_pos += migf->record_size;
|
||
|
vhca_buf->length = 0;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
virtiovf_resume_read_header(struct virtiovf_migration_file *migf,
|
||
|
struct virtiovf_data_buffer *vhca_buf,
|
||
|
const char __user **buf,
|
||
|
size_t *len, loff_t *pos,
|
||
|
ssize_t *done, bool *has_work)
|
||
|
{
|
||
|
struct page *page;
|
||
|
size_t copy_len;
|
||
|
u8 *to_buff;
|
||
|
int ret;
|
||
|
|
||
|
copy_len = min_t(size_t, *len,
|
||
|
sizeof(struct virtiovf_migration_header) - vhca_buf->length);
|
||
|
page = virtiovf_get_migration_page(vhca_buf, 0);
|
||
|
if (!page)
|
||
|
return -EINVAL;
|
||
|
to_buff = kmap_local_page(page);
|
||
|
ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
|
||
|
if (ret) {
|
||
|
ret = -EFAULT;
|
||
|
goto end;
|
||
|
}
|
||
|
|
||
|
*buf += copy_len;
|
||
|
*pos += copy_len;
|
||
|
*done += copy_len;
|
||
|
*len -= copy_len;
|
||
|
vhca_buf->length += copy_len;
|
||
|
if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) {
|
||
|
u64 record_size;
|
||
|
u32 flags;
|
||
|
|
||
|
record_size = le64_to_cpup((__le64 *)to_buff);
|
||
|
if (record_size > MAX_LOAD_SIZE) {
|
||
|
ret = -ENOMEM;
|
||
|
goto end;
|
||
|
}
|
||
|
|
||
|
migf->record_size = record_size;
|
||
|
flags = le32_to_cpup((__le32 *)(to_buff +
|
||
|
offsetof(struct virtiovf_migration_header, flags)));
|
||
|
migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
|
||
|
offsetof(struct virtiovf_migration_header, tag)));
|
||
|
switch (migf->record_tag) {
|
||
|
case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA:
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_PREP_CHUNK;
|
||
|
break;
|
||
|
default:
|
||
|
if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
|
||
|
ret = -EOPNOTSUPP;
|
||
|
goto end;
|
||
|
}
|
||
|
/* We may read and skip this optional record data */
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA;
|
||
|
}
|
||
|
|
||
|
migf->max_pos += vhca_buf->length;
|
||
|
vhca_buf->length = 0;
|
||
|
*has_work = true;
|
||
|
}
|
||
|
end:
|
||
|
kunmap_local(to_buff);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf,
|
||
|
size_t len, loff_t *pos)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf = filp->private_data;
|
||
|
struct virtiovf_data_buffer *vhca_buf = migf->buf;
|
||
|
struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header;
|
||
|
unsigned int orig_length;
|
||
|
bool has_work = false;
|
||
|
ssize_t done = 0;
|
||
|
int ret = 0;
|
||
|
|
||
|
if (pos)
|
||
|
return -ESPIPE;
|
||
|
|
||
|
pos = &filp->f_pos;
|
||
|
if (*pos < vhca_buf->start_pos)
|
||
|
return -EINVAL;
|
||
|
|
||
|
mutex_lock(&migf->virtvdev->state_mutex);
|
||
|
mutex_lock(&migf->lock);
|
||
|
if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
|
||
|
done = -ENODEV;
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
|
||
|
while (len || has_work) {
|
||
|
has_work = false;
|
||
|
switch (migf->load_state) {
|
||
|
case VIRTIOVF_LOAD_STATE_READ_HEADER:
|
||
|
ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf,
|
||
|
&len, pos, &done, &has_work);
|
||
|
if (ret)
|
||
|
goto out_unlock;
|
||
|
break;
|
||
|
case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA:
|
||
|
if (vhca_buf_header->allocated_length < migf->record_size) {
|
||
|
virtiovf_free_data_buffer(vhca_buf_header);
|
||
|
|
||
|
migf->buf_header = virtiovf_alloc_data_buffer(migf,
|
||
|
migf->record_size);
|
||
|
if (IS_ERR(migf->buf_header)) {
|
||
|
ret = PTR_ERR(migf->buf_header);
|
||
|
migf->buf_header = NULL;
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
|
||
|
vhca_buf_header = migf->buf_header;
|
||
|
}
|
||
|
|
||
|
vhca_buf_header->start_pos = migf->max_pos;
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA;
|
||
|
break;
|
||
|
case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA:
|
||
|
ret = virtiovf_resume_read_header_data(migf, vhca_buf_header,
|
||
|
&buf, &len, pos, &done);
|
||
|
if (ret)
|
||
|
goto out_unlock;
|
||
|
break;
|
||
|
case VIRTIOVF_LOAD_STATE_PREP_CHUNK:
|
||
|
{
|
||
|
u32 cmd_size = migf->record_size +
|
||
|
sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
|
||
|
|
||
|
/*
|
||
|
* The DMA map/unmap is managed in virtio layer, we just need to extend
|
||
|
* the SG pages to hold the extra required chunk data.
|
||
|
*/
|
||
|
if (vhca_buf->allocated_length < cmd_size) {
|
||
|
ret = virtiovf_add_migration_pages(vhca_buf,
|
||
|
DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length,
|
||
|
PAGE_SIZE));
|
||
|
if (ret)
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
|
||
|
vhca_buf->start_pos = migf->max_pos;
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK;
|
||
|
break;
|
||
|
}
|
||
|
case VIRTIOVF_LOAD_STATE_READ_CHUNK:
|
||
|
ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size,
|
||
|
&buf, &len, pos, &done, &has_work);
|
||
|
if (ret)
|
||
|
goto out_unlock;
|
||
|
break;
|
||
|
case VIRTIOVF_LOAD_STATE_LOAD_CHUNK:
|
||
|
/* Mark the last SG entry and set its length */
|
||
|
sg_mark_end(vhca_buf->last_offset_sg);
|
||
|
orig_length = vhca_buf->last_offset_sg->length;
|
||
|
/* Length should include the resource object command header */
|
||
|
vhca_buf->last_offset_sg->length = vhca_buf->length +
|
||
|
sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) -
|
||
|
vhca_buf->last_offset;
|
||
|
ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev,
|
||
|
vhca_buf->table.sgt.sgl);
|
||
|
/* Restore the original SG data */
|
||
|
vhca_buf->last_offset_sg->length = orig_length;
|
||
|
sg_unmark_end(vhca_buf->last_offset_sg);
|
||
|
if (ret)
|
||
|
goto out_unlock;
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
|
||
|
/* be ready for reading the next chunk */
|
||
|
vhca_buf->length = 0;
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
out_unlock:
|
||
|
if (ret)
|
||
|
migf->state = VIRTIOVF_MIGF_STATE_ERROR;
|
||
|
mutex_unlock(&migf->lock);
|
||
|
virtiovf_state_mutex_unlock(migf->virtvdev);
|
||
|
return ret ? ret : done;
|
||
|
}
|
||
|
|
||
|
static const struct file_operations virtiovf_resume_fops = {
|
||
|
.owner = THIS_MODULE,
|
||
|
.write = virtiovf_resume_write,
|
||
|
.release = virtiovf_release_file,
|
||
|
};
|
||
|
|
||
|
static struct virtiovf_migration_file *
|
||
|
virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev)
|
||
|
{
|
||
|
struct virtiovf_migration_file *migf;
|
||
|
struct virtiovf_data_buffer *buf;
|
||
|
u32 obj_id;
|
||
|
int ret;
|
||
|
|
||
|
migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
|
||
|
if (!migf)
|
||
|
return ERR_PTR(-ENOMEM);
|
||
|
|
||
|
migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf,
|
||
|
O_WRONLY);
|
||
|
if (IS_ERR(migf->filp)) {
|
||
|
ret = PTR_ERR(migf->filp);
|
||
|
kfree(migf);
|
||
|
return ERR_PTR(ret);
|
||
|
}
|
||
|
|
||
|
stream_open(migf->filp->f_inode, migf->filp);
|
||
|
mutex_init(&migf->lock);
|
||
|
INIT_LIST_HEAD(&migf->buf_list);
|
||
|
INIT_LIST_HEAD(&migf->avail_list);
|
||
|
spin_lock_init(&migf->list_lock);
|
||
|
|
||
|
buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE);
|
||
|
if (IS_ERR(buf)) {
|
||
|
ret = PTR_ERR(buf);
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
migf->buf = buf;
|
||
|
|
||
|
buf = virtiovf_alloc_data_buffer(migf,
|
||
|
sizeof(struct virtiovf_migration_header));
|
||
|
if (IS_ERR(buf)) {
|
||
|
ret = PTR_ERR(buf);
|
||
|
goto out_clean;
|
||
|
}
|
||
|
|
||
|
migf->buf_header = buf;
|
||
|
migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
|
||
|
|
||
|
migf->virtvdev = virtvdev;
|
||
|
ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET,
|
||
|
&obj_id);
|
||
|
if (ret)
|
||
|
goto out_clean;
|
||
|
|
||
|
migf->obj_id = obj_id;
|
||
|
/* Mark as having a valid obj id which can be even 0 */
|
||
|
migf->has_obj_id = true;
|
||
|
ret = virtiovf_set_obj_cmd_header(migf->buf);
|
||
|
if (ret)
|
||
|
goto out_clean;
|
||
|
|
||
|
return migf;
|
||
|
|
||
|
out_clean:
|
||
|
virtiovf_clean_migf_resources(migf);
|
||
|
out:
|
||
|
fput(migf->filp);
|
||
|
return ERR_PTR(ret);
|
||
|
}
|
||
|
|
||
|
static struct file *
|
||
|
virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
|
||
|
u32 new)
|
||
|
{
|
||
|
u32 cur = virtvdev->mig_state;
|
||
|
int ret;
|
||
|
|
||
|
if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
|
||
|
/* NOP */
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
|
||
|
/* NOP */
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
|
||
|
(cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
|
||
|
ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
|
||
|
BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
|
||
|
if (ret)
|
||
|
return ERR_PTR(ret);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
|
||
|
(cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
|
||
|
ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
|
||
|
if (ret)
|
||
|
return ERR_PTR(ret);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
|
||
|
struct virtiovf_migration_file *migf;
|
||
|
|
||
|
migf = virtiovf_pci_save_device_data(virtvdev, false);
|
||
|
if (IS_ERR(migf))
|
||
|
return ERR_CAST(migf);
|
||
|
get_file(migf->filp);
|
||
|
virtvdev->saving_migf = migf;
|
||
|
return migf->filp;
|
||
|
}
|
||
|
|
||
|
if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
|
||
|
(cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
|
||
|
(cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
|
||
|
virtiovf_disable_fds(virtvdev);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
|
||
|
struct virtiovf_migration_file *migf;
|
||
|
|
||
|
migf = virtiovf_pci_resume_device_data(virtvdev);
|
||
|
if (IS_ERR(migf))
|
||
|
return ERR_CAST(migf);
|
||
|
get_file(migf->filp);
|
||
|
virtvdev->resuming_migf = migf;
|
||
|
return migf->filp;
|
||
|
}
|
||
|
|
||
|
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
|
||
|
virtiovf_disable_fds(virtvdev);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
|
||
|
(cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
|
||
|
new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
|
||
|
struct virtiovf_migration_file *migf;
|
||
|
|
||
|
migf = virtiovf_pci_save_device_data(virtvdev, true);
|
||
|
if (IS_ERR(migf))
|
||
|
return ERR_CAST(migf);
|
||
|
get_file(migf->filp);
|
||
|
virtvdev->saving_migf = migf;
|
||
|
return migf->filp;
|
||
|
}
|
||
|
|
||
|
if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
|
||
|
ret = virtiovf_pci_save_device_final_data(virtvdev);
|
||
|
return ret ? ERR_PTR(ret) : NULL;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* vfio_mig_get_next_state() does not use arcs other than the above
|
||
|
*/
|
||
|
WARN_ON(true);
|
||
|
return ERR_PTR(-EINVAL);
|
||
|
}
|
||
|
|
||
|
static struct file *
|
||
|
virtiovf_pci_set_device_state(struct vfio_device *vdev,
|
||
|
enum vfio_device_mig_state new_state)
|
||
|
{
|
||
|
struct virtiovf_pci_core_device *virtvdev = container_of(
|
||
|
vdev, struct virtiovf_pci_core_device, core_device.vdev);
|
||
|
enum vfio_device_mig_state next_state;
|
||
|
struct file *res = NULL;
|
||
|
int ret;
|
||
|
|
||
|
mutex_lock(&virtvdev->state_mutex);
|
||
|
while (new_state != virtvdev->mig_state) {
|
||
|
ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state,
|
||
|
new_state, &next_state);
|
||
|
if (ret) {
|
||
|
res = ERR_PTR(ret);
|
||
|
break;
|
||
|
}
|
||
|
res = virtiovf_pci_step_device_state_locked(virtvdev, next_state);
|
||
|
if (IS_ERR(res))
|
||
|
break;
|
||
|
virtvdev->mig_state = next_state;
|
||
|
if (WARN_ON(res && new_state != virtvdev->mig_state)) {
|
||
|
fput(res);
|
||
|
res = ERR_PTR(-EINVAL);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
virtiovf_state_mutex_unlock(virtvdev);
|
||
|
return res;
|
||
|
}
|
||
|
|
||
|
static int virtiovf_pci_get_device_state(struct vfio_device *vdev,
|
||
|
enum vfio_device_mig_state *curr_state)
|
||
|
{
|
||
|
struct virtiovf_pci_core_device *virtvdev = container_of(
|
||
|
vdev, struct virtiovf_pci_core_device, core_device.vdev);
|
||
|
|
||
|
mutex_lock(&virtvdev->state_mutex);
|
||
|
*curr_state = virtvdev->mig_state;
|
||
|
virtiovf_state_mutex_unlock(virtvdev);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int virtiovf_pci_get_data_size(struct vfio_device *vdev,
|
||
|
unsigned long *stop_copy_length)
|
||
|
{
|
||
|
struct virtiovf_pci_core_device *virtvdev = container_of(
|
||
|
vdev, struct virtiovf_pci_core_device, core_device.vdev);
|
||
|
bool obj_id_exists;
|
||
|
u32 res_size;
|
||
|
u32 obj_id;
|
||
|
int ret;
|
||
|
|
||
|
mutex_lock(&virtvdev->state_mutex);
|
||
|
obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id;
|
||
|
if (!obj_id_exists) {
|
||
|
ret = virtiovf_pci_alloc_obj_id(virtvdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
|
||
|
&obj_id);
|
||
|
if (ret)
|
||
|
goto end;
|
||
|
} else {
|
||
|
obj_id = virtvdev->saving_migf->obj_id;
|
||
|
}
|
||
|
|
||
|
ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
|
||
|
VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
|
||
|
VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
|
||
|
&res_size);
|
||
|
if (!ret)
|
||
|
*stop_copy_length = res_size;
|
||
|
|
||
|
/*
|
||
|
* We can't leave this obj_id alive if didn't exist before, otherwise, it might
|
||
|
* stay alive, even without an active migration flow (e.g. migration was cancelled)
|
||
|
*/
|
||
|
if (!obj_id_exists)
|
||
|
virtiovf_pci_free_obj_id(virtvdev, obj_id);
|
||
|
end:
|
||
|
virtiovf_state_mutex_unlock(virtvdev);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static const struct vfio_migration_ops virtvdev_pci_mig_ops = {
|
||
|
.migration_set_state = virtiovf_pci_set_device_state,
|
||
|
.migration_get_state = virtiovf_pci_get_device_state,
|
||
|
.migration_get_data_size = virtiovf_pci_get_data_size,
|
||
|
};
|
||
|
|
||
|
void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
|
||
|
{
|
||
|
virtvdev->migrate_cap = 1;
|
||
|
mutex_init(&virtvdev->state_mutex);
|
||
|
spin_lock_init(&virtvdev->reset_lock);
|
||
|
virtvdev->core_device.vdev.migration_flags =
|
||
|
VFIO_MIGRATION_STOP_COPY |
|
||
|
VFIO_MIGRATION_P2P |
|
||
|
VFIO_MIGRATION_PRE_COPY;
|
||
|
virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
|
||
|
}
|
||
|
|
||
|
void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev)
|
||
|
{
|
||
|
if (!virtvdev->migrate_cap)
|
||
|
return;
|
||
|
|
||
|
virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
|
||
|
}
|
||
|
|
||
|
void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev)
|
||
|
{
|
||
|
if (!virtvdev->migrate_cap)
|
||
|
return;
|
||
|
|
||
|
virtiovf_disable_fds(virtvdev);
|
||
|
}
|