411 lines
9.2 KiB
C
411 lines
9.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* vgic_lpi_stress - Stress test for KVM's ITS emulation
|
|
*
|
|
* Copyright (c) 2024 Google LLC
|
|
*/
|
|
|
|
#include <linux/sizes.h>
|
|
#include <pthread.h>
|
|
#include <stdatomic.h>
|
|
#include <sys/sysinfo.h>
|
|
|
|
#include "kvm_util.h"
|
|
#include "gic.h"
|
|
#include "gic_v3.h"
|
|
#include "gic_v3_its.h"
|
|
#include "processor.h"
|
|
#include "ucall.h"
|
|
#include "vgic.h"
|
|
|
|
#define TEST_MEMSLOT_INDEX 1
|
|
|
|
#define GIC_LPI_OFFSET 8192
|
|
|
|
static size_t nr_iterations = 1000;
|
|
static vm_paddr_t gpa_base;
|
|
|
|
static struct kvm_vm *vm;
|
|
static struct kvm_vcpu **vcpus;
|
|
static int gic_fd, its_fd;
|
|
|
|
static struct test_data {
|
|
bool request_vcpus_stop;
|
|
u32 nr_cpus;
|
|
u32 nr_devices;
|
|
u32 nr_event_ids;
|
|
|
|
vm_paddr_t device_table;
|
|
vm_paddr_t collection_table;
|
|
vm_paddr_t cmdq_base;
|
|
void *cmdq_base_va;
|
|
vm_paddr_t itt_tables;
|
|
|
|
vm_paddr_t lpi_prop_table;
|
|
vm_paddr_t lpi_pend_tables;
|
|
} test_data = {
|
|
.nr_cpus = 1,
|
|
.nr_devices = 1,
|
|
.nr_event_ids = 16,
|
|
};
|
|
|
|
static void guest_irq_handler(struct ex_regs *regs)
|
|
{
|
|
u32 intid = gic_get_and_ack_irq();
|
|
|
|
if (intid == IAR_SPURIOUS)
|
|
return;
|
|
|
|
GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
|
|
gic_set_eoi(intid);
|
|
}
|
|
|
|
static void guest_setup_its_mappings(void)
|
|
{
|
|
u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
|
|
u32 nr_events = test_data.nr_event_ids;
|
|
u32 nr_devices = test_data.nr_devices;
|
|
u32 nr_cpus = test_data.nr_cpus;
|
|
|
|
for (coll_id = 0; coll_id < nr_cpus; coll_id++)
|
|
its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
|
|
|
|
/* Round-robin the LPIs to all of the vCPUs in the VM */
|
|
coll_id = 0;
|
|
for (device_id = 0; device_id < nr_devices; device_id++) {
|
|
vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
|
|
|
|
its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
|
|
itt_base, SZ_64K, true);
|
|
|
|
for (event_id = 0; event_id < nr_events; event_id++) {
|
|
its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
|
|
event_id, coll_id, intid++);
|
|
|
|
coll_id = (coll_id + 1) % test_data.nr_cpus;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void guest_invalidate_all_rdists(void)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < test_data.nr_cpus; i++)
|
|
its_send_invall_cmd(test_data.cmdq_base_va, i);
|
|
}
|
|
|
|
static void guest_setup_gic(void)
|
|
{
|
|
static atomic_int nr_cpus_ready = 0;
|
|
u32 cpuid = guest_get_vcpuid();
|
|
|
|
gic_init(GIC_V3, test_data.nr_cpus);
|
|
gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
|
|
test_data.lpi_pend_tables + (cpuid * SZ_64K));
|
|
|
|
atomic_fetch_add(&nr_cpus_ready, 1);
|
|
|
|
if (cpuid > 0)
|
|
return;
|
|
|
|
while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
|
|
cpu_relax();
|
|
|
|
its_init(test_data.collection_table, SZ_64K,
|
|
test_data.device_table, SZ_64K,
|
|
test_data.cmdq_base, SZ_64K);
|
|
|
|
guest_setup_its_mappings();
|
|
guest_invalidate_all_rdists();
|
|
}
|
|
|
|
static void guest_code(size_t nr_lpis)
|
|
{
|
|
guest_setup_gic();
|
|
|
|
GUEST_SYNC(0);
|
|
|
|
/*
|
|
* Don't use WFI here to avoid blocking the vCPU thread indefinitely and
|
|
* never getting the stop signal.
|
|
*/
|
|
while (!READ_ONCE(test_data.request_vcpus_stop))
|
|
cpu_relax();
|
|
|
|
GUEST_DONE();
|
|
}
|
|
|
|
static void setup_memslot(void)
|
|
{
|
|
size_t pages;
|
|
size_t sz;
|
|
|
|
/*
|
|
* For the ITS:
|
|
* - A single level device table
|
|
* - A single level collection table
|
|
* - The command queue
|
|
* - An ITT for each device
|
|
*/
|
|
sz = (3 + test_data.nr_devices) * SZ_64K;
|
|
|
|
/*
|
|
* For the redistributors:
|
|
* - A shared LPI configuration table
|
|
* - An LPI pending table for each vCPU
|
|
*/
|
|
sz += (1 + test_data.nr_cpus) * SZ_64K;
|
|
|
|
pages = sz / vm->page_size;
|
|
gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
|
|
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
|
|
TEST_MEMSLOT_INDEX, pages, 0);
|
|
}
|
|
|
|
#define LPI_PROP_DEFAULT_PRIO 0xa0
|
|
|
|
static void configure_lpis(void)
|
|
{
|
|
size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
|
|
u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
|
|
size_t i;
|
|
|
|
for (i = 0; i < nr_lpis; i++) {
|
|
tbl[i] = LPI_PROP_DEFAULT_PRIO |
|
|
LPI_PROP_GROUP1 |
|
|
LPI_PROP_ENABLED;
|
|
}
|
|
}
|
|
|
|
static void setup_test_data(void)
|
|
{
|
|
size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
|
|
u32 nr_devices = test_data.nr_devices;
|
|
u32 nr_cpus = test_data.nr_cpus;
|
|
vm_paddr_t cmdq_base;
|
|
|
|
test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
|
|
gpa_base,
|
|
TEST_MEMSLOT_INDEX);
|
|
|
|
test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
|
|
gpa_base,
|
|
TEST_MEMSLOT_INDEX);
|
|
|
|
cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
|
|
TEST_MEMSLOT_INDEX);
|
|
virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
|
|
test_data.cmdq_base = cmdq_base;
|
|
test_data.cmdq_base_va = (void *)cmdq_base;
|
|
|
|
test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
|
|
gpa_base, TEST_MEMSLOT_INDEX);
|
|
|
|
test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
|
|
gpa_base, TEST_MEMSLOT_INDEX);
|
|
configure_lpis();
|
|
|
|
test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
|
|
gpa_base, TEST_MEMSLOT_INDEX);
|
|
|
|
sync_global_to_guest(vm, test_data);
|
|
}
|
|
|
|
static void setup_gic(void)
|
|
{
|
|
gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
|
|
__TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
|
|
|
|
its_fd = vgic_its_setup(vm);
|
|
}
|
|
|
|
/*
 * Inject one MSI aimed at the ITS translation register on behalf of
 * @device_id, carrying @event_id as its payload, and assert that KVM
 * accepted it.
 */
static void signal_lpi(u32 device_id, u32 event_id)
{
	const vm_paddr_t doorbell = GITS_BASE_GPA + GITS_TRANSLATER;
	struct kvm_msi msi = { 0 };

	msi.address_lo = doorbell;
	msi.address_hi = doorbell >> 32;
	msi.data = event_id;
	msi.devid = device_id;
	msi.flags = KVM_MSI_VALID_DEVID;

	/*
	 * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
	 * which for arm64 implies having a valid translation in the ITS.
	 */
	TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
		    "KVM_SIGNAL_MSI ioctl failed");
}
|
|
|
|
static pthread_barrier_t test_setup_barrier;
|
|
|
|
static void *lpi_worker_thread(void *data)
|
|
{
|
|
u32 device_id = (size_t)data;
|
|
u32 event_id;
|
|
size_t i;
|
|
|
|
pthread_barrier_wait(&test_setup_barrier);
|
|
|
|
for (i = 0; i < nr_iterations; i++)
|
|
for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
|
|
signal_lpi(device_id, event_id);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static void *vcpu_worker_thread(void *data)
|
|
{
|
|
struct kvm_vcpu *vcpu = data;
|
|
struct ucall uc;
|
|
|
|
while (true) {
|
|
vcpu_run(vcpu);
|
|
|
|
switch (get_ucall(vcpu, &uc)) {
|
|
case UCALL_SYNC:
|
|
pthread_barrier_wait(&test_setup_barrier);
|
|
continue;
|
|
case UCALL_DONE:
|
|
return NULL;
|
|
case UCALL_ABORT:
|
|
REPORT_GUEST_ASSERT(uc);
|
|
break;
|
|
default:
|
|
TEST_FAIL("Unknown ucall: %lu", uc.cmd);
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static void report_stats(struct timespec delta)
|
|
{
|
|
double nr_lpis;
|
|
double time;
|
|
|
|
nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
|
|
|
|
time = delta.tv_sec;
|
|
time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
|
|
|
|
pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
|
|
}
|
|
|
|
static void run_test(void)
|
|
{
|
|
u32 nr_devices = test_data.nr_devices;
|
|
u32 nr_vcpus = test_data.nr_cpus;
|
|
pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
|
|
pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
|
|
struct timespec start, delta;
|
|
size_t i;
|
|
|
|
TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
|
|
|
|
pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
|
|
|
|
for (i = 0; i < nr_vcpus; i++)
|
|
pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
|
|
|
|
for (i = 0; i < nr_devices; i++)
|
|
pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
|
|
|
|
pthread_barrier_wait(&test_setup_barrier);
|
|
|
|
clock_gettime(CLOCK_MONOTONIC, &start);
|
|
|
|
for (i = 0; i < nr_devices; i++)
|
|
pthread_join(lpi_threads[i], NULL);
|
|
|
|
delta = timespec_elapsed(start);
|
|
write_guest_global(vm, test_data.request_vcpus_stop, true);
|
|
|
|
for (i = 0; i < nr_vcpus; i++)
|
|
pthread_join(vcpu_threads[i], NULL);
|
|
|
|
report_stats(delta);
|
|
}
|
|
|
|
static void setup_vm(void)
|
|
{
|
|
int i;
|
|
|
|
vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
|
|
TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
|
|
|
|
vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
|
|
|
|
vm_init_descriptor_tables(vm);
|
|
for (i = 0; i < test_data.nr_cpus; i++)
|
|
vcpu_init_descriptor_tables(vcpus[i]);
|
|
|
|
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
|
|
|
|
setup_memslot();
|
|
|
|
setup_gic();
|
|
|
|
setup_test_data();
|
|
}
|
|
|
|
static void destroy_vm(void)
|
|
{
|
|
close(its_fd);
|
|
close(gic_fd);
|
|
kvm_vm_free(vm);
|
|
free(vcpus);
|
|
}
|
|
|
|
static void pr_usage(const char *name)
|
|
{
|
|
pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);
|
|
pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
|
|
pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
|
|
pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
|
|
pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
|
|
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
u32 nr_threads;
|
|
int c;
|
|
|
|
while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
|
|
switch (c) {
|
|
case 'v':
|
|
test_data.nr_cpus = atoi(optarg);
|
|
break;
|
|
case 'd':
|
|
test_data.nr_devices = atoi(optarg);
|
|
break;
|
|
case 'e':
|
|
test_data.nr_event_ids = atoi(optarg);
|
|
break;
|
|
case 'i':
|
|
nr_iterations = strtoul(optarg, NULL, 0);
|
|
break;
|
|
case 'h':
|
|
default:
|
|
pr_usage(argv[0]);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
nr_threads = test_data.nr_cpus + test_data.nr_devices;
|
|
if (nr_threads > get_nprocs())
|
|
pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
|
|
nr_threads, get_nprocs());
|
|
|
|
setup_vm();
|
|
|
|
run_test();
|
|
|
|
destroy_vm();
|
|
|
|
return 0;
|
|
}
|