374 lines
9.8 KiB
C
374 lines
9.8 KiB
C
// SPDX-License-Identifier: GPL-2.0+
|
|
/*
|
|
* PCIe bandwidth controller
|
|
*
|
|
* Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
|
|
*
|
|
* Copyright (C) 2019 Dell Inc
|
|
* Copyright (C) 2023-2024 Intel Corporation
|
|
*
|
|
* The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
|
|
* and notify the operating system when the Link Width or Speed changes. The
|
|
* notification capability is required for all Root Ports and Downstream
|
|
* Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
|
|
*
|
|
* This service port driver hooks into the Bandwidth Notification interrupt
|
|
* watching for changes or links becoming degraded in operation. It updates
|
|
* the cached Current Link Speed that is exposed to user space through sysfs.
|
|
*/
|
|
|
|
#define dev_fmt(fmt) "bwctrl: " fmt
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/bits.h>
|
|
#include <linux/cleanup.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/pci-bwctrl.h>
|
|
#include <linux/rwsem.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
|
|
#include "../pci.h"
|
|
#include "portdrv.h"
|
|
|
|
/**
|
|
* struct pcie_bwctrl_data - PCIe bandwidth controller
|
|
* @set_speed_mutex: Serializes link speed changes
|
|
* @lbms_count: Count for LBMS (since last reset)
|
|
* @cdev: Thermal cooling device associated with the port
|
|
*/
|
|
struct pcie_bwctrl_data {
|
|
struct mutex set_speed_mutex;
|
|
atomic_t lbms_count;
|
|
struct thermal_cooling_device *cdev;
|
|
};
|
|
|
|
/*
 * Port removal must not race with the LBMS count accessors or with Link
 * Speed changes: removal takes both semaphores for writing, while the
 * accessors and the speed change path each take theirs for reading.
 *
 * Two separate semaphores are required because pcie_bwctrl_change_speed()
 * calls pcie_retrain_link(), which uses the LBMS count reset accessor on
 * success. Sharing a single rwsem for both purposes triggers a "possible
 * recursive locking detected" warning.
 */
static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
|
|
|
|
static bool pcie_valid_speed(enum pci_bus_speed speed)
|
|
{
|
|
return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
|
|
}
|
|
|
|
static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
|
|
{
|
|
static const u8 speed_conv[] = {
|
|
[PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
|
|
[PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
|
|
[PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
|
|
[PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
|
|
[PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
|
|
[PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
|
|
};
|
|
|
|
if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
|
|
return 0;
|
|
|
|
return speed_conv[speed];
|
|
}
|
|
|
|
/*
 * Derive the Target Link Speed from a Supported Link Speeds bitmask by
 * taking the position of its most significant set bit. The mask is handed
 * to __fls() as is, so callers must not pass zero.
 */
static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
{
	u16 target_speed = __fls(supported_speeds);

	return target_speed;
}
|
|
|
|
/**
|
|
* pcie_bwctrl_select_speed - Select Target Link Speed
|
|
* @port: PCIe Port
|
|
* @speed_req: Requested PCIe Link Speed
|
|
*
|
|
* Select Target Link Speed by take into account Supported Link Speeds of
|
|
* both the Root Port and the Endpoint.
|
|
*
|
|
* Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
|
|
*/
|
|
static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
|
|
{
|
|
struct pci_bus *bus = port->subordinate;
|
|
u8 desired_speeds, supported_speeds;
|
|
struct pci_dev *dev;
|
|
|
|
desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
|
|
__fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
|
|
|
|
supported_speeds = port->supported_speeds;
|
|
if (bus) {
|
|
down_read(&pci_bus_sem);
|
|
dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
|
|
if (dev)
|
|
supported_speeds &= dev->supported_speeds;
|
|
up_read(&pci_bus_sem);
|
|
}
|
|
if (!supported_speeds)
|
|
return PCI_EXP_LNKCAP2_SLS_2_5GB;
|
|
|
|
return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
|
|
}
|
|
|
|
/*
 * Write @target_speed into the Target Link Speed field of @port's Link
 * Control 2 register and retrain the Link.
 *
 * Return: 0 on success, a negative errno if the config access or the
 * retraining failed.
 */
static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
{
	int err;

	err = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
						 PCI_EXP_LNKCTL2_TLS, target_speed);
	if (err != PCIBIOS_SUCCESSFUL)
		return pcibios_err_to_errno(err);

	err = pcie_retrain_link(port, use_lt);
	if (err < 0)
		return err;

	/*
	 * Refresh the cached speed right away so that it is updated even on
	 * platforms that have problems with notifications.
	 */
	if (!port->subordinate)
		return 0;

	pcie_update_link_speed(port->subordinate);

	return 0;
}
|
|
|
|
/**
|
|
* pcie_set_target_speed - Set downstream Link Speed for PCIe Port
|
|
* @port: PCIe Port
|
|
* @speed_req: Requested PCIe Link Speed
|
|
* @use_lt: Wait for the LT or DLLLA bit to detect the end of link training
|
|
*
|
|
* Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
|
|
* adjusted downwards to the best speed supported by both the Port and PCIe
|
|
* Device underneath it.
|
|
*
|
|
* Return:
|
|
* * 0 - on success
|
|
* * -EINVAL - @speed_req is not a PCIe Link Speed
|
|
* * -ENODEV - @port is not controllable
|
|
* * -ETIMEDOUT - changing Link Speed took too long
|
|
* * -EAGAIN - Link Speed was changed but @speed_req was not achieved
|
|
*/
|
|
int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
|
|
bool use_lt)
|
|
{
|
|
struct pci_bus *bus = port->subordinate;
|
|
u16 target_speed;
|
|
int ret;
|
|
|
|
if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
|
|
return -EINVAL;
|
|
|
|
if (bus && bus->cur_bus_speed == speed_req)
|
|
return 0;
|
|
|
|
target_speed = pcie_bwctrl_select_speed(port, speed_req);
|
|
|
|
scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
|
|
struct pcie_bwctrl_data *data = port->link_bwctrl;
|
|
|
|
/*
|
|
* port->link_bwctrl is NULL during initial scan when called
|
|
* e.g. from the Target Speed quirk.
|
|
*/
|
|
if (data)
|
|
mutex_lock(&data->set_speed_mutex);
|
|
|
|
ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
|
|
|
|
if (data)
|
|
mutex_unlock(&data->set_speed_mutex);
|
|
}
|
|
|
|
/*
|
|
* Despite setting higher speed into the Target Link Speed, empty
|
|
* bus won't train to 5GT+ speeds.
|
|
*/
|
|
if (!ret && bus && bus->cur_bus_speed != speed_req &&
|
|
!list_empty(&bus->devices))
|
|
ret = -EAGAIN;
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void pcie_bwnotif_enable(struct pcie_device *srv)
|
|
{
|
|
struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
|
|
struct pci_dev *port = srv->port;
|
|
u16 link_status;
|
|
int ret;
|
|
|
|
/* Count LBMS seen so far as one */
|
|
ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
|
|
if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
|
|
atomic_inc(&data->lbms_count);
|
|
|
|
pcie_capability_set_word(port, PCI_EXP_LNKCTL,
|
|
PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
|
|
pcie_capability_write_word(port, PCI_EXP_LNKSTA,
|
|
PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
|
|
|
|
/*
|
|
* Update after enabling notifications & clearing status bits ensures
|
|
* link speed is up to date.
|
|
*/
|
|
pcie_update_link_speed(port->subordinate);
|
|
}
|
|
|
|
static void pcie_bwnotif_disable(struct pci_dev *port)
|
|
{
|
|
pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
|
|
PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
|
|
}
|
|
|
|
static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
|
|
{
|
|
struct pcie_device *srv = context;
|
|
struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
|
|
struct pci_dev *port = srv->port;
|
|
u16 link_status, events;
|
|
int ret;
|
|
|
|
ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
|
|
if (ret != PCIBIOS_SUCCESSFUL)
|
|
return IRQ_NONE;
|
|
|
|
events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
|
|
if (!events)
|
|
return IRQ_NONE;
|
|
|
|
if (events & PCI_EXP_LNKSTA_LBMS)
|
|
atomic_inc(&data->lbms_count);
|
|
|
|
pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
|
|
|
|
/*
|
|
* Interrupts will not be triggered from any further Link Speed
|
|
* change until LBMS is cleared by the write. Therefore, re-read the
|
|
* speed (inside pcie_update_link_speed()) after LBMS has been
|
|
* cleared to avoid missing link speed changes.
|
|
*/
|
|
pcie_update_link_speed(port->subordinate);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
void pcie_reset_lbms_count(struct pci_dev *port)
|
|
{
|
|
struct pcie_bwctrl_data *data;
|
|
|
|
guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
|
|
data = port->link_bwctrl;
|
|
if (data)
|
|
atomic_set(&data->lbms_count, 0);
|
|
else
|
|
pcie_capability_write_word(port, PCI_EXP_LNKSTA,
|
|
PCI_EXP_LNKSTA_LBMS);
|
|
}
|
|
|
|
int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
|
|
{
|
|
struct pcie_bwctrl_data *data;
|
|
|
|
guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
|
|
data = port->link_bwctrl;
|
|
if (!data)
|
|
return -ENOTTY;
|
|
|
|
*val = atomic_read(&data->lbms_count);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int pcie_bwnotif_probe(struct pcie_device *srv)
|
|
{
|
|
struct pci_dev *port = srv->port;
|
|
int ret;
|
|
|
|
struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
|
|
sizeof(*data), GFP_KERNEL);
|
|
if (!data)
|
|
return -ENOMEM;
|
|
|
|
ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
|
|
if (ret)
|
|
return ret;
|
|
|
|
scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
|
|
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
|
|
port->link_bwctrl = data;
|
|
|
|
ret = request_irq(srv->irq, pcie_bwnotif_irq,
|
|
IRQF_SHARED, "PCIe bwctrl", srv);
|
|
if (ret) {
|
|
port->link_bwctrl = NULL;
|
|
return ret;
|
|
}
|
|
|
|
pcie_bwnotif_enable(srv);
|
|
}
|
|
}
|
|
|
|
pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
|
|
|
|
/* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
|
|
port->link_bwctrl->cdev = pcie_cooling_device_register(port);
|
|
if (IS_ERR(port->link_bwctrl->cdev))
|
|
port->link_bwctrl->cdev = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void pcie_bwnotif_remove(struct pcie_device *srv)
|
|
{
|
|
struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
|
|
|
|
pcie_cooling_device_unregister(data->cdev);
|
|
|
|
scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
|
|
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
|
|
pcie_bwnotif_disable(srv->port);
|
|
|
|
free_irq(srv->irq, srv);
|
|
|
|
srv->port->link_bwctrl = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int pcie_bwnotif_suspend(struct pcie_device *srv)
|
|
{
|
|
pcie_bwnotif_disable(srv->port);
|
|
return 0;
|
|
}
|
|
|
|
/* Resume callback: re-enable bandwidth notifications. Always returns 0. */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);

	return 0;
}
|
|
|
|
static struct pcie_port_service_driver pcie_bwctrl_driver = {
|
|
.name = "pcie_bwctrl",
|
|
.port_type = PCIE_ANY_PORT,
|
|
.service = PCIE_PORT_SERVICE_BWCTRL,
|
|
.probe = pcie_bwnotif_probe,
|
|
.suspend = pcie_bwnotif_suspend,
|
|
.resume = pcie_bwnotif_resume,
|
|
.remove = pcie_bwnotif_remove,
|
|
};
|
|
|
|
/*
 * Register the bandwidth controller port service driver.
 *
 * Return: the result of pcie_port_service_register().
 */
int __init pcie_bwctrl_init(void)
{
	int ret = pcie_port_service_register(&pcie_bwctrl_driver);

	return ret;
}
|