SRCU lets synchronize_srcu() depend on VMD-local RCU primitives, preventing
long delays from locking up RCU in other systems. VMD performs a
synchronize when removing a device, but will hit all IRQ lists if the
device uses all VMD vectors. This patch will not help VMD's RCU
synchronization, but will isolate the read side delays to the VMD
subsystem. Additionally, the use of SRCU in VMD's ISR will keep it
isolated from any other RCU waiters in the rest of the system.
Tested using concurrent FIO and NVMe resets:
[global]
rw=read
bs=4k
direct=1
ioengine=libaio
iodepth=32
norandommap
timeout=300
runtime=
1000000000
[nvme0]
cpus_allowed=0-63
numjobs=8
filename=/dev/nvme0n1
[nvme1]
cpus_allowed=0-63
numjobs=8
filename=/dev/nvme1n1
while (true) do
for i in /sys/class/nvme/nvme*; do
echo "Resetting ${i##*/}"
echo 1 > $i/reset_controller;
sleep 5
done;
done
Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
- depends on PCI_MSI && X86_64
+ depends on PCI_MSI && X86_64 && SRCU
tristate "Intel Volume Management Device Driver"
default N
---help---
tristate "Intel Volume Management Device Driver"
default N
---help---
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
/**
* struct vmd_irq - private data to map driver IRQ to the VMD shared vector
* @node: list item for parent traversal.
/**
* struct vmd_irq - private data to map driver IRQ to the VMD shared vector
* @node: list item for parent traversal.
- * @rcu: RCU callback item for freeing.
* @irq: back pointer to parent.
* @enabled: true if driver enabled IRQ
* @virq: the virtual IRQ value provided to the requesting driver.
* @irq: back pointer to parent.
* @enabled: true if driver enabled IRQ
* @virq: the virtual IRQ value provided to the requesting driver.
*/
struct vmd_irq {
struct list_head node;
*/
struct vmd_irq {
struct list_head node;
struct vmd_irq_list *irq;
bool enabled;
unsigned int virq;
struct vmd_irq_list *irq;
bool enabled;
unsigned int virq;
/**
* struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
* @irq_list: the list of irq's the VMD one demuxes to.
/**
* struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
* @irq_list: the list of irq's the VMD one demuxes to.
+ * @srcu: SRCU struct for local synchronization.
* @count: number of child IRQs assigned to this vector; used to track
* sharing.
*/
struct vmd_irq_list {
struct list_head irq_list;
* @count: number of child IRQs assigned to this vector; used to track
* sharing.
*/
struct vmd_irq_list {
struct list_head irq_list;
+ struct srcu_struct srcu;
struct vmd_irq *vmdirq = irq_get_chip_data(virq);
unsigned long flags;
struct vmd_irq *vmdirq = irq_get_chip_data(virq);
unsigned long flags;
+ synchronize_srcu(&vmdirq->irq->srcu);
/* XXX: Potential optimization to rebalance */
raw_spin_lock_irqsave(&list_lock, flags);
vmdirq->irq->count--;
raw_spin_unlock_irqrestore(&list_lock, flags);
/* XXX: Potential optimization to rebalance */
raw_spin_lock_irqsave(&list_lock, flags);
vmdirq->irq->count--;
raw_spin_unlock_irqrestore(&list_lock, flags);
- kfree_rcu(vmdirq, rcu);
}
static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
}
static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
{
struct vmd_irq_list *irqs = data;
struct vmd_irq *vmdirq;
{
struct vmd_irq_list *irqs = data;
struct vmd_irq *vmdirq;
+ idx = srcu_read_lock(&irqs->srcu);
list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
generic_handle_irq(vmdirq->virq);
list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
generic_handle_irq(vmdirq->virq);
+ srcu_read_unlock(&irqs->srcu, idx);
return -ENOMEM;
for (i = 0; i < vmd->msix_count; i++) {
return -ENOMEM;
for (i = 0; i < vmd->msix_count; i++) {
+ err = init_srcu_struct(&vmd->irqs[i].srcu);
+ if (err)
+ return err;
+
INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
vmd_irq, 0, "vmd", &vmd->irqs[i]);
INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
vmd_irq, 0, "vmd", &vmd->irqs[i]);
+static void vmd_cleanup_srcu(struct vmd_dev *vmd)
+{
+ int i;
+
+ for (i = 0; i < vmd->msix_count; i++)
+ cleanup_srcu_struct(&vmd->irqs[i].srcu);
+}
+
static void vmd_remove(struct pci_dev *dev)
{
struct vmd_dev *vmd = pci_get_drvdata(dev);
vmd_detach_resources(vmd);
static void vmd_remove(struct pci_dev *dev)
{
struct vmd_dev *vmd = pci_get_drvdata(dev);
vmd_detach_resources(vmd);
sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
pci_stop_root_bus(vmd->bus);
pci_remove_root_bus(vmd->bus);
sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
pci_stop_root_bus(vmd->bus);
pci_remove_root_bus(vmd->bus);