Commit 3a48dc6f authored by Bjorn Helgaas's avatar Bjorn Helgaas

Merge branch 'pci/virtualization'

  - To avoid bus errors, enable PASID only if entire path supports End-End
    TLP prefixes (Sinan Kaya)

  - Unify slot and bus reset functions and remove hotplug knowledge from
    callers (Sinan Kaya)

  - Add Function-Level Reset quirks for Intel and Samsung NVMe devices to
    fix guest reboot issues (Alex Williamson)

  - Add function 1 DMA alias quirk for Marvell 88SS9183 PCIe SSD Controller
    (Bjorn Helgaas)

* pci/virtualization:
  PCI: Add function 1 DMA alias quirk for Marvell 88SS9183
  PCI: Delay after FLR of Intel DC P3700 NVMe
  PCI: Disable Samsung SM961/PM961 NVMe before FLR
  PCI: Export pcie_has_flr()
  PCI: Rename pci_try_reset_bus() to pci_reset_bus()
  PCI: Deprecate pci_reset_bus() and pci_reset_slot() functions
  PCI: Unify try slot and bus reset API
  PCI: Hide pci_reset_bridge_secondary_bus() from drivers
  IB/hfi1: Use pci_try_reset_bus() for initiating PCI Secondary Bus Reset
  PCI: Handle error return from pci_reset_bridge_secondary_bus()
  PCI/IOV: Tidy pci_sriov_set_totalvfs()
  PCI: Enable PASID only if entire path supports End-End TLP prefixes

# Conflicts:
#	drivers/pci/hotplug/pciehp_hpc.c
parents e7aaf90f 7695e73f
......@@ -905,9 +905,7 @@ static int trigger_sbr(struct hfi1_devdata *dd)
* delay after a reset is required. Per spec requirements,
* the link is either working or not after that point.
*/
pci_reset_bridge_secondary_bus(dev->bus->self);
return 0;
return pci_reset_bus(dev);
}
/*
......
......@@ -273,6 +273,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
if (WARN_ON(pdev->pasid_enabled))
return -EBUSY;
if (!pdev->eetlp_prefix_path)
return -EINVAL;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
if (!pos)
return -EINVAL;
......
......@@ -761,6 +761,7 @@ int pciehp_reset_slot(struct slot *slot, int probe)
struct controller *ctrl = slot->ctrl;
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 stat_mask = 0, ctrl_mask = 0;
int rc;
if (probe)
return 0;
......@@ -778,7 +779,7 @@ int pciehp_reset_slot(struct slot *slot, int probe)
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);
pci_reset_bridge_secondary_bus(ctrl->pcie->port);
rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port);
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask);
pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask);
......@@ -786,7 +787,7 @@ int pciehp_reset_slot(struct slot *slot, int probe)
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask);
up_write(&ctrl->reset_lock);
return 0;
return rc;
}
int pcie_init_notification(struct controller *ctrl)
......
......@@ -818,15 +818,15 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
{
if (!dev->is_physfn)
return -ENOSYS;
if (numvfs > dev->sriov->total_VFs)
return -EINVAL;
/* Shouldn't change if VFs already enabled */
if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
return -EBUSY;
else
dev->sriov->driver_max_VFs = numvfs;
dev->sriov->driver_max_VFs = numvfs;
return 0;
}
EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
......
......@@ -4349,7 +4349,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
* Returns true if the device advertises support for PCIe function level
* resets.
*/
static bool pcie_has_flr(struct pci_dev *dev)
bool pcie_has_flr(struct pci_dev *dev)
{
u32 cap;
......@@ -4359,6 +4359,7 @@ static bool pcie_has_flr(struct pci_dev *dev)
pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
return cap & PCI_EXP_DEVCAP_FLR;
}
EXPORT_SYMBOL_GPL(pcie_has_flr);
/**
* pcie_flr - initiate a PCIe function level reset
......@@ -4534,19 +4535,18 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
}
/**
* pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge.
* pci_bridge_secondary_bus_reset - Reset the secondary bus on a PCI bridge.
* @dev: Bridge device
*
* Use the bridge control register to assert reset on the secondary bus.
* Devices on the secondary bus are left in power-on state.
*/
int pci_reset_bridge_secondary_bus(struct pci_dev *dev)
int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
{
pcibios_reset_secondary_bus(dev);
return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
}
EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus);
static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
{
......@@ -4563,9 +4563,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
if (probe)
return 0;
pci_reset_bridge_secondary_bus(dev->bus->self);
return 0;
return pci_bridge_secondary_bus_reset(dev->bus->self);
}
static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe)
......@@ -5097,7 +5095,7 @@ int pci_probe_reset_slot(struct pci_slot *slot)
EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
/**
* pci_reset_slot - reset a PCI slot
* __pci_reset_slot - Try to reset a PCI slot
* @slot: PCI slot to reset
*
* A PCI bus may host multiple slots, each slot may support a reset mechanism
......@@ -5109,33 +5107,9 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
* through this function. PCI config space of all devices in the slot and
* behind the slot is saved before and restored after reset.
*
* Return 0 on success, non-zero on error.
*/
int pci_reset_slot(struct pci_slot *slot)
{
int rc;
rc = pci_slot_reset(slot, 1);
if (rc)
return rc;
pci_slot_save_and_disable(slot);
rc = pci_slot_reset(slot, 0);
pci_slot_restore(slot);
return rc;
}
EXPORT_SYMBOL_GPL(pci_reset_slot);
/**
* pci_try_reset_slot - Try to reset a PCI slot
* @slot: PCI slot to reset
*
* Same as above except return -EAGAIN if the slot cannot be locked
*/
int pci_try_reset_slot(struct pci_slot *slot)
static int __pci_reset_slot(struct pci_slot *slot)
{
int rc;
......@@ -5156,10 +5130,11 @@ int pci_try_reset_slot(struct pci_slot *slot)
return rc;
}
EXPORT_SYMBOL_GPL(pci_try_reset_slot);
static int pci_bus_reset(struct pci_bus *bus, int probe)
{
int ret;
if (!bus->self || !pci_bus_resetable(bus))
return -ENOTTY;
......@@ -5170,11 +5145,11 @@ static int pci_bus_reset(struct pci_bus *bus, int probe)
might_sleep();
pci_reset_bridge_secondary_bus(bus->self);
ret = pci_bridge_secondary_bus_reset(bus->self);
pci_bus_unlock(bus);
return 0;
return ret;
}
/**
......@@ -5190,15 +5165,12 @@ int pci_probe_reset_bus(struct pci_bus *bus)
EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
/**
* pci_reset_bus - reset a PCI bus
* __pci_reset_bus - Try to reset a PCI bus
* @bus: top level PCI bus to reset
*
* Do a bus reset on the given bus and any subordinate buses, saving
* and restoring state of all devices.
*
* Return 0 on success, non-zero on error.
* Same as above except return -EAGAIN if the bus cannot be locked
*/
int pci_reset_bus(struct pci_bus *bus)
static int __pci_reset_bus(struct pci_bus *bus)
{
int rc;
......@@ -5208,42 +5180,30 @@ int pci_reset_bus(struct pci_bus *bus)
pci_bus_save_and_disable(bus);
rc = pci_bus_reset(bus, 0);
if (pci_bus_trylock(bus)) {
might_sleep();
rc = pci_bridge_secondary_bus_reset(bus->self);
pci_bus_unlock(bus);
} else
rc = -EAGAIN;
pci_bus_restore(bus);
return rc;
}
EXPORT_SYMBOL_GPL(pci_reset_bus);
/**
* pci_try_reset_bus - Try to reset a PCI bus
* @bus: top level PCI bus to reset
* pci_reset_bus - Try to reset a PCI bus
* @pdev: top level PCI device to reset via slot/bus
*
* Same as above except return -EAGAIN if the bus cannot be locked
*/
int pci_try_reset_bus(struct pci_bus *bus)
int pci_reset_bus(struct pci_dev *pdev)
{
int rc;
rc = pci_bus_reset(bus, 1);
if (rc)
return rc;
pci_bus_save_and_disable(bus);
if (pci_bus_trylock(bus)) {
might_sleep();
pci_reset_bridge_secondary_bus(bus->self);
pci_bus_unlock(bus);
} else
rc = -EAGAIN;
pci_bus_restore(bus);
return rc;
return pci_probe_reset_slot(pdev->slot) ?
__pci_reset_slot(pdev->slot) : __pci_reset_bus(pdev->bus);
}
EXPORT_SYMBOL_GPL(pci_try_reset_bus);
EXPORT_SYMBOL_GPL(pci_reset_bus);
/**
* pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
......
......@@ -34,6 +34,7 @@ int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai,
enum pci_mmap_api mmap_api);
int pci_probe_reset_function(struct pci_dev *dev);
int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
/**
* struct pci_platform_pm_ops - Firmware PM callbacks
......
......@@ -1517,6 +1517,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
u32 reg32;
int pos;
int rc;
pos = dev->aer_cap;
......@@ -1525,7 +1526,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
pci_reset_bridge_secondary_bus(dev);
rc = pci_bridge_secondary_bus_reset(dev);
pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n");
/* Clear Root Error Status */
......@@ -1537,7 +1538,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
return PCI_ERS_RESULT_RECOVERED;
return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
/**
......
......@@ -175,9 +175,11 @@ static int report_resume(struct pci_dev *dev, void *data)
*/
static pci_ers_result_t default_reset_link(struct pci_dev *dev)
{
pci_reset_bridge_secondary_bus(dev);
int rc;
rc = pci_bridge_secondary_bus_reset(dev);
pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
return PCI_ERS_RESULT_RECOVERED;
return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
......
......@@ -2070,6 +2070,29 @@ static void pci_configure_ltr(struct pci_dev *dev)
#endif
}
static void pci_configure_eetlp_prefix(struct pci_dev *dev)
{
#ifdef CONFIG_PCI_PASID
struct pci_dev *bridge;
u32 cap;
if (!pci_is_pcie(dev))
return;
pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
if (!(cap & PCI_EXP_DEVCAP2_EE_PREFIX))
return;
if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
dev->eetlp_prefix_path = 1;
else {
bridge = pci_upstream_bridge(dev);
if (bridge && bridge->eetlp_prefix_path)
dev->eetlp_prefix_path = 1;
}
#endif
}
static void pci_configure_device(struct pci_dev *dev)
{
struct hotplug_params hpp;
......@@ -2079,6 +2102,7 @@ static void pci_configure_device(struct pci_dev *dev)
pci_configure_extended_tags(dev, NULL);
pci_configure_relaxed_ordering(dev);
pci_configure_ltr(dev);
pci_configure_eetlp_prefix(dev);
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
......
......@@ -25,6 +25,7 @@
#include <linux/sched.h>
#include <linux/ktime.h>
#include <linux/mm.h>
#include <linux/nvme.h>
#include <linux/platform_data/x86/apple.h>
#include <linux/pm_runtime.h>
#include <linux/switchtec.h>
......@@ -3667,6 +3668,108 @@ static int reset_chelsio_generic_dev(struct pci_dev *dev, int probe)
#define PCI_DEVICE_ID_INTEL_IVB_M_VGA 0x0156
#define PCI_DEVICE_ID_INTEL_IVB_M2_VGA 0x0166
/*
* The Samsung SM961/PM961 controller can sometimes enter a fatal state after
* FLR where config space reads from the device return -1. We seem to be
* able to avoid this condition if we disable the NVMe controller prior to
* FLR. This quirk is generic for any NVMe class device requiring similar
* assistance to quiesce the device prior to FLR.
*
* NVMe specification: https://nvmexpress.org/resources/specifications/
* Revision 1.0e:
* Chapter 2: Required and optional PCI config registers
* Chapter 3: NVMe control registers
* Chapter 7.3: Reset behavior
*/
static int nvme_disable_and_flr(struct pci_dev *dev, int probe)
{
void __iomem *bar;
u16 cmd;
u32 cfg;
if (dev->class != PCI_CLASS_STORAGE_EXPRESS ||
!pcie_has_flr(dev) || !pci_resource_start(dev, 0))
return -ENOTTY;
if (probe)
return 0;
bar = pci_iomap(dev, 0, NVME_REG_CC + sizeof(cfg));
if (!bar)
return -ENOTTY;
pci_read_config_word(dev, PCI_COMMAND, &cmd);
pci_write_config_word(dev, PCI_COMMAND, cmd | PCI_COMMAND_MEMORY);
cfg = readl(bar + NVME_REG_CC);
/* Disable controller if enabled */
if (cfg & NVME_CC_ENABLE) {
u32 cap = readl(bar + NVME_REG_CAP);
unsigned long timeout;
/*
* Per nvme_disable_ctrl() skip shutdown notification as it
* could complete commands to the admin queue. We only intend
* to quiesce the device before reset.
*/
cfg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);
writel(cfg, bar + NVME_REG_CC);
/*
* Some controllers require an additional delay here, see
* NVME_QUIRK_DELAY_BEFORE_CHK_RDY. None of those are yet
* supported by this quirk.
*/
/* Cap register provides max timeout in 500ms increments */
timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
for (;;) {
u32 status = readl(bar + NVME_REG_CSTS);
/* Ready status becomes zero on disable complete */
if (!(status & NVME_CSTS_RDY))
break;
msleep(100);
if (time_after(jiffies, timeout)) {
pci_warn(dev, "Timeout waiting for NVMe ready status to clear after disable\n");
break;
}
}
}
pci_iounmap(dev, bar);
pcie_flr(dev);
return 0;
}
/*
* Intel DC P3700 NVMe controller will timeout waiting for ready status
* to change after NVMe enable if the driver starts interacting with the
* device too soon after FLR. A 250ms delay after FLR has heuristically
* proven to produce reliably working results for device assignment cases.
*/
static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
{
if (!pcie_has_flr(dev))
return -ENOTTY;
if (probe)
return 0;
pcie_flr(dev);
msleep(250);
return 0;
}
static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
reset_intel_82599_sfp_virtfn },
......@@ -3674,6 +3777,8 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
reset_ivb_igd },
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_M2_VGA,
reset_ivb_igd },
{ PCI_VENDOR_ID_SAMSUNG, 0xa804, nvme_disable_and_flr },
{ PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr },
{ PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
reset_chelsio_generic_dev },
{ 0 }
......@@ -3743,6 +3848,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x917a,
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c78 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9182,
quirk_dma_func1_alias);
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c134 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9183,
quirk_dma_func1_alias);
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c46 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0,
quirk_dma_func1_alias);
......
......@@ -1010,8 +1010,7 @@ static long vfio_pci_ioctl(void *device_data,
&info, slot);
if (!ret)
/* User has access, do the reset */
ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
pci_try_reset_bus(vdev->pdev->bus);
ret = pci_reset_bus(vdev->pdev);
hot_reset_release:
for (i--; i >= 0; i--)
......@@ -1373,8 +1372,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
}
if (needs_reset)
ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
pci_try_reset_bus(vdev->pdev->bus);
ret = pci_reset_bus(vdev->pdev);
put_devs:
for (i = 0; i < devs.cur_index; i++) {
......
......@@ -351,6 +351,7 @@ struct pci_dev {
unsigned int ltr_path:1; /* Latency Tolerance Reporting
supported from root to here */
#endif
unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */
pci_channel_state_t error_state; /* Current connectivity state */
struct device dev; /* Generic device interface */
......@@ -1105,20 +1106,17 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
enum pci_bus_speed *speed,
enum pcie_link_width *width);
void pcie_print_link_status(struct pci_dev *dev);
bool pcie_has_flr(struct pci_dev *dev);
int pcie_flr(struct pci_dev *dev);
int __pci_reset_function_locked(struct pci_dev *dev);
int pci_reset_function(struct pci_dev *dev);
int pci_reset_function_locked(struct pci_dev *dev);
int pci_try_reset_function(struct pci_dev *dev);
int pci_probe_reset_slot(struct pci_slot *slot);
int pci_reset_slot(struct pci_slot *slot);
int pci_try_reset_slot(struct pci_slot *slot);
int pci_probe_reset_bus(struct pci_bus *bus);
int pci_reset_bus(struct pci_bus *bus);
int pci_try_reset_bus(struct pci_bus *bus);
int pci_reset_bus(struct pci_dev *dev);
void pci_reset_secondary_bus(struct pci_dev *dev);
void pcibios_reset_secondary_bus(struct pci_dev *dev);
int pci_reset_bridge_secondary_bus(struct pci_dev *dev);
void pci_update_resource(struct pci_dev *dev, int resno);
int __must_check pci_assign_resource(struct pci_dev *dev, int i);
int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
......
......@@ -636,6 +636,7 @@
#define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */
#define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */
#define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */
#define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */
#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */
#define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment