Commit db89ccbe, authored by Rajat Jain, committed by Bjorn Helgaas

PCI/AER: Define aer_stats structure for AER capable devices

Define a structure to hold the AER statistics.  There are 2 groups of
statistics: dev_* counters that are to be collected for all AER capable
devices and rootport_* counters that are collected for all (AER capable)
rootports only.  Allocate and free this structure when device is added or
released (thus counters survive the lifetime of the device).
Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
parent 60ed982a
...@@ -483,9 +483,11 @@ static inline int devm_of_pci_get_host_bridge_resources(struct device *dev, ...@@ -483,9 +483,11 @@ static inline int devm_of_pci_get_host_bridge_resources(struct device *dev,
/*
 * AER entry points declared for the PCI core. Real prototypes are provided
 * when CONFIG_PCIEAER is defined; otherwise the inline stubs in the #else
 * branch are used.
 */
#ifdef CONFIG_PCIEAER #ifdef CONFIG_PCIEAER
void pci_no_aer(void); void pci_no_aer(void);
void pci_aer_init(struct pci_dev *dev); void pci_aer_init(struct pci_dev *dev);
void pci_aer_exit(struct pci_dev *dev);
#else #else
static inline void pci_no_aer(void) { } static inline void pci_no_aer(void) { }
/*
 * NOTE(review): this stub is declared `int` and returns -ENODEV, while the
 * CONFIG_PCIEAER prototype above is `void` -- confirm which signature the
 * callers expect and make the two agree.
 */
static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; } static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
/* No-op counterpart of pci_aer_exit() for builds without CONFIG_PCIEAER. */
static inline void pci_aer_exit(struct pci_dev *d) { }
#endif #endif
#endif /* DRIVERS_PCI_H */ #endif /* DRIVERS_PCI_H */
...@@ -32,6 +32,9 @@ ...@@ -32,6 +32,9 @@
#define AER_ERROR_SOURCES_MAX 100 #define AER_ERROR_SOURCES_MAX 100
/*
 * Sizes of the per-error-type counter arrays in struct aer_stats and of the
 * matching error-name string tables.
 */
#define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */
#define AER_MAX_TYPEOF_UNCOR_ERRS 26 /* as per PCI_ERR_UNCOR_STATUS */
struct aer_err_source { struct aer_err_source {
unsigned int status; unsigned int status;
unsigned int id; unsigned int id;
...@@ -56,6 +59,42 @@ struct aer_rpc { ...@@ -56,6 +59,42 @@ struct aer_rpc {
*/ */
}; };
/*
 * AER stats for the device.
 *
 * Holds two groups of u64 counters: dev_* counters kept for every AER
 * capable device, and rootport_* counters that are meaningful only for
 * root ports / root complex event collectors.
 */
struct aer_stats {
/*
 * Fields for all AER capable devices. They indicate the errors
 * "as seen by this device". Note that this may mean that if an
 * end point is causing problems, the AER counters may increment
 * at its link partner (e.g. root port) because the errors will be
 * "seen" by the link partner and not the problematic end point
 * itself (which may report all counters as 0 as it never saw any
 * problems).
 */
/*
 * Counters for different type of correctable errors
 * (presumably one slot per PCI_ERR_COR_STATUS bit position -- confirm
 * against the code that updates them)
 */
u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
/* Counters for different type of fatal uncorrectable errors */
u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
/* Counters for different type of nonfatal uncorrectable errors */
u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
/* Total number of ERR_COR sent by this device */
u64 dev_total_cor_errs;
/* Total number of ERR_FATAL sent by this device */
u64 dev_total_fatal_errs;
/* Total number of ERR_NONFATAL sent by this device */
u64 dev_total_nonfatal_errs;
/*
 * Fields for Root ports & root complex event collectors only, these
 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
 * messages received by the root port / event collector, INCLUDING the
 * ones that are generated internally (by the rootport itself)
 */
/* Total number of ERR_COR messages received */
u64 rootport_total_cor_errs;
/* Total number of ERR_FATAL messages received */
u64 rootport_total_fatal_errs;
/* Total number of ERR_NONFATAL messages received */
u64 rootport_total_nonfatal_errs;
};
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
PCI_ERR_UNC_ECRC| \ PCI_ERR_UNC_ECRC| \
PCI_ERR_UNC_UNSUP| \ PCI_ERR_UNC_UNSUP| \
...@@ -385,9 +424,19 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) ...@@ -385,9 +424,19 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
/*
 * pci_aer_init - set up per-device AER state.
 * @dev: PCI device being initialized
 *
 * Looks up the AER extended capability and stores the result in
 * dev->aer_cap. When that value is non-zero, a zero-filled struct aer_stats
 * is allocated and attached to dev->aer_stats; pci_aer_exit() frees it.
 * pci_cleanup_aer_error_status_regs() is then called unconditionally and
 * its return value is ignored.
 */
void pci_aer_init(struct pci_dev *dev) void pci_aer_init(struct pci_dev *dev)
{ {
dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
/*
 * The allocation result is not checked here, so dev->aer_stats may be
 * NULL even when dev->aer_cap is set.
 */
if (dev->aer_cap)
dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
pci_cleanup_aer_error_status_regs(dev); pci_cleanup_aer_error_status_regs(dev);
} }
/*
 * pci_aer_exit - release per-device AER state.
 * @dev: PCI device being torn down
 *
 * Frees the statistics buffer attached to @dev and leaves dev->aer_stats
 * set to NULL. Nothing needs to be guarded when no buffer was ever
 * attached, because kfree() accepts a NULL pointer.
 */
void pci_aer_exit(struct pci_dev *dev)
{
	struct aer_stats *stats = dev->aer_stats;

	/* Detach first so the device never holds a pointer to freed memory. */
	dev->aer_stats = NULL;
	kfree(stats);
}
#define AER_AGENT_RECEIVER 0 #define AER_AGENT_RECEIVER 0
#define AER_AGENT_REQUESTER 1 #define AER_AGENT_REQUESTER 1
#define AER_AGENT_COMPLETER 2 #define AER_AGENT_COMPLETER 2
...@@ -438,7 +487,7 @@ static const char *aer_error_layer[] = { ...@@ -438,7 +487,7 @@ static const char *aer_error_layer[] = {
"Transaction Layer" "Transaction Layer"
}; };
static const char *aer_correctable_error_string[] = { static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
"RxErr", /* Bit Position 0 */ "RxErr", /* Bit Position 0 */
NULL, NULL,
NULL, NULL,
...@@ -457,7 +506,7 @@ static const char *aer_correctable_error_string[] = { ...@@ -457,7 +506,7 @@ static const char *aer_correctable_error_string[] = {
"HeaderOF", /* Bit Position 15 */ "HeaderOF", /* Bit Position 15 */
}; };
static const char *aer_uncorrectable_error_string[] = { static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
"Undefined", /* Bit Position 0 */ "Undefined", /* Bit Position 0 */
NULL, NULL,
NULL, NULL,
......
...@@ -2064,6 +2064,7 @@ static void pci_configure_device(struct pci_dev *dev) ...@@ -2064,6 +2064,7 @@ static void pci_configure_device(struct pci_dev *dev)
static void pci_release_capabilities(struct pci_dev *dev) static void pci_release_capabilities(struct pci_dev *dev)
{ {
pci_aer_exit(dev);
pci_vpd_release(dev); pci_vpd_release(dev);
pci_iov_release(dev); pci_iov_release(dev);
pci_free_cap_save_buffers(dev); pci_free_cap_save_buffers(dev);
......
...@@ -299,6 +299,7 @@ struct pci_dev { ...@@ -299,6 +299,7 @@ struct pci_dev {
u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */
#ifdef CONFIG_PCIEAER #ifdef CONFIG_PCIEAER
u16 aer_cap; /* AER capability offset */ u16 aer_cap; /* AER capability offset */
struct aer_stats *aer_stats; /* AER stats for this device */
#endif #endif
u8 pcie_cap; /* PCIe capability offset */ u8 pcie_cap; /* PCIe capability offset */
u8 msi_cap; /* MSI capability offset */ u8 msi_cap; /* MSI capability offset */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment