Commit 32517fc0, authored by James Smart, committed by Martin K. Petersen

scsi: lpfc: Rework EQ/CQ processing to address interrupt coalescing

When driving high iop counts, auto_imax coalescing kicks in and drives the
performance to extremely small iops levels.

There are two issues:

 1) auto_imax is enabled by default. The auto algorithm, when iops gets
    high, divides the iops by the hdwq count and uses that value to
    calculate EQ_Delay. The EQ_Delay is set uniformly on all EQs whether
    they have load or not. The EQ_delay is only manipulated every 5s (a
    long time). Thus there were large 5s swings of no interrupt delay
    followed by large/maximum delay, before repeating.

 2) When processing a CQ, the driver got mixed up on the rate of when
    to ring the doorbell to keep the chip apprised of the eqe or cqe
    consumption as well as how long to sit in the thread and
    process queue entries. Currently, the driver capped its work at
    64 entries (very small) and exited/rearmed the CQ.  Thus, on heavy
    loads, additional overheads were taken to exit and re-enter the
    interrupt handler. Worse, if in the large/maximum coalescing
    windows, it could be a while before getting back to servicing.

The issues are corrected by the following:

 - A change in defaults. Auto_imax is turned OFF and fcp_imax is set
   to 0. Thus all interrupts are immediate.

 - Cleanup of field names and their meanings. Existing names were
   non-intuitive or used for duplicate things.

 - Added max_proc_limit field, to control the length of time the
   handlers would service completions.

 - Reworked EQ handling:
    Added common routine that walks eq, applying notify interval and max
      processing limits. Use queue_claimed to claim ownership of the queue
      while processing. Always rearm the queue whenever the common routine
      is called.
    Rework queue element processing, namely to eliminate hba_index vs
      host_index. Only one index is necessary. The queue entry can be
      marked invalid and the host_index updated immediately after eqe
      processing.
    After rework, xx_release routines are now DB write functions. Renamed
      the routines as such.
    Moved lpfc_sli4_eq_flush(), which does similar action, to same area.
    Replaced the 2 individual loops that walk an eq with a call to the
      common routine.
    Slightly revised lpfc_sli4_hba_handle_eqe() calling syntax.
    Added per-cpu counters to detect interrupt rates and scale
      interrupt coalescing values.

 - Reworked CQ handling:
    Added common routine that walks cq, applying notify interval and max
      processing limits. Use queue_claimed to claim ownership of the queue
      while processing. Always rearm the queue whenever the common routine
      is called.
    Rework queue element processing, namely to eliminate hba_index vs
      host_index. Only one index is necessary. The queue entry can be
      marked invalid and the host_index updated immediately after cqe
      processing.
    After rework, xx_release routines are now DB write functions.  Renamed
      the routines as such.
    Replaced the 3 individual loops that walk a cq with a call to the
      common routine.
    Redefined lpfc_sli4_sp_handle_mcqe() to common handler definition with
      queue reference. Add increment for mbox completion to handler.

 - Added a new module/sysfs attribute, lpfc_cq_max_proc_limit, to allow
   dynamic changing of the CQ max_proc_limit value being used.

Although this leaves an EQ as an immediate interrupt, that interrupt will
only occur if a CQ bound to it is in an armed state and has cqe's to
process.  By staying in the cq processing routine longer, high loads will
avoid generating more interrupts as they will only rearm as the processing
thread exits. The immediate interrupt is also beneficial to idle or
lower-processing CQ's as they get serviced immediately without being
penalized by sharing an EQ with a more loaded CQ.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent cb733e35
...@@ -686,6 +686,7 @@ struct lpfc_hba { ...@@ -686,6 +686,7 @@ struct lpfc_hba {
struct lpfc_sli4_hba sli4_hba; struct lpfc_sli4_hba sli4_hba;
struct workqueue_struct *wq; struct workqueue_struct *wq;
struct delayed_work eq_delay_work;
struct lpfc_sli sli; struct lpfc_sli sli;
uint8_t pci_dev_grp; /* lpfc PCI dev group: 0x0, 0x1, 0x2,... */ uint8_t pci_dev_grp; /* lpfc PCI dev group: 0x0, 0x1, 0x2,... */
...@@ -789,7 +790,6 @@ struct lpfc_hba { ...@@ -789,7 +790,6 @@ struct lpfc_hba {
uint8_t nvmet_support; /* driver supports NVMET */ uint8_t nvmet_support; /* driver supports NVMET */
#define LPFC_NVMET_MAX_PORTS 32 #define LPFC_NVMET_MAX_PORTS 32
uint8_t mds_diags_support; uint8_t mds_diags_support;
uint32_t initial_imax;
uint8_t bbcredit_support; uint8_t bbcredit_support;
uint8_t enab_exp_wqcq_pages; uint8_t enab_exp_wqcq_pages;
...@@ -817,6 +817,8 @@ struct lpfc_hba { ...@@ -817,6 +817,8 @@ struct lpfc_hba {
uint32_t cfg_use_msi; uint32_t cfg_use_msi;
uint32_t cfg_auto_imax; uint32_t cfg_auto_imax;
uint32_t cfg_fcp_imax; uint32_t cfg_fcp_imax;
uint32_t cfg_cq_poll_threshold;
uint32_t cfg_cq_max_proc_limit;
uint32_t cfg_fcp_cpu_map; uint32_t cfg_fcp_cpu_map;
uint32_t cfg_hdw_queue; uint32_t cfg_hdw_queue;
uint32_t cfg_irq_chann; uint32_t cfg_irq_chann;
...@@ -1084,7 +1086,6 @@ struct lpfc_hba { ...@@ -1084,7 +1086,6 @@ struct lpfc_hba {
uint8_t temp_sensor_support; uint8_t temp_sensor_support;
/* Fields used for heart beat. */ /* Fields used for heart beat. */
unsigned long last_eqdelay_time;
unsigned long last_completion_time; unsigned long last_completion_time;
unsigned long skipped_hb; unsigned long skipped_hb;
struct timer_list hb_tmofunc; struct timer_list hb_tmofunc;
...@@ -1287,3 +1288,23 @@ lpfc_phba_elsring(struct lpfc_hba *phba) ...@@ -1287,3 +1288,23 @@ lpfc_phba_elsring(struct lpfc_hba *phba)
} }
return &phba->sli.sli3_ring[LPFC_ELS_RING]; return &phba->sli.sli3_ring[LPFC_ELS_RING];
} }
/**
* lpfc_sli4_mod_hba_eq_delay - update EQ delay
* @phba: Pointer to HBA context object.
* @eq: The Event Queue to update.
* @delay: The delay value (in us) to be written.
*
* Builds a register word carrying the EQ's queue_id and @delay, writes it
* to the EQ delay register address held in
* phba->sli4_hba.u.if_type2.EQDregaddr, and records @delay in eq->q_mode.
**/
static inline void
lpfc_sli4_mod_hba_eq_delay(struct lpfc_hba *phba, struct lpfc_queue *eq,
u32 delay)
{
struct lpfc_register reg_data;
/* Start from a cleared word, then fill in the queue id and delay fields */
reg_data.word0 = 0;
bf_set(lpfc_sliport_eqdelay_id, &reg_data, eq->queue_id);
bf_set(lpfc_sliport_eqdelay_delay, &reg_data, delay);
writel(reg_data.word0, phba->sli4_hba.u.if_type2.EQDregaddr);
/* Remember the delay value last written for this EQ */
eq->q_mode = delay;
}
...@@ -4935,6 +4935,7 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, ...@@ -4935,6 +4935,7 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
struct Scsi_Host *shost = class_to_shost(dev); struct Scsi_Host *shost = class_to_shost(dev);
struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
struct lpfc_hba *phba = vport->phba; struct lpfc_hba *phba = vport->phba;
struct lpfc_eq_intr_info *eqi;
uint32_t usdelay; uint32_t usdelay;
int val = 0, i; int val = 0, i;
...@@ -4956,8 +4957,18 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, ...@@ -4956,8 +4957,18 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
if (val && (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX)) if (val && (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX))
return -EINVAL; return -EINVAL;
phba->cfg_auto_imax = (val) ? 0 : 1;
if (phba->cfg_fcp_imax && !val) {
queue_delayed_work(phba->wq, &phba->eq_delay_work,
msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
for_each_present_cpu(i) {
eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
eqi->icnt = 0;
}
}
phba->cfg_fcp_imax = (uint32_t)val; phba->cfg_fcp_imax = (uint32_t)val;
phba->initial_imax = phba->cfg_fcp_imax;
if (phba->cfg_fcp_imax) if (phba->cfg_fcp_imax)
usdelay = LPFC_SEC_TO_USEC / phba->cfg_fcp_imax; usdelay = LPFC_SEC_TO_USEC / phba->cfg_fcp_imax;
...@@ -5020,15 +5031,119 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val) ...@@ -5020,15 +5031,119 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val)
static DEVICE_ATTR_RW(lpfc_fcp_imax); static DEVICE_ATTR_RW(lpfc_fcp_imax);
/**
* lpfc_cq_max_proc_limit_store
*
* @dev: class device that is converted into a Scsi_host.
* @attr: device attribute, not used.
* @buf: string with the cq max processing limit of cqes
* @count: unused variable.
*
* Description:
* If val is in a valid range, then set value on each cq
*
* Returns:
* The length of the buf: if successful
* -ERANGE: if val is not in the valid range
* -EINVAL: if bad value format or intended mode is not supported.
**/
static ssize_t
lpfc_cq_max_proc_limit_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct Scsi_Host *shost = class_to_shost(dev);
struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
struct lpfc_hba *phba = vport->phba;
struct lpfc_queue *eq, *cq;
unsigned long val;
int i;
/* cq_max_proc_limit is only valid for SLI4 */
if (phba->sli_rev != LPFC_SLI_REV4)
return -EINVAL;
/* Sanity check on user data */
if (!isdigit(buf[0]))
return -EINVAL;
if (kstrtoul(buf, 0, &val))
return -EINVAL;
if (val < LPFC_CQ_MIN_PROC_LIMIT || val > LPFC_CQ_MAX_PROC_LIMIT)
return -ERANGE;
phba->cfg_cq_max_proc_limit = (uint32_t)val;
/* set the values on the cq's */
for (i = 0; i < phba->cfg_irq_chann; i++) {
eq = phba->sli4_hba.hdwq[i].hba_eq;
if (!eq)
continue;
list_for_each_entry(cq, &eq->child_list, list)
cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
cq->entry_count);
}
return strlen(buf);
}
/* /*
* lpfc_auto_imax: Controls Auto-interrupt coalescing values support. * lpfc_cq_max_proc_limit: The maximum number CQE entries processed in an
* 0 No auto_imax support * iteration of CQ processing.
* 1 auto imax on
* Auto imax will change the value of fcp_imax on a per EQ basis, using
* the EQ Delay Multiplier, depending on the activity for that EQ.
* Value range [0,1]. Default value is 1.
*/ */
LPFC_ATTR_RW(auto_imax, 1, 0, 1, "Enable Auto imax"); static int lpfc_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;
module_param(lpfc_cq_max_proc_limit, int, 0644);
MODULE_PARM_DESC(lpfc_cq_max_proc_limit,
"Set the maximum number CQEs processed in an iteration of "
"CQ processing");
lpfc_param_show(cq_max_proc_limit)
/*
* lpfc_cq_poll_threshold: Set the threshold of CQE completions in a
* single handler call which should request a polled completion rather
* than re-enabling interrupts.
*/
LPFC_ATTR_RW(cq_poll_threshold, LPFC_CQ_DEF_THRESHOLD_TO_POLL,
LPFC_CQ_MIN_THRESHOLD_TO_POLL,
LPFC_CQ_MAX_THRESHOLD_TO_POLL,
"CQE Processing Threshold to enable Polling");
/**
 * lpfc_cq_max_proc_limit_init - Set the initial cq max_proc_limit
 * @phba: lpfc_hba pointer.
 * @val: requested entry limit.
 *
 * Description:
 * Seeds phba->cfg_cq_max_proc_limit with LPFC_CQ_DEF_MAX_PROC_LIMIT. On an
 * SLI4 adapter, @val replaces the default when it lies within
 * [LPFC_CQ_MIN_PROC_LIMIT, LPFC_CQ_MAX_PROC_LIMIT]; otherwise an error is
 * logged and the default is kept. Non-SLI4 adapters always keep the
 * default and log nothing.
 *
 * Returns:
 * Always returns 0 for success, even if value not always set to
 * requested value. If value out of range or not supported, will fall
 * back to default.
 **/
static int
lpfc_cq_max_proc_limit_init(struct lpfc_hba *phba, int val)
{
	/* Default first, so every early exit leaves a sane value behind */
	phba->cfg_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;

	if (phba->sli_rev != LPFC_SLI_REV4)
		return 0;

	if (val >= LPFC_CQ_MIN_PROC_LIMIT && val <= LPFC_CQ_MAX_PROC_LIMIT) {
		phba->cfg_cq_max_proc_limit = val;
		return 0;
	}

	/*
	 * Report the rejected request itself. cfg_cq_max_proc_limit already
	 * holds the default at this point, so printing it would hide the
	 * value that was actually out of range.
	 */
	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
			"0371 "LPFC_DRIVER_NAME"_cq_max_proc_limit: "
			"%d out of range, using default\n",
			val);

	return 0;
}
static DEVICE_ATTR_RW(lpfc_cq_max_proc_limit);
/** /**
* lpfc_state_show - Display current driver CPU affinity * lpfc_state_show - Display current driver CPU affinity
...@@ -5788,8 +5903,9 @@ struct device_attribute *lpfc_hba_attrs[] = { ...@@ -5788,8 +5903,9 @@ struct device_attribute *lpfc_hba_attrs[] = {
&dev_attr_lpfc_use_msi, &dev_attr_lpfc_use_msi,
&dev_attr_lpfc_nvme_oas, &dev_attr_lpfc_nvme_oas,
&dev_attr_lpfc_nvme_embed_cmd, &dev_attr_lpfc_nvme_embed_cmd,
&dev_attr_lpfc_auto_imax,
&dev_attr_lpfc_fcp_imax, &dev_attr_lpfc_fcp_imax,
&dev_attr_lpfc_cq_poll_threshold,
&dev_attr_lpfc_cq_max_proc_limit,
&dev_attr_lpfc_fcp_cpu_map, &dev_attr_lpfc_fcp_cpu_map,
&dev_attr_lpfc_hdw_queue, &dev_attr_lpfc_hdw_queue,
&dev_attr_lpfc_irq_chann, &dev_attr_lpfc_irq_chann,
...@@ -6834,8 +6950,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) ...@@ -6834,8 +6950,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
lpfc_use_msi_init(phba, lpfc_use_msi); lpfc_use_msi_init(phba, lpfc_use_msi);
lpfc_nvme_oas_init(phba, lpfc_nvme_oas); lpfc_nvme_oas_init(phba, lpfc_nvme_oas);
lpfc_nvme_embed_cmd_init(phba, lpfc_nvme_embed_cmd); lpfc_nvme_embed_cmd_init(phba, lpfc_nvme_embed_cmd);
lpfc_auto_imax_init(phba, lpfc_auto_imax);
lpfc_fcp_imax_init(phba, lpfc_fcp_imax); lpfc_fcp_imax_init(phba, lpfc_fcp_imax);
lpfc_cq_poll_threshold_init(phba, lpfc_cq_poll_threshold);
lpfc_cq_max_proc_limit_init(phba, lpfc_cq_max_proc_limit);
lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map); lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map);
lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset); lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset);
lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat); lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat);
...@@ -6888,9 +7005,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) ...@@ -6888,9 +7005,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP; phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP;
} }
if (phba->cfg_auto_imax && !phba->cfg_fcp_imax) phba->cfg_auto_imax = (phba->cfg_fcp_imax) ? 0 : 1;
phba->cfg_auto_imax = 0;
phba->initial_imax = phba->cfg_fcp_imax;
phba->cfg_enable_pbde = 0; phba->cfg_enable_pbde = 0;
......
...@@ -3766,10 +3766,10 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype, ...@@ -3766,10 +3766,10 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype,
(unsigned long long)qp->q_cnt_4); (unsigned long long)qp->q_cnt_4);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
"\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", "HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]",
qp->queue_id, qp->entry_count, qp->queue_id, qp->entry_count,
qp->entry_size, qp->host_index, qp->entry_size, qp->host_index,
qp->hba_index, qp->entry_repost); qp->hba_index, qp->notify_interval);
len += snprintf(pbuffer + len, len += snprintf(pbuffer + len,
LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
return len; return len;
...@@ -3819,10 +3819,10 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype, ...@@ -3819,10 +3819,10 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype,
qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
"\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", "HST-IDX[%04d], NTFI[%03d], PLMT[%03d]",
qp->queue_id, qp->entry_count, qp->queue_id, qp->entry_count,
qp->entry_size, qp->host_index, qp->entry_size, qp->host_index,
qp->hba_index, qp->entry_repost); qp->notify_interval, qp->max_proc_limit);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
...@@ -3845,15 +3845,15 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, ...@@ -3845,15 +3845,15 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
"\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", "HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]\n",
qp->queue_id, qp->entry_count, qp->entry_size, qp->queue_id, qp->entry_count, qp->entry_size,
qp->host_index, qp->hba_index, qp->entry_repost); qp->host_index, qp->hba_index, qp->notify_interval);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
"\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", "HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]\n",
datqp->queue_id, datqp->entry_count, datqp->queue_id, datqp->entry_count,
datqp->entry_size, datqp->host_index, datqp->entry_size, datqp->host_index,
datqp->hba_index, datqp->entry_repost); datqp->hba_index, datqp->notify_interval);
return len; return len;
} }
...@@ -3934,10 +3934,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype, ...@@ -3934,10 +3934,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
(unsigned long long)qp->q_cnt_4, qp->q_mode); (unsigned long long)qp->q_cnt_4, qp->q_mode);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d] AFFIN[%03d]", "HST-IDX[%04d], NTFI[%03d], PLMT[%03d], AFFIN[%03d]",
qp->queue_id, qp->entry_count, qp->entry_size, qp->queue_id, qp->entry_count, qp->entry_size,
qp->host_index, qp->hba_index, qp->entry_repost, qp->host_index, qp->notify_interval,
qp->chann); qp->max_proc_limit, qp->chann);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
return len; return len;
......
...@@ -208,7 +208,14 @@ struct lpfc_sli_intf { ...@@ -208,7 +208,14 @@ struct lpfc_sli_intf {
/* Configuration of Interrupts / sec for entire HBA port */ /* Configuration of Interrupts / sec for entire HBA port */
#define LPFC_MIN_IMAX 5000 #define LPFC_MIN_IMAX 5000
#define LPFC_MAX_IMAX 5000000 #define LPFC_MAX_IMAX 5000000
#define LPFC_DEF_IMAX 150000 #define LPFC_DEF_IMAX 0
#define LPFC_IMAX_THRESHOLD 1000
#define LPFC_MAX_AUTO_EQ_DELAY 120
#define LPFC_EQ_DELAY_STEP 15
#define LPFC_EQD_ISR_TRIGGER 20000
/* 1s intervals */
#define LPFC_EQ_DELAY_MSECS 1000
#define LPFC_MIN_CPU_MAP 0 #define LPFC_MIN_CPU_MAP 0
#define LPFC_MAX_CPU_MAP 1 #define LPFC_MAX_CPU_MAP 1
......
...@@ -1251,6 +1251,69 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq) ...@@ -1251,6 +1251,69 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
return; return;
} }
static void
lpfc_hb_eq_delay_work(struct work_struct *work)
{
struct lpfc_hba *phba = container_of(to_delayed_work(work),
struct lpfc_hba, eq_delay_work);
struct lpfc_eq_intr_info *eqi, *eqi_new;
struct lpfc_queue *eq, *eq_next;
unsigned char *eqcnt = NULL;
uint32_t usdelay;
int i;
if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
return;
if (phba->link_state == LPFC_HBA_ERROR ||
phba->pport->fc_flag & FC_OFFLINE_MODE)
goto requeue;
eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char),
GFP_KERNEL);
if (!eqcnt)
goto requeue;
for (i = 0; i < phba->cfg_irq_chann; i++) {
eq = phba->sli4_hba.hdwq[i].hba_eq;
if (eq && eqcnt[eq->last_cpu] < 2)
eqcnt[eq->last_cpu]++;
continue;
}
for_each_present_cpu(i) {
if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2)
continue;
eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) *
LPFC_EQ_DELAY_STEP;
if (usdelay > LPFC_MAX_AUTO_EQ_DELAY)
usdelay = LPFC_MAX_AUTO_EQ_DELAY;
eqi->icnt = 0;
list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) {
if (eq->last_cpu != i) {
eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info,
eq->last_cpu);
list_move_tail(&eq->cpu_list, &eqi_new->list);
continue;
}
if (usdelay != eq->q_mode)
lpfc_modify_hba_eq_delay(phba, eq->hdwq, 1,
usdelay);
}
}
kfree(eqcnt);
requeue:
queue_delayed_work(phba->wq, &phba->eq_delay_work,
msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
}
/** /**
* lpfc_hb_mxp_handler - Multi-XRI pools handler to adjust XRI distribution * lpfc_hb_mxp_handler - Multi-XRI pools handler to adjust XRI distribution
* @phba: pointer to lpfc hba data structure. * @phba: pointer to lpfc hba data structure.
...@@ -1303,16 +1366,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba) ...@@ -1303,16 +1366,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
int retval, i; int retval, i;
struct lpfc_sli *psli = &phba->sli; struct lpfc_sli *psli = &phba->sli;
LIST_HEAD(completions); LIST_HEAD(completions);
struct lpfc_queue *qp;
unsigned long time_elapsed;
uint32_t tick_cqe, max_cqe, val;
uint64_t tot, data1, data2, data3;
struct lpfc_nvmet_tgtport *tgtp;
struct lpfc_register reg_data;
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
struct lpfc_fc4_ctrl_stat *cstat;
void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;
if (phba->cfg_xri_rebalancing) { if (phba->cfg_xri_rebalancing) {
/* Multi-XRI pools handler */ /* Multi-XRI pools handler */
...@@ -1332,104 +1385,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba) ...@@ -1332,104 +1385,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
(phba->pport->fc_flag & FC_OFFLINE_MODE)) (phba->pport->fc_flag & FC_OFFLINE_MODE))
return; return;
if (phba->cfg_auto_imax) {
if (!phba->last_eqdelay_time) {
phba->last_eqdelay_time = jiffies;
goto skip_eqdelay;
}
time_elapsed = jiffies - phba->last_eqdelay_time;
phba->last_eqdelay_time = jiffies;
tot = 0xffff;
/* Check outstanding IO count */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
if (phba->nvmet_support) {
tgtp = phba->targetport->private;
/* Calculate outstanding IOs */
tot = atomic_read(&tgtp->rcv_fcp_cmd_drop);
tot += atomic_read(&tgtp->xmt_fcp_release);
tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot;
} else {
localport = phba->pport->localport;
if (!localport || !localport->private)
goto skip_eqdelay;
lport = (struct lpfc_nvme_lport *)
localport->private;
tot = 0;
for (i = 0;
i < phba->cfg_hdw_queue; i++) {
cstat =
&phba->sli4_hba.hdwq[i].nvme_cstat;
data1 = cstat->input_requests;
data2 = cstat->output_requests;
data3 = cstat->control_requests;
tot += (data1 + data2 + data3);
tot -= cstat->io_cmpls;
}
}
}
/* Interrupts per sec per EQ */
val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */
/* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
max_cqe = time_elapsed * tick_cqe;
for (i = 0; i < phba->cfg_irq_chann; i++) {
/* Fast-path EQ */
qp = phba->sli4_hba.hdwq[i].hba_eq;
if (!qp)
continue;
/* Use no EQ delay if we don't have many outstanding
* IOs, or if we are only processing 1 CQE/ISR or less.
* Otherwise, assume we can process up to lpfc_fcp_imax
* interrupts per HBA.
*/
if (tot < LPFC_NODELAY_MAX_IO ||
qp->EQ_cqe_cnt <= max_cqe)
val = 0;
else
val = phba->cfg_fcp_imax;
if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
/* Use EQ Delay Register method */
/* Convert for EQ Delay register */
if (val) {
/* First, interrupts per sec per EQ */
val = phba->cfg_fcp_imax /
phba->cfg_irq_chann;
/* us delay between each interrupt */
val = LPFC_SEC_TO_USEC / val;
}
if (val != qp->q_mode) {
reg_data.word0 = 0;
bf_set(lpfc_sliport_eqdelay_id,
&reg_data, qp->queue_id);
bf_set(lpfc_sliport_eqdelay_delay,
&reg_data, val);
writel(reg_data.word0, eqdreg);
}
} else {
/* Use mbox command method */
if (val != qp->q_mode)
lpfc_modify_hba_eq_delay(phba, i,
1, val);
}
/*
* val is cfg_fcp_imax or 0 for mbox delay or us delay
* between interrupts for EQDR.
*/
qp->q_mode = val;
qp->EQ_cqe_cnt = 0;
}
}
skip_eqdelay:
spin_lock_irq(&phba->pport->work_port_lock); spin_lock_irq(&phba->pport->work_port_lock);
if (time_after(phba->last_completion_time + if (time_after(phba->last_completion_time +
...@@ -2986,6 +2941,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba) ...@@ -2986,6 +2941,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba)
{ {
if (phba->pport) if (phba->pport)
lpfc_stop_vport_timers(phba->pport); lpfc_stop_vport_timers(phba->pport);
cancel_delayed_work_sync(&phba->eq_delay_work);
del_timer_sync(&phba->sli.mbox_tmo); del_timer_sync(&phba->sli.mbox_tmo);
del_timer_sync(&phba->fabric_block_timer); del_timer_sync(&phba->fabric_block_timer);
del_timer_sync(&phba->eratt_poll); del_timer_sync(&phba->eratt_poll);
...@@ -6234,6 +6190,8 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) ...@@ -6234,6 +6190,8 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
/* Heartbeat timer */ /* Heartbeat timer */
timer_setup(&phba->hb_tmofunc, lpfc_hb_timeout, 0); timer_setup(&phba->hb_tmofunc, lpfc_hb_timeout, 0);
INIT_DELAYED_WORK(&phba->eq_delay_work, lpfc_hb_eq_delay_work);
return 0; return 0;
} }
...@@ -6849,6 +6807,13 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) ...@@ -6849,6 +6807,13 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
goto out_free_hba_eq_hdl; goto out_free_hba_eq_hdl;
} }
phba->sli4_hba.eq_info = alloc_percpu(struct lpfc_eq_intr_info);
if (!phba->sli4_hba.eq_info) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3321 Failed allocation for per_cpu stats\n");
rc = -ENOMEM;
goto out_free_hba_cpu_map;
}
/* /*
* Enable sr-iov virtual functions if supported and configured * Enable sr-iov virtual functions if supported and configured
* through the module parameter. * through the module parameter.
...@@ -6868,6 +6833,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) ...@@ -6868,6 +6833,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
return 0; return 0;
out_free_hba_cpu_map:
kfree(phba->sli4_hba.cpu_map);
out_free_hba_eq_hdl: out_free_hba_eq_hdl:
kfree(phba->sli4_hba.hba_eq_hdl); kfree(phba->sli4_hba.hba_eq_hdl);
out_free_fcf_rr_bmask: out_free_fcf_rr_bmask:
...@@ -6897,6 +6864,8 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) ...@@ -6897,6 +6864,8 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
{ {
struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry; struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
free_percpu(phba->sli4_hba.eq_info);
/* Free memory allocated for msi-x interrupt vector to CPU mapping */ /* Free memory allocated for msi-x interrupt vector to CPU mapping */
kfree(phba->sli4_hba.cpu_map); kfree(phba->sli4_hba.cpu_map);
phba->sli4_hba.num_present_cpu = 0; phba->sli4_hba.num_present_cpu = 0;
...@@ -8753,6 +8722,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) ...@@ -8753,6 +8722,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
struct lpfc_queue *qdesc; struct lpfc_queue *qdesc;
int idx, eqidx; int idx, eqidx;
struct lpfc_sli4_hdw_queue *qp; struct lpfc_sli4_hdw_queue *qp;
struct lpfc_eq_intr_info *eqi;
/* /*
* Create HBA Record arrays. * Create HBA Record arrays.
...@@ -8865,6 +8835,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) ...@@ -8865,6 +8835,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
qdesc->chann = lpfc_find_cpu_handle(phba, eqidx, qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
LPFC_FIND_BY_EQ); LPFC_FIND_BY_EQ);
phba->sli4_hba.hdwq[idx].hba_eq = qdesc; phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
qdesc->last_cpu = qdesc->chann;
eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
list_add(&qdesc->cpu_list, &eqi->list);
} }
...@@ -10246,13 +10219,13 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) ...@@ -10246,13 +10219,13 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
case LPFC_SLI_INTF_IF_TYPE_0: case LPFC_SLI_INTF_IF_TYPE_0:
case LPFC_SLI_INTF_IF_TYPE_2: case LPFC_SLI_INTF_IF_TYPE_2:
phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_eq_clr_intr; phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_eq_clr_intr;
phba->sli4_hba.sli4_eq_release = lpfc_sli4_eq_release; phba->sli4_hba.sli4_write_eq_db = lpfc_sli4_write_eq_db;
phba->sli4_hba.sli4_cq_release = lpfc_sli4_cq_release; phba->sli4_hba.sli4_write_cq_db = lpfc_sli4_write_cq_db;
break; break;
case LPFC_SLI_INTF_IF_TYPE_6: case LPFC_SLI_INTF_IF_TYPE_6:
phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_if6_eq_clr_intr; phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_if6_eq_clr_intr;
phba->sli4_hba.sli4_eq_release = lpfc_sli4_if6_eq_release; phba->sli4_hba.sli4_write_eq_db = lpfc_sli4_if6_write_eq_db;
phba->sli4_hba.sli4_cq_release = lpfc_sli4_if6_cq_release; phba->sli4_hba.sli4_write_cq_db = lpfc_sli4_if6_write_cq_db;
break; break;
default: default:
break; break;
...@@ -10773,6 +10746,14 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) ...@@ -10773,6 +10746,14 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
cpup++; cpup++;
} }
for_each_possible_cpu(i) {
struct lpfc_eq_intr_info *eqi =
per_cpu_ptr(phba->sli4_hba.eq_info, i);
INIT_LIST_HEAD(&eqi->list);
eqi->icnt = 0;
}
/* /*
* If the number of IRQ vectors == number of CPUs, * If the number of IRQ vectors == number of CPUs,
* mapping is pretty simple: 1 to 1. * mapping is pretty simple: 1 to 1.
......
This diff is collapsed.
...@@ -154,14 +154,41 @@ struct lpfc_queue { ...@@ -154,14 +154,41 @@ struct lpfc_queue {
struct list_head child_list; struct list_head child_list;
struct list_head page_list; struct list_head page_list;
struct list_head sgl_list; struct list_head sgl_list;
struct list_head cpu_list;
uint32_t entry_count; /* Number of entries to support on the queue */ uint32_t entry_count; /* Number of entries to support on the queue */
uint32_t entry_size; /* Size of each queue entry. */ uint32_t entry_size; /* Size of each queue entry. */
uint32_t entry_repost; /* Count of entries before doorbell is rung */ uint32_t notify_interval; /* Queue Notification Interval
#define LPFC_EQ_REPOST 8 * For chip->host queues (EQ, CQ, RQ):
#define LPFC_MQ_REPOST 8 * specifies the interval (number of
#define LPFC_CQ_REPOST 64 * entries) where the doorbell is rung to
#define LPFC_RQ_REPOST 64 * notify the chip of entry consumption.
#define LPFC_RELEASE_NOTIFICATION_INTERVAL 32 /* For WQs */ * For host->chip queues (WQ):
* specifies the interval (number of
* entries) where consumption CQE is
* requested to indicate WQ entries
* consumed by the chip.
* Not used on an MQ.
*/
#define LPFC_EQ_NOTIFY_INTRVL 16
#define LPFC_CQ_NOTIFY_INTRVL 16
#define LPFC_WQ_NOTIFY_INTRVL 16
#define LPFC_RQ_NOTIFY_INTRVL 16
uint32_t max_proc_limit; /* Queue Processing Limit
* For chip->host queues (EQ, CQ):
* specifies the maximum number of
* entries to be consumed in one
* processing iteration sequence. Queue
* will be rearmed after each iteration.
* Not used on an MQ, RQ or WQ.
*/
#define LPFC_EQ_MAX_PROC_LIMIT 256
#define LPFC_CQ_MIN_PROC_LIMIT 64
#define LPFC_CQ_MAX_PROC_LIMIT LPFC_CQE_EXP_COUNT // 4096
#define LPFC_CQ_DEF_MAX_PROC_LIMIT LPFC_CQE_DEF_COUNT // 1024
#define LPFC_CQ_MIN_THRESHOLD_TO_POLL 64
#define LPFC_CQ_MAX_THRESHOLD_TO_POLL LPFC_CQ_DEF_MAX_PROC_LIMIT
#define LPFC_CQ_DEF_THRESHOLD_TO_POLL LPFC_CQ_DEF_MAX_PROC_LIMIT
uint32_t queue_claimed; /* indicates queue is being processed */
uint32_t queue_id; /* Queue ID assigned by the hardware */ uint32_t queue_id; /* Queue ID assigned by the hardware */
uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */ uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */
uint32_t host_index; /* The host's index for putting or getting */ uint32_t host_index; /* The host's index for putting or getting */
...@@ -219,9 +246,12 @@ struct lpfc_queue { ...@@ -219,9 +246,12 @@ struct lpfc_queue {
struct work_struct irqwork; struct work_struct irqwork;
struct work_struct spwork; struct work_struct spwork;
struct delayed_work sched_irqwork;
struct delayed_work sched_spwork;
uint64_t isr_timestamp; uint64_t isr_timestamp;
uint16_t hdwq; uint16_t hdwq;
uint16_t last_cpu; /* most recent cpu */
uint8_t qe_valid; uint8_t qe_valid;
struct lpfc_queue *assoc_qp; struct lpfc_queue *assoc_qp;
union sli4_qe qe[1]; /* array to index entries (must be last) */ union sli4_qe qe[1]; /* array to index entries (must be last) */
...@@ -608,6 +638,11 @@ struct lpfc_lock_stat { ...@@ -608,6 +638,11 @@ struct lpfc_lock_stat {
}; };
#endif #endif
/*
 * Per-CPU EQ bookkeeping; one instance per CPU is allocated via
 * alloc_percpu() and reached through sli4_hba.eq_info.
 */
struct lpfc_eq_intr_info {
struct list_head list; /* head of the EQs linked through lpfc_queue.cpu_list */
uint32_t icnt; /* counter read and zeroed by the EQ delay work; presumably an interrupt count - TODO confirm where it is incremented */
};
/* SLI4 HBA data structure entries */ /* SLI4 HBA data structure entries */
struct lpfc_sli4_hdw_queue { struct lpfc_sli4_hdw_queue {
/* Pointers to the constructed SLI4 queues */ /* Pointers to the constructed SLI4 queues */
...@@ -749,8 +784,10 @@ struct lpfc_sli4_hba { ...@@ -749,8 +784,10 @@ struct lpfc_sli4_hba {
struct lpfc_hba_eq_hdl *hba_eq_hdl; /* HBA per-WQ handle */ struct lpfc_hba_eq_hdl *hba_eq_hdl; /* HBA per-WQ handle */
void (*sli4_eq_clr_intr)(struct lpfc_queue *q); void (*sli4_eq_clr_intr)(struct lpfc_queue *q);
uint32_t (*sli4_eq_release)(struct lpfc_queue *q, bool arm); void (*sli4_write_eq_db)(struct lpfc_hba *phba, struct lpfc_queue *eq,
uint32_t (*sli4_cq_release)(struct lpfc_queue *q, bool arm); uint32_t count, bool arm);
void (*sli4_write_cq_db)(struct lpfc_hba *phba, struct lpfc_queue *cq,
uint32_t count, bool arm);
/* Pointers to the constructed SLI4 queues */ /* Pointers to the constructed SLI4 queues */
struct lpfc_sli4_hdw_queue *hdwq; struct lpfc_sli4_hdw_queue *hdwq;
...@@ -856,6 +893,7 @@ struct lpfc_sli4_hba { ...@@ -856,6 +893,7 @@ struct lpfc_sli4_hba {
uint16_t num_online_cpu; uint16_t num_online_cpu;
uint16_t num_present_cpu; uint16_t num_present_cpu;
uint16_t curr_disp_cpu; uint16_t curr_disp_cpu;
struct lpfc_eq_intr_info __percpu *eq_info;
uint32_t conf_trunk; uint32_t conf_trunk;
#define lpfc_conf_trunk_port0_WORD conf_trunk #define lpfc_conf_trunk_port0_WORD conf_trunk
#define lpfc_conf_trunk_port0_SHIFT 0 #define lpfc_conf_trunk_port0_SHIFT 0
...@@ -1020,11 +1058,15 @@ int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *); ...@@ -1020,11 +1058,15 @@ int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *);
int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba); int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba);
int lpfc_sli4_init_vpi(struct lpfc_vport *); int lpfc_sli4_init_vpi(struct lpfc_vport *);
inline void lpfc_sli4_eq_clr_intr(struct lpfc_queue *); inline void lpfc_sli4_eq_clr_intr(struct lpfc_queue *);
uint32_t lpfc_sli4_cq_release(struct lpfc_queue *, bool); void lpfc_sli4_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
uint32_t lpfc_sli4_eq_release(struct lpfc_queue *, bool); uint32_t count, bool arm);
void lpfc_sli4_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
uint32_t count, bool arm);
inline void lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q); inline void lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q);
uint32_t lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm); void lpfc_sli4_if6_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
uint32_t lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm); uint32_t count, bool arm);
void lpfc_sli4_if6_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
uint32_t count, bool arm);
void lpfc_sli4_fcfi_unreg(struct lpfc_hba *, uint16_t); void lpfc_sli4_fcfi_unreg(struct lpfc_hba *, uint16_t);
int lpfc_sli4_fcf_scan_read_fcf_rec(struct lpfc_hba *, uint16_t); int lpfc_sli4_fcf_scan_read_fcf_rec(struct lpfc_hba *, uint16_t);
int lpfc_sli4_fcf_rr_read_fcf_rec(struct lpfc_hba *, uint16_t); int lpfc_sli4_fcf_rr_read_fcf_rec(struct lpfc_hba *, uint16_t);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment