Commit b5cf6b63 authored by Eddie Wai's avatar Eddie Wai Committed by James Bottomley

[SCSI] bnx2i: Added the use of kthreads to handle SCSI cmd completion

This patch breaks the SCSI cmd completion into two parts:
1. The bh will allocate and queued work to the cmd specific CPU IO
completion kthread.  The CPU for the cmd is from the sc->request->cpu.

2. The CPU specific IO completion kthread will call the scsi_cmd_resp
routine to do the actual cmd completion.

In the normal case, these IO completion kthreads should complete before
the blk IO times out at 60s.  However, in the case when these kthreads
are blocked for whatever reason and exceeded the timeout, the call
to conn_destroy will have to iterate and exhaust all related work in the
percpu work list for all online CPUs.  This will guarantee the protection
of the work->session and conn pointers before they get freed.

Also modified the event coalescing formula to have at least the
event_coal_min outstanding cmds in the pipeline so the SCSI producer
would not get underrun.

Also changed the following SCSI parameters:
- can_queue from 1024 to 2048
- cmds_per_lun from 24 to 128
Signed-off-by: default avatarEddie Wai <eddie.wai@broadcom.com>
Acked-by: default avatarBenjamin Li <benli@broadcom.com>
Acked-by: default avatarMichael Chan <mchan@broadcom.com>
Signed-off-by: default avatarJames Bottomley <JBottomley@Parallels.com>
parent 74dcd0ec
......@@ -22,11 +22,14 @@
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/in.h>
#include <linux/kfifo.h>
#include <linux/netdevice.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/cpu.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
......@@ -202,10 +205,13 @@ struct io_bdt {
/**
* bnx2i_cmd - iscsi command structure
*
* @hdr: iSCSI header
* @conn: iscsi_conn pointer
* @scsi_cmd: SCSI-ML task pointer corresponding to this iscsi cmd
* @sg: SG list
* @io_tbl: buffer descriptor (BD) table
* @bd_tbl_dma: buffer descriptor (BD) table's dma address
* @req: bnx2i specific command request struct
*/
struct bnx2i_cmd {
struct iscsi_hdr hdr;
......@@ -229,6 +235,7 @@ struct bnx2i_cmd {
* @gen_pdu: login/nopout/logout pdu resources
* @violation_notified: bit mask used to track iscsi error/warning messages
* already printed out
* @work_cnt: keeps track of the number of outstanding work
*
* iSCSI connection structure
*/
......@@ -252,6 +259,8 @@ struct bnx2i_conn {
*/
struct generic_pdu_resc gen_pdu;
u64 violation_notified;
atomic_t work_cnt;
};
......@@ -661,7 +670,6 @@ enum {
* @hba: adapter to which this connection belongs
* @conn: iscsi connection this EP is linked to
* @cls_ep: associated iSCSI endpoint pointer
* @sess: iscsi session this EP is linked to
* @cm_sk: cnic sock struct
* @hba_age: age to detect if 'iscsid' issues ep_disconnect()
* after HBA reset is completed by bnx2i/cnic/bnx2
......@@ -687,7 +695,7 @@ struct bnx2i_endpoint {
u32 hba_age;
u32 state;
unsigned long timestamp;
int num_active_cmds;
atomic_t num_active_cmds;
u32 ec_shift;
struct qp_info qp;
......@@ -700,6 +708,19 @@ struct bnx2i_endpoint {
};
struct bnx2i_work {
struct list_head list;
struct iscsi_session *session;
struct bnx2i_conn *bnx2i_conn;
struct cqe cqe;
};
struct bnx2i_percpu_s {
struct task_struct *iothread;
struct list_head work_list;
spinlock_t p_work_lock;
};
/* Global variables */
extern unsigned int error_mask1, error_mask2;
......@@ -783,7 +804,7 @@ extern struct bnx2i_endpoint *bnx2i_find_ep_in_destroy_list(
struct bnx2i_hba *hba, u32 iscsi_cid);
extern int bnx2i_map_ep_dbell_regs(struct bnx2i_endpoint *ep);
extern void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action);
extern int bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action);
extern int bnx2i_hw_ep_disconnect(struct bnx2i_endpoint *bnx2i_ep);
......@@ -793,4 +814,8 @@ extern void bnx2i_print_active_cmd_queue(struct bnx2i_conn *conn);
extern void bnx2i_print_xmit_pdu_queue(struct bnx2i_conn *conn);
extern void bnx2i_print_recv_state(struct bnx2i_conn *conn);
extern int bnx2i_percpu_io_thread(void *arg);
extern int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
struct bnx2i_conn *bnx2i_conn,
struct cqe *cqe);
#endif
This diff is collapsed.
......@@ -40,7 +40,7 @@ unsigned int event_coal_min = 24;
module_param(event_coal_min, int, 0664);
MODULE_PARM_DESC(event_coal_min, "Event Coalescing Minimum Commands");
unsigned int event_coal_div = 1;
unsigned int event_coal_div = 2;
module_param(event_coal_div, int, 0664);
MODULE_PARM_DESC(event_coal_div, "Event Coalescing Divide Factor");
......@@ -66,6 +66,15 @@ MODULE_PARM_DESC(rq_size, "Configure RQ size");
u64 iscsi_error_mask = 0x00;
DEFINE_PER_CPU(struct bnx2i_percpu_s, bnx2i_percpu);
static int bnx2i_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu);
/* notification function for CPU hotplug events */
static struct notifier_block bnx2i_cpu_notifier = {
.notifier_call = bnx2i_cpu_callback,
};
/**
* bnx2i_identify_device - identifies NetXtreme II device type
......@@ -361,6 +370,91 @@ void bnx2i_ulp_exit(struct cnic_dev *dev)
}
/**
* bnx2i_percpu_thread_create - Create a receive thread for an
* online CPU
*
* @cpu: cpu index for the online cpu
*/
static void bnx2i_percpu_thread_create(unsigned int cpu)
{
struct bnx2i_percpu_s *p;
struct task_struct *thread;
p = &per_cpu(bnx2i_percpu, cpu);
thread = kthread_create(bnx2i_percpu_io_thread, (void *)p,
"bnx2i_thread/%d", cpu);
/* bind thread to the cpu */
if (likely(!IS_ERR(thread))) {
kthread_bind(thread, cpu);
p->iothread = thread;
wake_up_process(thread);
}
}
static void bnx2i_percpu_thread_destroy(unsigned int cpu)
{
struct bnx2i_percpu_s *p;
struct task_struct *thread;
struct bnx2i_work *work, *tmp;
/* Prevent any new work from being queued for this CPU */
p = &per_cpu(bnx2i_percpu, cpu);
spin_lock_bh(&p->p_work_lock);
thread = p->iothread;
p->iothread = NULL;
/* Free all work in the list */
list_for_each_entry_safe(work, tmp, &p->work_list, list) {
list_del_init(&work->list);
bnx2i_process_scsi_cmd_resp(work->session,
work->bnx2i_conn, &work->cqe);
kfree(work);
}
spin_unlock_bh(&p->p_work_lock);
if (thread)
kthread_stop(thread);
}
/**
* bnx2i_cpu_callback - Handler for CPU hotplug events
*
* @nfb: The callback data block
* @action: The event triggering the callback
* @hcpu: The index of the CPU that the event is for
*
* This creates or destroys per-CPU data for iSCSI
*
* Returns NOTIFY_OK always.
*/
static int bnx2i_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned cpu = (unsigned long)hcpu;
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
printk(KERN_INFO "bnx2i: CPU %x online: Create Rx thread\n",
cpu);
bnx2i_percpu_thread_create(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
printk(KERN_INFO "CPU %x offline: Remove Rx thread\n", cpu);
bnx2i_percpu_thread_destroy(cpu);
break;
default:
break;
}
return NOTIFY_OK;
}
/**
* bnx2i_mod_init - module init entry point
*
......@@ -371,6 +465,8 @@ void bnx2i_ulp_exit(struct cnic_dev *dev)
static int __init bnx2i_mod_init(void)
{
int err;
unsigned cpu = 0;
struct bnx2i_percpu_s *p;
printk(KERN_INFO "%s", version);
......@@ -393,6 +489,20 @@ static int __init bnx2i_mod_init(void)
goto unreg_xport;
}
/* Create percpu kernel threads to handle iSCSI I/O completions */
for_each_possible_cpu(cpu) {
p = &per_cpu(bnx2i_percpu, cpu);
INIT_LIST_HEAD(&p->work_list);
spin_lock_init(&p->p_work_lock);
p->iothread = NULL;
}
for_each_online_cpu(cpu)
bnx2i_percpu_thread_create(cpu);
/* Initialize per CPU interrupt thread */
register_hotcpu_notifier(&bnx2i_cpu_notifier);
return 0;
unreg_xport:
......@@ -413,6 +523,7 @@ static int __init bnx2i_mod_init(void)
static void __exit bnx2i_mod_exit(void)
{
struct bnx2i_hba *hba;
unsigned cpu = 0;
mutex_lock(&bnx2i_dev_lock);
while (!list_empty(&adapter_list)) {
......@@ -430,6 +541,11 @@ static void __exit bnx2i_mod_exit(void)
}
mutex_unlock(&bnx2i_dev_lock);
unregister_hotcpu_notifier(&bnx2i_cpu_notifier);
for_each_online_cpu(cpu)
bnx2i_percpu_thread_destroy(cpu);
iscsi_unregister_transport(&bnx2i_iscsi_transport);
cnic_unregister_driver(CNIC_ULP_ISCSI);
}
......
......@@ -27,6 +27,7 @@ static struct scsi_host_template bnx2i_host_template;
*/
static DEFINE_SPINLOCK(bnx2i_resc_lock); /* protects global resources */
DECLARE_PER_CPU(struct bnx2i_percpu_s, bnx2i_percpu);
static int bnx2i_adapter_ready(struct bnx2i_hba *hba)
{
......@@ -1214,7 +1215,8 @@ static int bnx2i_task_xmit(struct iscsi_task *task)
struct bnx2i_cmd *cmd = task->dd_data;
struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr;
if (bnx2i_conn->ep->num_active_cmds + 1 > hba->max_sqes)
if (atomic_read(&bnx2i_conn->ep->num_active_cmds) + 1 >
hba->max_sqes)
return -ENOMEM;
/*
......@@ -1354,6 +1356,9 @@ bnx2i_conn_create(struct iscsi_cls_session *cls_session, uint32_t cid)
bnx2i_conn = conn->dd_data;
bnx2i_conn->cls_conn = cls_conn;
bnx2i_conn->hba = hba;
atomic_set(&bnx2i_conn->work_cnt, 0);
/* 'ep' ptr will be assigned in bind() call */
bnx2i_conn->ep = NULL;
init_completion(&bnx2i_conn->cmd_cleanup_cmpl);
......@@ -1457,11 +1462,34 @@ static void bnx2i_conn_destroy(struct iscsi_cls_conn *cls_conn)
struct bnx2i_conn *bnx2i_conn = conn->dd_data;
struct Scsi_Host *shost;
struct bnx2i_hba *hba;
struct bnx2i_work *work, *tmp;
unsigned cpu = 0;
struct bnx2i_percpu_s *p;
shost = iscsi_session_to_shost(iscsi_conn_to_session(cls_conn));
hba = iscsi_host_priv(shost);
bnx2i_conn_free_login_resources(hba, bnx2i_conn);
if (atomic_read(&bnx2i_conn->work_cnt)) {
for_each_online_cpu(cpu) {
p = &per_cpu(bnx2i_percpu, cpu);
spin_lock_bh(&p->p_work_lock);
list_for_each_entry_safe(work, tmp,
&p->work_list, list) {
if (work->session == conn->session &&
work->bnx2i_conn == bnx2i_conn) {
list_del_init(&work->list);
kfree(work);
if (!atomic_dec_and_test(
&bnx2i_conn->work_cnt))
break;
}
}
spin_unlock_bh(&p->p_work_lock);
}
}
iscsi_conn_teardown(cls_conn);
}
......@@ -1769,7 +1797,7 @@ static struct iscsi_endpoint *bnx2i_ep_connect(struct Scsi_Host *shost,
}
bnx2i_ep = ep->dd_data;
bnx2i_ep->num_active_cmds = 0;
atomic_set(&bnx2i_ep->num_active_cmds, 0);
iscsi_cid = bnx2i_alloc_iscsi_cid(hba);
if (iscsi_cid == -1) {
printk(KERN_ALERT "bnx2i (%s): alloc_ep - unable to allocate "
......@@ -2163,9 +2191,9 @@ static struct scsi_host_template bnx2i_host_template = {
.eh_device_reset_handler = iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.change_queue_depth = iscsi_change_queue_depth,
.can_queue = 1024,
.can_queue = 2048,
.max_sectors = 127,
.cmd_per_lun = 24,
.cmd_per_lun = 128,
.this_id = -1,
.use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = ISCSI_MAX_BDS_PER_CMD,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment