Commit 924dd584 authored by Martin Peschke's avatar Martin Peschke Committed by James Bottomley

[SCSI] zfcp: fix schedule-inside-lock in scsi_device list loops

BUG: sleeping function called from invalid context at kernel/workqueue.c:2752
in_atomic(): 1, irqs_disabled(): 1, pid: 360, name: zfcperp0.0.1700
CPU: 1 Not tainted 3.9.3+ #69
Process zfcperp0.0.1700 (pid: 360, task: 0000000075b7e080, ksp: 000000007476bc30)
<snip>
Call Trace:
([<00000000001165de>] show_trace+0x106/0x154)
 [<00000000001166a0>] show_stack+0x74/0xf4
 [<00000000006ff646>] dump_stack+0xc6/0xd4
 [<000000000017f3a0>] __might_sleep+0x128/0x148
 [<000000000015ece8>] flush_work+0x54/0x1f8
 [<00000000001630de>] __cancel_work_timer+0xc6/0x128
 [<00000000005067ac>] scsi_device_dev_release_usercontext+0x164/0x23c
 [<0000000000161816>] execute_in_process_context+0x96/0xa8
 [<00000000004d33d8>] device_release+0x60/0xc0
 [<000000000048af48>] kobject_release+0xa8/0x1c4
 [<00000000004f4bf2>] __scsi_iterate_devices+0xfa/0x130
 [<000003ff801b307a>] zfcp_erp_strategy+0x4da/0x1014 [zfcp]
 [<000003ff801b3caa>] zfcp_erp_thread+0xf6/0x2b0 [zfcp]
 [<000000000016b75a>] kthread+0xf2/0xfc
 [<000000000070c9de>] kernel_thread_starter+0x6/0xc
 [<000000000070c9d8>] kernel_thread_starter+0x0/0xc

Apparently, the ref_count for some scsi_device drops down to zero,
triggering device removal through execute_in_process_context(), while
the lldd error recovery thread iterates through a scsi device list.
Unfortunately, execute_in_process_context() decides to immediately
execute that device removal function, instead of scheduling asynchronous
execution, since it detects process context and thinks it is safe to do
so. But almost all calls to shost_for_each_device() in our lldd are
inside spin_lock_irq, even in thread context. Obviously, schedule()
inside spin_lock_irq sections is a bad idea.

Change the lldd to use the proper iterator function,
__shost_for_each_device(), in combination with required locking.

Occurences that need to be changed include all calls in zfcp_erp.c,
since those might be executed in zfcp error recovery thread context
with a lock held.

Other occurences of shost_for_each_device() in zfcp_fsf.c do not
need to be changed (no process context, no surrounding locking).

The problem was introduced in Linux 2.6.37 by commit
b62a8d9b
"[SCSI] zfcp: Use SCSI device data zfcp_scsi_dev instead of zfcp_unit".
Reported-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: default avatarMartin Peschke <mpeschke@linux.vnet.ibm.com>
Cc: stable@vger.kernel.org #2.6.37+
Signed-off-by: default avatarSteffen Maier <maier@linux.vnet.ibm.com>
Signed-off-by: default avatarJames Bottomley <JBottomley@Parallels.com>
parent d79ff142
...@@ -102,10 +102,13 @@ static void zfcp_erp_action_dismiss_port(struct zfcp_port *port) ...@@ -102,10 +102,13 @@ static void zfcp_erp_action_dismiss_port(struct zfcp_port *port)
if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE) if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE)
zfcp_erp_action_dismiss(&port->erp_action); zfcp_erp_action_dismiss(&port->erp_action);
else else {
shost_for_each_device(sdev, port->adapter->scsi_host) spin_lock(port->adapter->scsi_host->host_lock);
__shost_for_each_device(sdev, port->adapter->scsi_host)
if (sdev_to_zfcp(sdev)->port == port) if (sdev_to_zfcp(sdev)->port == port)
zfcp_erp_action_dismiss_lun(sdev); zfcp_erp_action_dismiss_lun(sdev);
spin_unlock(port->adapter->scsi_host->host_lock);
}
} }
static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
...@@ -592,9 +595,11 @@ static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear, ...@@ -592,9 +595,11 @@ static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear,
{ {
struct scsi_device *sdev; struct scsi_device *sdev;
shost_for_each_device(sdev, port->adapter->scsi_host) spin_lock(port->adapter->scsi_host->host_lock);
__shost_for_each_device(sdev, port->adapter->scsi_host)
if (sdev_to_zfcp(sdev)->port == port) if (sdev_to_zfcp(sdev)->port == port)
_zfcp_erp_lun_reopen(sdev, clear, id, 0); _zfcp_erp_lun_reopen(sdev, clear, id, 0);
spin_unlock(port->adapter->scsi_host->host_lock);
} }
static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act)
...@@ -1434,8 +1439,10 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask) ...@@ -1434,8 +1439,10 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask)
atomic_set_mask(common_mask, &port->status); atomic_set_mask(common_mask, &port->status);
read_unlock_irqrestore(&adapter->port_list_lock, flags); read_unlock_irqrestore(&adapter->port_list_lock, flags);
shost_for_each_device(sdev, adapter->scsi_host) spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
__shost_for_each_device(sdev, adapter->scsi_host)
atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status); atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status);
spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
} }
/** /**
...@@ -1469,11 +1476,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask) ...@@ -1469,11 +1476,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
} }
read_unlock_irqrestore(&adapter->port_list_lock, flags); read_unlock_irqrestore(&adapter->port_list_lock, flags);
shost_for_each_device(sdev, adapter->scsi_host) { spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
__shost_for_each_device(sdev, adapter->scsi_host) {
atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status); atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status);
if (clear_counter) if (clear_counter)
atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
} }
spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
} }
/** /**
...@@ -1487,16 +1496,19 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask) ...@@ -1487,16 +1496,19 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
{ {
struct scsi_device *sdev; struct scsi_device *sdev;
u32 common_mask = mask & ZFCP_COMMON_FLAGS; u32 common_mask = mask & ZFCP_COMMON_FLAGS;
unsigned long flags;
atomic_set_mask(mask, &port->status); atomic_set_mask(mask, &port->status);
if (!common_mask) if (!common_mask)
return; return;
shost_for_each_device(sdev, port->adapter->scsi_host) spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
__shost_for_each_device(sdev, port->adapter->scsi_host)
if (sdev_to_zfcp(sdev)->port == port) if (sdev_to_zfcp(sdev)->port == port)
atomic_set_mask(common_mask, atomic_set_mask(common_mask,
&sdev_to_zfcp(sdev)->status); &sdev_to_zfcp(sdev)->status);
spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
} }
/** /**
...@@ -1511,6 +1523,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask) ...@@ -1511,6 +1523,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
struct scsi_device *sdev; struct scsi_device *sdev;
u32 common_mask = mask & ZFCP_COMMON_FLAGS; u32 common_mask = mask & ZFCP_COMMON_FLAGS;
u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED; u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
unsigned long flags;
atomic_clear_mask(mask, &port->status); atomic_clear_mask(mask, &port->status);
...@@ -1520,13 +1533,15 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask) ...@@ -1520,13 +1533,15 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
if (clear_counter) if (clear_counter)
atomic_set(&port->erp_counter, 0); atomic_set(&port->erp_counter, 0);
shost_for_each_device(sdev, port->adapter->scsi_host) spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
__shost_for_each_device(sdev, port->adapter->scsi_host)
if (sdev_to_zfcp(sdev)->port == port) { if (sdev_to_zfcp(sdev)->port == port) {
atomic_clear_mask(common_mask, atomic_clear_mask(common_mask,
&sdev_to_zfcp(sdev)->status); &sdev_to_zfcp(sdev)->status);
if (clear_counter) if (clear_counter)
atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
} }
spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment