• Bart Van Assche's avatar
    scsi: qla2xxx: Fix hardirq-unsafe locking · 300ec741
    Bart Van Assche authored
    Since fc_remote_port_delete() must be called with interrupts enabled, do
    not disable interrupts when calling that function. Remove the lockin calls
    from around the put_sess() call. This is safe because the function that is
    called when the final reference is dropped, qlt_unreg_sess(), grabs the
    proper locks. This patch avoids that lockdep reports the following:
    
    WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
    kworker/2:1/62 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
    0000000009e679b3 (&(&k->k_lock)->rlock){+.+.}, at: klist_next+0x43/0x1d0
    
    and this task is already holding:
    00000000a033b71c (&(&ha->tgt.sess_lock)->rlock){-...}, at: qla24xx_delete_sess_fn+0x55/0xf0 [qla2xxx_scst]
    which would create a new lock dependency:
     (&(&ha->tgt.sess_lock)->rlock){-...} -> (&(&k->k_lock)->rlock){+.+.}
    
    but this new dependency connects a HARDIRQ-irq-safe lock:
     (&(&ha->tgt.sess_lock)->rlock){-...}
    
    ... which became HARDIRQ-irq-safe at:
      lock_acquire+0xe3/0x200
      _raw_spin_lock_irqsave+0x3d/0x60
      qla24xx_report_id_acquisition+0xa69/0xe30 [qla2xxx_scst]
      qla24xx_process_response_queue+0x69e/0x1270 [qla2xxx_scst]
      qla24xx_msix_rsp_q+0x79/0xf0 [qla2xxx_scst]
      __handle_irq_event_percpu+0x79/0x3c0
      handle_irq_event_percpu+0x70/0xf0
      handle_irq_event+0x5a/0x8b
      handle_edge_irq+0x12c/0x310
      handle_irq+0x192/0x20a
      do_IRQ+0x73/0x160
      ret_from_intr+0x0/0x1d
      default_idle+0x23/0x1f0
      arch_cpu_idle+0x15/0x20
      default_idle_call+0x35/0x40
      do_idle+0x2bb/0x2e0
      cpu_startup_entry+0x1d/0x20
      start_secondary+0x2a8/0x320
      secondary_startup_64+0xa4/0xb0
    
    to a HARDIRQ-irq-unsafe lock:
     (&(&k->k_lock)->rlock){+.+.}
    
    ... which became HARDIRQ-irq-unsafe at:
    ...
      lock_acquire+0xe3/0x200
      _raw_spin_lock+0x32/0x50
      klist_add_tail+0x33/0xb0
      device_add+0x7e1/0xb50
      device_create_groups_vargs+0x11c/0x150
      device_create_with_groups+0x89/0xb0
      vtconsole_class_init+0xb2/0x124
      do_one_initcall+0xc5/0x3ce
      kernel_init_freeable+0x295/0x32e
      kernel_init+0x11/0x11b
      ret_from_fork+0x3a/0x50
    
    other info that might help us debug this:
    
     Possible interrupt unsafe locking scenario:
    
           CPU0                    CPU1
           ----                    ----
      lock(&(&k->k_lock)->rlock);
                                   local_irq_disable();
                                   lock(&(&ha->tgt.sess_lock)->rlock);
                                   lock(&(&k->k_lock)->rlock);
      <Interrupt>
        lock(&(&ha->tgt.sess_lock)->rlock);
    
     *** DEADLOCK ***
    
    3 locks held by kworker/2:1/62:
     #0: 00000000a4319c16 ((wq_completion)"qla2xxx_wq"){+.+.}, at: process_one_work+0x437/0xa80
     #1: 00000000ffa34c42 ((work_completion)(&sess->del_work)){+.+.}, at: process_one_work+0x437/0xa80
     #2: 00000000a033b71c (&(&ha->tgt.sess_lock)->rlock){-...}, at: qla24xx_delete_sess_fn+0x55/0xf0 [qla2xxx_scst]
    
    the dependencies between HARDIRQ-irq-safe lock and the holding lock:
    -> (&(&ha->tgt.sess_lock)->rlock){-...} ops: 8 {
       IN-HARDIRQ-W at:
                        lock_acquire+0xe3/0x200
                        _raw_spin_lock_irqsave+0x3d/0x60
                        qla24xx_report_id_acquisition+0xa69/0xe30 [qla2xxx_scst]
                        qla24xx_process_response_queue+0x69e/0x1270 [qla2xxx_scst]
                        qla24xx_msix_rsp_q+0x79/0xf0 [qla2xxx_scst]
                        __handle_irq_event_percpu+0x79/0x3c0
                        handle_irq_event_percpu+0x70/0xf0
                        handle_irq_event+0x5a/0x8b
                        handle_edge_irq+0x12c/0x310
                        handle_irq+0x192/0x20a
                        do_IRQ+0x73/0x160
                        ret_from_intr+0x0/0x1d
                        default_idle+0x23/0x1f0
                        arch_cpu_idle+0x15/0x20
                        default_idle_call+0x35/0x40
                        do_idle+0x2bb/0x2e0
                        cpu_startup_entry+0x1d/0x20
                        start_secondary+0x2a8/0x320
                        secondary_startup_64+0xa4/0xb0
       INITIAL USE at:
                       lock_acquire+0xe3/0x200
                       _raw_spin_lock_irqsave+0x3d/0x60
                       qla24xx_report_id_acquisition+0xa69/0xe30 [qla2xxx_scst]
                       qla24xx_process_response_queue+0x69e/0x1270 [qla2xxx_scst]
                       qla24xx_msix_rsp_q+0x79/0xf0 [qla2xxx_scst]
                       __handle_irq_event_percpu+0x79/0x3c0
                       handle_irq_event_percpu+0x70/0xf0
                       handle_irq_event+0x5a/0x8b
                       handle_edge_irq+0x12c/0x310
                       handle_irq+0x192/0x20a
                       do_IRQ+0x73/0x160
                       ret_from_intr+0x0/0x1d
                       default_idle+0x23/0x1f0
                       arch_cpu_idle+0x15/0x20
                       default_idle_call+0x35/0x40
                       do_idle+0x2bb/0x2e0
                       cpu_startup_entry+0x1d/0x20
                       start_secondary+0x2a8/0x320
                       secondary_startup_64+0xa4/0xb0
     }
     ... key      at: [<ffffffffa0c0d080>] __key.85462+0x0/0xfffffffffff7df80 [qla2xxx_scst]
     ... acquired at:
       lock_acquire+0xe3/0x200
       _raw_spin_lock_irqsave+0x3d/0x60
       klist_next+0x43/0x1d0
       device_for_each_child+0x96/0x110
       scsi_target_block+0x3c/0x40 [scsi_mod]
       fc_remote_port_delete+0xe7/0x1c0 [scsi_transport_fc]
       qla2x00_mark_device_lost+0xa0b/0xa30 [qla2xxx_scst]
       qlt_unreg_sess+0x1c6/0x380 [qla2xxx_scst]
       qla24xx_delete_sess_fn+0xe6/0xf0 [qla2xxx_scst]
       process_one_work+0x511/0xa80
       worker_thread+0x67/0x5b0
       kthread+0x1d2/0x1f0
       ret_from_fork+0x3a/0x50
    
    the dependencies between the lock to be acquired
     and HARDIRQ-irq-unsafe lock:
    -> (&(&k->k_lock)->rlock){+.+.} ops: 13831 {
       HARDIRQ-ON-W at:
                        lock_acquire+0xe3/0x200
                        _raw_spin_lock+0x32/0x50
                        klist_add_tail+0x33/0xb0
                        device_add+0x7e1/0xb50
                        device_create_groups_vargs+0x11c/0x150
                        device_create_with_groups+0x89/0xb0
                        vtconsole_class_init+0xb2/0x124
                        do_one_initcall+0xc5/0x3ce
                        kernel_init_freeable+0x295/0x32e
                        kernel_init+0x11/0x11b
                        ret_from_fork+0x3a/0x50
       SOFTIRQ-ON-W at:
                        lock_acquire+0xe3/0x200
                        _raw_spin_lock+0x32/0x50
                        klist_add_tail+0x33/0xb0
                        device_add+0x7e1/0xb50
                        device_create_groups_vargs+0x11c/0x150
                        device_create_with_groups+0x89/0xb0
                        vtconsole_class_init+0xb2/0x124
                        do_one_initcall+0xc5/0x3ce
                        kernel_init_freeable+0x295/0x32e
                        kernel_init+0x11/0x11b
                        ret_from_fork+0x3a/0x50
       INITIAL USE at:
                       lock_acquire+0xe3/0x200
                       _raw_spin_lock+0x32/0x50
                       klist_add_tail+0x33/0xb0
                       device_add+0x7e1/0xb50
                       device_create_groups_vargs+0x11c/0x150
                       device_create_with_groups+0x89/0xb0
                       vtconsole_class_init+0xb2/0x124
                       do_one_initcall+0xc5/0x3ce
                       kernel_init_freeable+0x295/0x32e
                       kernel_init+0x11/0x11b
                       ret_from_fork+0x3a/0x50
     }
     ... key      at: [<ffffffff83ed8780>] __key.15491+0x0/0x40
     ... acquired at:
       lock_acquire+0xe3/0x200
       _raw_spin_lock_irqsave+0x3d/0x60
       klist_next+0x43/0x1d0
       device_for_each_child+0x96/0x110
       scsi_target_block+0x3c/0x40 [scsi_mod]
       fc_remote_port_delete+0xe7/0x1c0 [scsi_transport_fc]
       qla2x00_mark_device_lost+0xa0b/0xa30 [qla2xxx_scst]
       qlt_unreg_sess+0x1c6/0x380 [qla2xxx_scst]
       qla24xx_delete_sess_fn+0xe6/0xf0 [qla2xxx_scst]
       process_one_work+0x511/0xa80
       worker_thread+0x67/0x5b0
       kthread+0x1d2/0x1f0
       ret_from_fork+0x3a/0x50
    
    stack backtrace:
    CPU: 2 PID: 62 Comm: kworker/2:1 Tainted: G           O      5.0.7-dbg+ #8
    Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
    Workqueue: qla2xxx_wq qla24xx_delete_sess_fn [qla2xxx_scst]
    Call Trace:
     dump_stack+0x86/0xca
     check_usage.cold.52+0x473/0x563
     __lock_acquire+0x11c0/0x23e0
     lock_acquire+0xe3/0x200
     _raw_spin_lock_irqsave+0x3d/0x60
     klist_next+0x43/0x1d0
     device_for_each_child+0x96/0x110
     scsi_target_block+0x3c/0x40 [scsi_mod]
     fc_remote_port_delete+0xe7/0x1c0 [scsi_transport_fc]
     qla2x00_mark_device_lost+0xa0b/0xa30 [qla2xxx_scst]
     qlt_unreg_sess+0x1c6/0x380 [qla2xxx_scst]
     qla24xx_delete_sess_fn+0xe6/0xf0 [qla2xxx_scst]
     process_one_work+0x511/0xa80
     worker_thread+0x67/0x5b0
     kthread+0x1d2/0x1f0
     ret_from_fork+0x3a/0x50
    
    Cc: Himanshu Madhani <hmadhani@marvell.com>
    Cc: Giridhar Malavali <gmalavali@marvell.com>
    Signed-off-by: default avatarBart Van Assche <bvanassche@acm.org>
    Acked-by: default avatarHimanshu Madhani <hmadhani@marvell.com>
    Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
    300ec741
qla_target.c 199 KB