• Alex Williamson's avatar
    vfio/pci: Collect hot-reset devices to local buffer · f6944d4a
    Alex Williamson authored
    Lockdep reports the below circular locking dependency issue.  The
    mmap_lock acquisition while holding pci_bus_sem is due to the use of
    copy_to_user() from within a pci_walk_bus() callback.
    
    Building the devices array directly into the user buffer is only for
    convenience.  Instead we can allocate a local buffer for the array,
    bounded by the number of devices on the bus/slot, fill the device
    information into this local buffer, then copy it into the user buffer
    outside the bus walk callback.
    
    ======================================================
    WARNING: possible circular locking dependency detected
    6.9.0-rc5+ #39 Not tainted
    ------------------------------------------------------
    CPU 0/KVM/4113 is trying to acquire lock:
    ffff99a609ee18a8 (&vdev->vma_lock){+.+.}-{4:4}, at: vfio_pci_mmap_fault+0x35/0x1a0 [vfio_pci_core]
    
    but task is already holding lock:
    ffff99a243a052a0 (&mm->mmap_lock){++++}-{4:4}, at: vaddr_get_pfns+0x3f/0x170 [vfio_iommu_type1]
    
    which lock already depends on the new lock.
    
    the existing dependency chain (in reverse order) is:
    
    -> #3 (&mm->mmap_lock){++++}-{4:4}:
           __lock_acquire+0x4e4/0xb90
           lock_acquire+0xbc/0x2d0
           __might_fault+0x5c/0x80
           _copy_to_user+0x1e/0x60
           vfio_pci_fill_devs+0x9f/0x130 [vfio_pci_core]
           vfio_pci_walk_wrapper+0x45/0x60 [vfio_pci_core]
           __pci_walk_bus+0x6b/0xb0
           vfio_pci_ioctl_get_pci_hot_reset_info+0x10b/0x1d0 [vfio_pci_core]
           vfio_pci_core_ioctl+0x1cb/0x400 [vfio_pci_core]
           vfio_device_fops_unl_ioctl+0x7e/0x140 [vfio]
           __x64_sys_ioctl+0x8a/0xc0
           do_syscall_64+0x8d/0x170
           entry_SYSCALL_64_after_hwframe+0x76/0x7e
    
    -> #2 (pci_bus_sem){++++}-{4:4}:
           __lock_acquire+0x4e4/0xb90
           lock_acquire+0xbc/0x2d0
           down_read+0x3e/0x160
           pci_bridge_wait_for_secondary_bus.part.0+0x33/0x2d0
           pci_reset_bus+0xdd/0x160
           vfio_pci_dev_set_hot_reset+0x256/0x270 [vfio_pci_core]
           vfio_pci_ioctl_pci_hot_reset_groups+0x1a3/0x280 [vfio_pci_core]
           vfio_pci_core_ioctl+0x3b5/0x400 [vfio_pci_core]
           vfio_device_fops_unl_ioctl+0x7e/0x140 [vfio]
           __x64_sys_ioctl+0x8a/0xc0
           do_syscall_64+0x8d/0x170
           entry_SYSCALL_64_after_hwframe+0x76/0x7e
    
    -> #1 (&vdev->memory_lock){+.+.}-{4:4}:
           __lock_acquire+0x4e4/0xb90
           lock_acquire+0xbc/0x2d0
           down_write+0x3b/0xc0
           vfio_pci_zap_and_down_write_memory_lock+0x1c/0x30 [vfio_pci_core]
           vfio_basic_config_write+0x281/0x340 [vfio_pci_core]
           vfio_config_do_rw+0x1fa/0x300 [vfio_pci_core]
           vfio_pci_config_rw+0x75/0xe50 [vfio_pci_core]
           vfio_pci_rw+0xea/0x1a0 [vfio_pci_core]
           vfs_write+0xea/0x520
           __x64_sys_pwrite64+0x90/0xc0
           do_syscall_64+0x8d/0x170
           entry_SYSCALL_64_after_hwframe+0x76/0x7e
    
    -> #0 (&vdev->vma_lock){+.+.}-{4:4}:
           check_prev_add+0xeb/0xcc0
           validate_chain+0x465/0x530
           __lock_acquire+0x4e4/0xb90
           lock_acquire+0xbc/0x2d0
           __mutex_lock+0x97/0xde0
           vfio_pci_mmap_fault+0x35/0x1a0 [vfio_pci_core]
           __do_fault+0x31/0x160
           do_pte_missing+0x65/0x3b0
           __handle_mm_fault+0x303/0x720
           handle_mm_fault+0x10f/0x460
           fixup_user_fault+0x7f/0x1f0
           follow_fault_pfn+0x66/0x1c0 [vfio_iommu_type1]
           vaddr_get_pfns+0xf2/0x170 [vfio_iommu_type1]
           vfio_pin_pages_remote+0x348/0x4e0 [vfio_iommu_type1]
           vfio_pin_map_dma+0xd2/0x330 [vfio_iommu_type1]
           vfio_dma_do_map+0x2c0/0x440 [vfio_iommu_type1]
           vfio_iommu_type1_ioctl+0xc5/0x1d0 [vfio_iommu_type1]
           __x64_sys_ioctl+0x8a/0xc0
           do_syscall_64+0x8d/0x170
           entry_SYSCALL_64_after_hwframe+0x76/0x7e
    
    other info that might help us debug this:
    
    Chain exists of:
      &vdev->vma_lock --> pci_bus_sem --> &mm->mmap_lock
    
     Possible unsafe locking scenario:
    
    block dm-0: the capability attribute has been deprecated.
           CPU0                    CPU1
           ----                    ----
      rlock(&mm->mmap_lock);
                                   lock(pci_bus_sem);
                                   lock(&mm->mmap_lock);
      lock(&vdev->vma_lock);
    
     *** DEADLOCK ***
    
    2 locks held by CPU 0/KVM/4113:
     #0: ffff99a25f294888 (&iommu->lock#2){+.+.}-{4:4}, at: vfio_dma_do_map+0x60/0x440 [vfio_iommu_type1]
     #1: ffff99a243a052a0 (&mm->mmap_lock){++++}-{4:4}, at: vaddr_get_pfns+0x3f/0x170 [vfio_iommu_type1]
    
    stack backtrace:
    CPU: 1 PID: 4113 Comm: CPU 0/KVM Not tainted 6.9.0-rc5+ #39
    Hardware name: Dell Inc. PowerEdge T640/04WYPY, BIOS 2.15.1 06/16/2022
    Call Trace:
     <TASK>
     dump_stack_lvl+0x64/0xa0
     check_noncircular+0x131/0x150
     check_prev_add+0xeb/0xcc0
     ? add_chain_cache+0x10a/0x2f0
     ? __lock_acquire+0x4e4/0xb90
     validate_chain+0x465/0x530
     __lock_acquire+0x4e4/0xb90
     lock_acquire+0xbc/0x2d0
     ? vfio_pci_mmap_fault+0x35/0x1a0 [vfio_pci_core]
     ? lock_is_held_type+0x9a/0x110
     __mutex_lock+0x97/0xde0
     ? vfio_pci_mmap_fault+0x35/0x1a0 [vfio_pci_core]
     ? lock_acquire+0xbc/0x2d0
     ? vfio_pci_mmap_fault+0x35/0x1a0 [vfio_pci_core]
     ? find_held_lock+0x2b/0x80
     ? vfio_pci_mmap_fault+0x35/0x1a0 [vfio_pci_core]
     vfio_pci_mmap_fault+0x35/0x1a0 [vfio_pci_core]
     __do_fault+0x31/0x160
     do_pte_missing+0x65/0x3b0
     __handle_mm_fault+0x303/0x720
     handle_mm_fault+0x10f/0x460
     fixup_user_fault+0x7f/0x1f0
     follow_fault_pfn+0x66/0x1c0 [vfio_iommu_type1]
     vaddr_get_pfns+0xf2/0x170 [vfio_iommu_type1]
     vfio_pin_pages_remote+0x348/0x4e0 [vfio_iommu_type1]
     vfio_pin_map_dma+0xd2/0x330 [vfio_iommu_type1]
     vfio_dma_do_map+0x2c0/0x440 [vfio_iommu_type1]
     vfio_iommu_type1_ioctl+0xc5/0x1d0 [vfio_iommu_type1]
     __x64_sys_ioctl+0x8a/0xc0
     do_syscall_64+0x8d/0x170
     ? rcu_core+0x8d/0x250
     ? __lock_release+0x5e/0x160
     ? rcu_core+0x8d/0x250
     ? lock_release+0x5f/0x120
     ? sched_clock+0xc/0x30
     ? sched_clock_cpu+0xb/0x190
     ? irqtime_account_irq+0x40/0xc0
     ? __local_bh_enable+0x54/0x60
     ? __do_softirq+0x315/0x3ca
     ? lockdep_hardirqs_on_prepare.part.0+0x97/0x140
     entry_SYSCALL_64_after_hwframe+0x76/0x7e
    RIP: 0033:0x7f8300d0357b
    Code: ff ff ff 85 c0 79 9b 49 c7 c4 ff ff ff ff 5b 5d 4c 89 e0 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 75 68 0f 00 f7 d8 64 89 01 48
    RSP: 002b:00007f82ef3fb948 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
    RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f8300d0357b
    RDX: 00007f82ef3fb990 RSI: 0000000000003b71 RDI: 0000000000000023
    RBP: 00007f82ef3fb9c0 R08: 0000000000000000 R09: 0000561b7e0bcac2
    R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000
    R13: 0000000200000000 R14: 0000381800000000 R15: 0000000000000000
     </TASK>
    Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
    Link: https://lore.kernel.org/r/20240503143138.3562116-1-alex.williamson@redhat.com
    Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
    f6944d4a
vfio_pci_core.c 71.6 KB