Commit 5f22ca9b authored by Linus Torvalds

vfat: fix 'sync' mount deadlock due to BKL->lock_super conversion

There was another FAT BKL conversion deadlock reported by Bart
Trojanowski due to the BKL being used as a recursive lock by FAT, which
was missed because it only triggers with 'sync' (or 'dirsync') mounts.

The recursion worked for the BKL, but after the conversion to lock_super
(which uses a mutex), it just deadlocks.

Thanks to Bart for debugging this and testing the fix.  The lock
debugging information from the original report:

  =============================================
  [ INFO: possible recursive locking detected ]
  2.6.27-rc3-bisect-00448-ga7f5aaf3 #16
  ---------------------------------------------
  mv/4020 is trying to acquire lock:
   (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20

  but task is already holding lock:
   (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20

  other info that might help us debug this:
  3 locks held by mv/4020:
   #0:  (&sb->s_type->i_mutex_key#9/1){--..}, at: [<c01b2336>] do_unlinkat+0x66/0x140
   #1:  (&sb->s_type->i_mutex_key#9){--..}, at: [<c01b0954>] vfs_unlink+0x84/0x110
   #2:  (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20

  stack backtrace:
  Pid: 4020, comm: mv Not tainted 2.6.27-rc3-bisect-00448-ga7f5aaf3 #16
   [<c014e694>] validate_chain+0x984/0xea0
   [<c0108d70>] ? native_sched_clock+0x0/0xf0
   [<c014ee9c>] __lock_acquire+0x2ec/0x9b0
   [<c014f5cf>] lock_acquire+0x6f/0x90
   [<c01a90fe>] ? lock_super+0x1e/0x20
   [<c044e5fd>] mutex_lock_nested+0xad/0x300
   [<c01a90fe>] ? lock_super+0x1e/0x20
   [<c01a90fe>] ? lock_super+0x1e/0x20
   [<c01a90fe>] lock_super+0x1e/0x20
   [<f8b3a700>] fat_write_inode+0x60/0x2b0 [fat]
   [<c0450878>] ? _spin_unlock_irqrestore+0x48/0x80
   [<f8b3a953>] ? fat_sync_inode+0x3/0x20 [fat]
   [<f8b3a962>] fat_sync_inode+0x12/0x20 [fat]
   [<f8b37c7e>] fat_remove_entries+0xbe/0x120 [fat]
   [<f8b422ef>] vfat_unlink+0x5f/0x90 [vfat]
   [<f8b42290>] ? vfat_unlink+0x0/0x90 [vfat]
   [<c01b0968>] vfs_unlink+0x98/0x110
   [<c01b2400>] do_unlinkat+0x130/0x140
   [<c016a8f5>] ? audit_syscall_entry+0x105/0x150
   [<c01b253b>] sys_unlinkat+0x3b/0x40
   [<c01040d3>] sysenter_do_call+0x12/0x3f
   =======================

where the deadlock is due to the nesting of lock_super from vfat_unlink
to fat_write_inode:

 - do_unlinkat
   - vfs_unlink
     - vfat_unlink
       * lock_super
       - fat_remove_entries
         - fat_sync_inode
           - fat_write_inode
             * lock_super

and the fix is to simply remove the use of lock_super() in fat_write_inode.

The lock_super() there had been just an automatic conversion of the
kernel lock to the superblock lock, but no locking was actually needed
there, since the code in fat_write_inode already protected all relevant
accesses with a spinlock (sbi->inode_hash_lock to be exact).  The only
code inside the BKL (and thus the superblock lock) was accesses to local
variables or calls to functions that have long been SMP-safe (i.e.
sb_bread, mark_buffer_dirty and brelse).

Bart reports:
 "Looks good.  I ran 10 parallel processes creating 1M files truncating
  them, writing to them again and then deleting them.  This patch fixes
  the issue I ran into.

  Signed-off-by: Bart Trojanowski <bart@jukie.net>"
Reported-and-tested-by: Bart Trojanowski <bart@jukie.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 395c6846
...@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait) ...@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait)
struct buffer_head *bh; struct buffer_head *bh;
struct msdos_dir_entry *raw_entry; struct msdos_dir_entry *raw_entry;
loff_t i_pos; loff_t i_pos;
int err = 0; int err;
retry: retry:
i_pos = MSDOS_I(inode)->i_pos; i_pos = MSDOS_I(inode)->i_pos;
if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
return 0; return 0;
lock_super(sb);
bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
if (!bh) { if (!bh) {
printk(KERN_ERR "FAT: unable to read inode block " printk(KERN_ERR "FAT: unable to read inode block "
"for updating (i_pos %lld)\n", i_pos); "for updating (i_pos %lld)\n", i_pos);
err = -EIO; return -EIO;
goto out;
} }
spin_lock(&sbi->inode_hash_lock); spin_lock(&sbi->inode_hash_lock);
if (i_pos != MSDOS_I(inode)->i_pos) { if (i_pos != MSDOS_I(inode)->i_pos) {
spin_unlock(&sbi->inode_hash_lock); spin_unlock(&sbi->inode_hash_lock);
brelse(bh); brelse(bh);
unlock_super(sb);
goto retry; goto retry;
} }
...@@ -607,11 +604,10 @@ static int fat_write_inode(struct inode *inode, int wait) ...@@ -607,11 +604,10 @@ static int fat_write_inode(struct inode *inode, int wait)
} }
spin_unlock(&sbi->inode_hash_lock); spin_unlock(&sbi->inode_hash_lock);
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
err = 0;
if (wait) if (wait)
err = sync_dirty_buffer(bh); err = sync_dirty_buffer(bh);
brelse(bh); brelse(bh);
out:
unlock_super(sb);
return err; return err;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment