Commit b610ded7 authored by Michal Hocko's avatar Michal Hocko Committed by Linus Torvalds

hugetlb: fix lockdep splat caused by pmd sharing

Dave has reported the following lockdep splat:

  =================================
  [ INFO: inconsistent lock state ]
  3.11.0-rc1+ #9 Not tainted
  ---------------------------------
  inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
  kswapd0/49 [HC0[0]:SC0[0]:HE1:SE1] takes:
   (&mapping->i_mmap_mutex){+.+.?.}, at: [<c114971b>] page_referenced+0x87/0x5e3
  {RECLAIM_FS-ON-W} state was registered at:
     mark_held_locks+0x81/0xe7
     lockdep_trace_alloc+0x5e/0xbc
     __alloc_pages_nodemask+0x8b/0x9b6
     __get_free_pages+0x20/0x31
     get_zeroed_page+0x12/0x14
     __pmd_alloc+0x1c/0x6b
     huge_pmd_share+0x265/0x283
     huge_pte_alloc+0x5d/0x71
     hugetlb_fault+0x7c/0x64a
     handle_mm_fault+0x255/0x299
     __do_page_fault+0x142/0x55c
     do_page_fault+0xd/0x16
     error_code+0x6c/0x74
  irq event stamp: 3136917
  hardirqs last  enabled at (3136917):  _raw_spin_unlock_irq+0x27/0x50
  hardirqs last disabled at (3136916):  _raw_spin_lock_irq+0x15/0x78
  softirqs last  enabled at (3136180):  __do_softirq+0x137/0x30f
  softirqs last disabled at (3136175):  irq_exit+0xa8/0xaa
  other info that might help us debug this:
   Possible unsafe locking scenario:
         CPU0
         ----
    lock(&mapping->i_mmap_mutex);
    <Interrupt>
      lock(&mapping->i_mmap_mutex);

  *** DEADLOCK ***
  no locks held by kswapd0/49.

  stack backtrace:
  CPU: 1 PID: 49 Comm: kswapd0 Not tainted 3.11.0-rc1+ #9
  Hardware name: Dell Inc.                 Precision WorkStation 490    /0DT031, BIOS A08 04/25/2008
  Call Trace:
    dump_stack+0x4b/0x79
    print_usage_bug+0x1d9/0x1e3
    mark_lock+0x1e0/0x261
    __lock_acquire+0x623/0x17f2
    lock_acquire+0x7d/0x195
    mutex_lock_nested+0x6c/0x3a7
    page_referenced+0x87/0x5e3
    shrink_page_list+0x3d9/0x947
    shrink_inactive_list+0x155/0x4cb
    shrink_lruvec+0x300/0x5ce
    shrink_zone+0x53/0x14e
    kswapd+0x517/0xa75
    kthread+0xa8/0xaa
    ret_from_kernel_thread+0x1b/0x28

which is a false positive caused by hugetlb pmd sharing code which
allocates a new pmd from withing mapping->i_mmap_mutex.  If this
allocation causes reclaim then the lockdep detector complains that we
might self-deadlock.

This is not correct though, because hugetlb pages are not reclaimable so
their mapping will be never touched from the reclaim path.

The patch tells lockup detector that hugetlb i_mmap_mutex is special by
assigning it a separate lockdep class so it won't report possible
deadlocks on unrelated mappings.

[peterz@infradead.org: comment for annotation]
Reported-by: default avatarDave Jones <davej@redhat.com>
Signed-off-by: default avatarMichal Hocko <mhocko@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: default avatarMinchan Kim <minchan@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent fb32975d
...@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, ...@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
return inode; return inode;
} }
/*
* Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
* be taken from reclaim -- unlike regular filesystems. This needs an
* annotation because huge_pmd_share() does an allocation under
* i_mmap_mutex.
*/
struct lock_class_key hugetlbfs_i_mmap_mutex_key;
static struct inode *hugetlbfs_get_inode(struct super_block *sb, static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct inode *dir, struct inode *dir,
umode_t mode, dev_t dev) umode_t mode, dev_t dev)
...@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, ...@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct hugetlbfs_inode_info *info; struct hugetlbfs_inode_info *info;
inode->i_ino = get_next_ino(); inode->i_ino = get_next_ino();
inode_init_owner(inode, dir, mode); inode_init_owner(inode, dir, mode);
lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
&hugetlbfs_i_mmap_mutex_key);
inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->a_ops = &hugetlbfs_aops;
inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment