Commit 7d9071a0 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs updates from Al Viro:
 "In this one:

   - d_move fixes (Eric Biederman)

   - UFS fixes (me; locking is mostly sane now, a bunch of bugs in error
     handling ought to be fixed)

   - switch of sb_writers to percpu rwsem (Oleg Nesterov)

   - superblock scalability (Josef Bacik and Dave Chinner)

   - swapon(2) race fix (Hugh Dickins)"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (65 commits)
  vfs: Test for and handle paths that are unreachable from their mnt_root
  dcache: Reduce the scope of i_lock in d_splice_alias
  dcache: Handle escaped paths in prepend_path
  mm: fix potential data race in SyS_swapon
  inode: don't softlockup when evicting inodes
  inode: rename i_wb_list to i_io_list
  sync: serialise per-superblock sync operations
  inode: convert inode_sb_list_lock to per-sb
  inode: add hlist_fake to avoid the inode hash lock in evict
  writeback: plug writeback at a high level
  change sb_writers to use percpu_rw_semaphore
  shift percpu_counter_destroy() into destroy_super_work()
  percpu-rwsem: kill CONFIG_PERCPU_RWSEM
  percpu-rwsem: introduce percpu_rwsem_release() and percpu_rwsem_acquire()
  percpu-rwsem: introduce percpu_down_read_trylock()
  document rwsem_release() in sb_wait_write()
  fix the broken lockdep logic in __sb_start_write()
  introduce __sb_writers_{acquired,release}() helpers
  ufs_inode_get{frag,block}(): get rid of 'phys' argument
  ufs_getfrag_block(): tidy up a bit
  ...
parents bd779669 397d425d
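
For orientation before the diff: the sb_writers rework below replaces the old per-level percpu counter, waitqueue and lockdep map with one percpu_rw_semaphore per freeze level, so a writer takes its level as a shared lock while freeze_super() takes each level exclusively. A minimal sketch of the resulting pattern follows; it is not part of the patch (example_write and do_the_write are hypothetical, while sb_start_write/sb_end_write are the real fs/super.c entry points):

/*
 * Illustrative only: how a filesystem write path nests inside the
 * percpu-rwsem-based freeze protection after this series.
 */
static ssize_t example_write(struct file *file, const char __user *buf,
			     size_t len, loff_t *ppos)
{
	struct super_block *sb = file_inode(file)->i_sb;
	ssize_t ret;

	/* shared: percpu_down_read(&sb->s_writers.rw_sem[SB_FREEZE_WRITE-1]) */
	sb_start_write(sb);
	ret = do_the_write(file, buf, len, ppos);	/* hypothetical helper */
	/* percpu_up_read() of the same level pairs with the acquire above */
	sb_end_write(sb);
	return ret;
}

freeze_super() then simply does percpu_down_write() on each level in order (SB_FREEZE_WRITE, SB_FREEZE_PAGEFAULT, SB_FREEZE_FS), which both waits for in-flight writers and blocks new ones; that is exactly the livelock the old counter-plus-waitqueue loop in sb_wait_write() had to guard against by hand.
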
@@ -93,7 +93,6 @@ config KPROBES_ON_FTRACE
 config UPROBES
 	def_bool n
-	select PERCPU_RWSEM
 	help
 	  Uprobes is the user-space counterpart to kprobes: they
 	  enable instrumentation applications (such as 'perf probe')
...
@@ -1769,7 +1769,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 {
 	struct inode *inode, *old_inode = NULL;
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&blockdev_superblock->s_inode_list_lock);
 	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
 		struct address_space *mapping = inode->i_mapping;
@@ -1781,13 +1781,13 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&blockdev_superblock->s_inode_list_lock);
 		/*
 		 * We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the
-		 * inode_sb_list_lock. We cannot iput the inode now as we can
+		 * s_inode_list_lock. We cannot iput the inode now as we can
 		 * be holding the last reference and we cannot iput it under
-		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * s_inode_list_lock. So we keep the reference and iput it
 		 * later.
 		 */
 		iput(old_inode);
@@ -1795,8 +1795,8 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 		func(I_BDEV(inode), arg);
 
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&blockdev_superblock->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&blockdev_superblock->s_inode_list_lock);
 	iput(old_inode);
 }
@@ -1640,9 +1640,7 @@ static void do_async_commit(struct work_struct *work)
 	 * Tell lockdep about it.
 	 */
 	if (ac->newtrans->type & __TRANS_FREEZABLE)
-		rwsem_acquire_read(
-		     &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-		     0, 1, _THIS_IP_);
+		__sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);
 
 	current->journal_info = ac->newtrans;
@@ -1681,9 +1679,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 	 * async commit thread will be the one to unlock it.
 	 */
 	if (ac->newtrans->type & __TRANS_FREEZABLE)
-		rwsem_release(
-			&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-			1, _THIS_IP_);
+		__sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);
 
 	schedule_work(&ac->work);
...
@@ -2718,7 +2718,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
+ * dentry->d_parent->d_inode->i_mutex, and rename_lock
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
@@ -2744,7 +2744,6 @@ static int __d_unalias(struct inode *inode,
 	__d_move(alias, dentry, false);
 	ret = 0;
 out_err:
-	spin_unlock(&inode->i_lock);
 	if (m2)
 		mutex_unlock(m2);
 	if (m1)
@@ -2790,10 +2789,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode)) {
 		struct dentry *new = __d_find_any_alias(inode);
 		if (unlikely(new)) {
+			/* The reference to new ensures it remains an alias */
+			spin_unlock(&inode->i_lock);
 			write_seqlock(&rename_lock);
 			if (unlikely(d_ancestor(new, dentry))) {
 				write_sequnlock(&rename_lock);
-				spin_unlock(&inode->i_lock);
 				dput(new);
 				new = ERR_PTR(-ELOOP);
 				pr_warn_ratelimited(
@@ -2812,7 +2812,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 			} else {
 				__d_move(new, dentry, false);
 				write_sequnlock(&rename_lock);
-				spin_unlock(&inode->i_lock);
 				security_d_instantiate(new, inode);
 			}
 			iput(inode);
@@ -2926,6 +2925,13 @@ static int prepend_path(const struct path *path,
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
 			struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
 
+			/* Escaped? */
+			if (dentry != vfsmnt->mnt_root) {
+				bptr = *buffer;
+				blen = *buflen;
+				error = 3;
+				break;
+			}
 			/* Global root? */
 			if (mnt != parent) {
 				dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
...
@@ -17,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -27,13 +27,15 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
 
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
 
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 	iput(toput_inode);
 }
...
@@ -88,7 +88,7 @@ unsigned int dirtytime_expire_interval = 12 * 60 * 60;
 static inline struct inode *wb_inode(struct list_head *head)
 {
-	return list_entry(head, struct inode, i_wb_list);
+	return list_entry(head, struct inode, i_io_list);
 }
 
 /*
@@ -125,22 +125,22 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb)
 }
 
 /**
- * inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list
+ * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
  * @inode: inode to be moved
  * @wb: target bdi_writeback
  * @head: one of @wb->b_{dirty|io|more_io}
  *
- * Move @inode->i_wb_list to @list of @wb and set %WB_has_dirty_io.
+ * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
  * Returns %true if @inode is the first occupant of the !dirty_time IO
  * lists; otherwise, %false.
  */
-static bool inode_wb_list_move_locked(struct inode *inode,
+static bool inode_io_list_move_locked(struct inode *inode,
 				      struct bdi_writeback *wb,
 				      struct list_head *head)
 {
 	assert_spin_locked(&wb->list_lock);
 
-	list_move(&inode->i_wb_list, head);
+	list_move(&inode->i_io_list, head);
 
 	/* dirty_time doesn't count as dirty_io until expiration */
 	if (head != &wb->b_dirty_time)
@@ -151,19 +151,19 @@ static bool inode_wb_list_move_locked(struct inode *inode,
 }
 
 /**
- * inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list
+ * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
  * @inode: inode to be removed
  * @wb: bdi_writeback @inode is being removed from
  *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all are empty afterwards.
 */
-static void inode_wb_list_del_locked(struct inode *inode,
+static void inode_io_list_del_locked(struct inode *inode,
 				     struct bdi_writeback *wb)
 {
 	assert_spin_locked(&wb->list_lock);
 
-	list_del_init(&inode->i_wb_list);
+	list_del_init(&inode->i_io_list);
 	wb_io_lists_depopulated(wb);
 }
 
@@ -351,7 +351,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	/*
 	 * Once I_FREEING is visible under i_lock, the eviction path owns
-	 * the inode and we shouldn't modify ->i_wb_list.
+	 * the inode and we shouldn't modify ->i_io_list.
 	 */
 	if (unlikely(inode->i_state & I_FREEING))
 		goto skip_switch;
@@ -390,16 +390,16 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	 * is always correct including from ->b_dirty_time. The transfer
 	 * preserves @inode->dirtied_when ordering.
 	 */
-	if (!list_empty(&inode->i_wb_list)) {
+	if (!list_empty(&inode->i_io_list)) {
 		struct inode *pos;
 
-		inode_wb_list_del_locked(inode, old_wb);
+		inode_io_list_del_locked(inode, old_wb);
 		inode->i_wb = new_wb;
-		list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list)
+		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
 			if (time_after_eq(inode->dirtied_when,
 					  pos->dirtied_when))
 				break;
-		inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev);
+		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
 	} else {
 		inode->i_wb = new_wb;
 	}
@@ -961,12 +961,12 @@ void wb_start_background_writeback(struct bdi_writeback *wb)
 /*
 * Remove the inode from the writeback list it is on.
 */
-void inode_wb_list_del(struct inode *inode)
+void inode_io_list_del(struct inode *inode)
 {
 	struct bdi_writeback *wb;
 
 	wb = inode_to_wb_and_lock_list(inode);
-	inode_wb_list_del_locked(inode, wb);
+	inode_io_list_del_locked(inode, wb);
 	spin_unlock(&wb->list_lock);
 }
 
@@ -988,7 +988,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
-	inode_wb_list_move_locked(inode, wb, &wb->b_dirty);
+	inode_io_list_move_locked(inode, wb, &wb->b_dirty);
 }
 
 /*
@@ -996,7 +996,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 */
 static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 {
-	inode_wb_list_move_locked(inode, wb, &wb->b_more_io);
+	inode_io_list_move_locked(inode, wb, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -1055,7 +1055,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 		if (older_than_this &&
 		    inode_dirtied_after(inode, *older_than_this))
 			break;
-		list_move(&inode->i_wb_list, &tmp);
+		list_move(&inode->i_io_list, &tmp);
 		moved++;
 		if (flags & EXPIRE_DIRTY_ATIME)
 			set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
@@ -1078,7 +1078,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 		list_for_each_prev_safe(pos, node, &tmp) {
 			inode = wb_inode(pos);
 			if (inode->i_sb == sb)
-				list_move(&inode->i_wb_list, dispatch_queue);
+				list_move(&inode->i_io_list, dispatch_queue);
 		}
 	}
 out:
@@ -1232,10 +1232,10 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		redirty_tail(inode, wb);
 	} else if (inode->i_state & I_DIRTY_TIME) {
 		inode->dirtied_when = jiffies;
-		inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time);
+		inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
 	} else {
 		/* The inode is clean. Remove from writeback lists. */
-		inode_wb_list_del_locked(inode, wb);
+		inode_io_list_del_locked(inode, wb);
 	}
 }
 
@@ -1378,7 +1378,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	 * touch it. See comment above for explanation.
 	 */
 	if (!(inode->i_state & I_DIRTY_ALL))
-		inode_wb_list_del_locked(inode, wb);
+		inode_io_list_del_locked(inode, wb);
 	spin_unlock(&wb->list_lock);
 	inode_sync_complete(inode);
 out:
@@ -1439,7 +1439,9 @@ static long writeback_sb_inodes(struct super_block *sb,
 	unsigned long start_time = jiffies;
 	long write_chunk;
 	long wrote = 0;  /* count both pages and inodes */
+	struct blk_plug plug;
 
+	blk_start_plug(&plug);
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
@@ -1537,6 +1539,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 			break;
 		}
 	}
+	blk_finish_plug(&plug);
 	return wrote;
 }
 
@@ -2088,7 +2091,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			else
 				dirty_list = &wb->b_dirty_time;
 
-			wakeup_bdi = inode_wb_list_move_locked(inode, wb,
+			wakeup_bdi = inode_io_list_move_locked(inode, wb,
 							       dirty_list);
 
 			spin_unlock(&wb->list_lock);
@@ -2111,6 +2114,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
+/*
+ * The @s_sync_lock is used to serialise concurrent sync operations
+ * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
+ * Concurrent callers will block on the s_sync_lock rather than doing contending
+ * walks. The queueing maintains sync(2) required behaviour as all the IO that
+ * has been issued up to the time this function is entered is guaranteed to be
+ * completed by the time we have gained the lock and waited for all IO that is
+ * in progress regardless of the order callers are granted the lock.
+ */
 static void wait_sb_inodes(struct super_block *sb)
 {
 	struct inode *inode, *old_inode = NULL;
@@ -2121,7 +2133,8 @@ static void wait_sb_inodes(struct super_block *sb)
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_sb_list_lock);
+	mutex_lock(&sb->s_sync_lock);
+	spin_lock(&sb->s_inode_list_lock);
 
 	/*
 	 * Data integrity sync. Must wait for all pages under writeback,
@@ -2141,14 +2154,14 @@ static void wait_sb_inodes(struct super_block *sb)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
 
 		/*
 		 * We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the
-		 * inode_sb_list_lock. We cannot iput the inode now as we can
+		 * s_inode_list_lock. We cannot iput the inode now as we can
 		 * be holding the last reference and we cannot iput it under
-		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * s_inode_list_lock. So we keep the reference and iput it
 		 * later.
 		 */
 		iput(old_inode);
@@ -2158,10 +2171,11 @@ static void wait_sb_inodes(struct super_block *sb)
 
 		cond_resched();
 
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 	iput(old_inode);
+	mutex_unlock(&sb->s_sync_lock);
 }
 
 static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
...
@@ -28,16 +28,16 @@
 *   inode->i_state, inode->i_hash, __iget()
 * Inode LRU list locks protect:
 *   inode->i_sb->s_inode_lru, inode->i_lru
- * inode_sb_list_lock protects:
- *   sb->s_inodes, inode->i_sb_list
+ * inode->i_sb->s_inode_list_lock protects:
+ *   inode->i_sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
- *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
+ *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
 * Lock ordering:
 *
- * inode_sb_list_lock
+ * inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *     Inode LRU list locks
 *
@@ -45,7 +45,7 @@
 *   inode->i_lock
 *
 * inode_hash_lock
- *   inode_sb_list_lock
+ *   inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *
 * iunique_lock
@@ -57,8 +57,6 @@ static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
-
 /*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
@@ -359,7 +357,7 @@ void inode_init_once(struct inode *inode)
 	memset(inode, 0, sizeof(*inode));
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_devices);
-	INIT_LIST_HEAD(&inode->i_wb_list);
+	INIT_LIST_HEAD(&inode->i_io_list);
 	INIT_LIST_HEAD(&inode->i_lru);
 	address_space_init_once(&inode->i_data);
 	i_size_ordered_init(inode);
@@ -426,18 +424,18 @@ static void inode_lru_list_del(struct inode *inode)
 */
 void inode_sb_list_add(struct inode *inode)
 {
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&inode->i_sb->s_inode_list_lock);
 	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&inode->i_sb->s_inode_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
 static inline void inode_sb_list_del(struct inode *inode)
 {
 	if (!list_empty(&inode->i_sb_list)) {
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&inode->i_sb->s_inode_list_lock);
 		list_del_init(&inode->i_sb_list);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&inode->i_sb->s_inode_list_lock);
 	}
 }
 
@@ -527,8 +525,8 @@ static void evict(struct inode *inode)
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(!list_empty(&inode->i_lru));
 
-	if (!list_empty(&inode->i_wb_list))
-		inode_wb_list_del(inode);
+	if (!list_empty(&inode->i_io_list))
+		inode_io_list_del(inode);
 
 	inode_sb_list_del(inode);
@@ -577,6 +575,7 @@ static void dispose_list(struct list_head *head)
 		list_del_init(&inode->i_lru);
 
 		evict(inode);
+		cond_resched();
 	}
 }
 
@@ -594,7 +593,8 @@ void evict_inodes(struct super_block *sb)
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_sb_list_lock);
+again:
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (atomic_read(&inode->i_count))
 			continue;
@@ -609,8 +609,20 @@ void evict_inodes(struct super_block *sb)
 		inode_lru_list_del(inode);
 		spin_unlock(&inode->i_lock);
 		list_add(&inode->i_lru, &dispose);
+
+		/*
+		 * We can have a ton of inodes to evict at unmount time given
+		 * enough memory, check to see if we need to go to sleep for a
+		 * bit so we don't livelock.
+		 */
+		if (need_resched()) {
+			spin_unlock(&sb->s_inode_list_lock);
+			cond_resched();
+			dispose_list(&dispose);
+			goto again;
+		}
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 
 	dispose_list(&dispose);
 }
@@ -631,7 +643,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -654,7 +666,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 		spin_unlock(&inode->i_lock);
 		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 
 	dispose_list(&dispose);
@@ -890,7 +902,7 @@ struct inode *new_inode(struct super_block *sb)
 {
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_sb_list_lock);
+	spin_lock_prefetch(&sb->s_inode_list_lock);
 
 	inode = new_inode_pseudo(sb);
 	if (inode)
...
@@ -112,14 +112,13 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
 /*
 * inode.c
 */
-extern spinlock_t inode_sb_list_lock;
 extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
 extern void inode_add_lru(struct inode *inode);
 
 /*
 * fs-writeback.c
 */
-extern void inode_wb_list_del(struct inode *inode);
+extern void inode_io_list_del(struct inode *inode);
 extern long get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
...
@@ -560,6 +560,24 @@ static int __nd_alloc_stack(struct nameidata *nd)
 	return 0;
 }
 
+/**
+ * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
+ * @path: nameidata to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct path *path)
+{
+	struct vfsmount *mnt = path->mnt;
+
+	/* Only bind mounts can have disconnected paths */
+	if (mnt->mnt_root == mnt->mnt_sb->s_root)
+		return true;
+
+	return is_subdir(path->dentry, mnt->mnt_root);
+}
+
 static inline int nd_alloc_stack(struct nameidata *nd)
 {
 	if (likely(nd->depth != EMBEDDED_LEVELS))
@@ -1296,6 +1314,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
 				return -ECHILD;
 			nd->path.dentry = parent;
 			nd->seq = seq;
+			if (unlikely(!path_connected(&nd->path)))
+				return -ENOENT;
 			break;
 		} else {
 			struct mount *mnt = real_mount(nd->path.mnt);
@@ -1396,7 +1416,7 @@ static void follow_mount(struct path *path)
 	}
 }
 
-static void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot(struct nameidata *nd)
 {
 	if (!nd->root.mnt)
 		set_root(nd);
@@ -1412,6 +1432,8 @@ static void follow_dotdot(struct nameidata *nd)
 			/* rare case of legitimate dget_parent()... */
 			nd->path.dentry = dget_parent(nd->path.dentry);
 			dput(old);
+			if (unlikely(!path_connected(&nd->path)))
+				return -ENOENT;
 			break;
 		}
 		if (!follow_up(&nd->path))
@@ -1419,6 +1441,7 @@ static void follow_dotdot(struct nameidata *nd)
 	}
 	follow_mount(&nd->path);
 	nd->inode = nd->path.dentry->d_inode;
+	return 0;
 }
 
 /*
@@ -1634,7 +1657,7 @@ static inline int handle_dots(struct nameidata *nd, int type)
 		if (nd->flags & LOOKUP_RCU) {
 			return follow_dotdot_rcu(nd);
 		} else
-			follow_dotdot(nd);
+			return follow_dotdot(nd);
 	}
 	return 0;
 }
...
@@ -143,17 +143,17 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 /**
 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
- * @list: list of inodes being unmounted (sb->s_inodes)
+ * @sb: superblock being unmounted.
 *
 * Called during unmount with no locks held, so needs to be safe against
- * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
+ * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
 */
-void fsnotify_unmount_inodes(struct list_head *list)
+void fsnotify_unmount_inodes(struct super_block *sb)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
 
-	spin_lock(&inode_sb_list_lock);
-	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) {
 		struct inode *need_iput_tmp;
 
 		/*
@@ -189,7 +189,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		spin_unlock(&inode->i_lock);
 
 		/* In case the dropping of a reference would nuke next_i. */
-		while (&next_i->i_sb_list != list) {
+		while (&next_i->i_sb_list != &sb->s_inodes) {
 			spin_lock(&next_i->i_lock);
 			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
 						atomic_read(&next_i->i_count)) {
@@ -204,12 +204,12 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		}
 
 		/*
-		 * We can safely drop inode_sb_list_lock here because either
+		 * We can safely drop s_inode_list_lock here because either
 		 * we actually hold references on both inode and next_i or
 		 * end of list. Also no new inodes will be added since the
 		 * umount has begun.
 		 */
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
 
 		if (need_iput_tmp)
 			iput(need_iput_tmp);
@@ -221,7 +221,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
 		iput(inode);
 
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 }
@@ -928,7 +928,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	int reserved = 0;
 #endif
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -939,7 +939,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
 
 #ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
@@ -951,15 +951,15 @@ static void add_dquot_ref(struct super_block *sb, int type)
 		/*
 		 * We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the
-		 * inode_sb_list_lock We cannot iput the inode now as we can be
+		 * s_inode_list_lock. We cannot iput the inode now as we can be
 		 * holding the last reference and we cannot iput it under
-		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * s_inode_list_lock. So we keep the reference and iput it
 		 * later.
 		 */
 		old_inode = inode;
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 	iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1028,7 +1028,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 	struct inode *inode;
 	int reserved = 0;
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		/*
 		 * We have to scan also I_NEW inodes because they can already
@@ -1044,7 +1044,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 		}
 		spin_unlock(&dq_data_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"
...
@@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	return total_objects;
 }
 
+static void destroy_super_work(struct work_struct *work)
+{
+	struct super_block *s = container_of(work, struct super_block,
+							destroy_work);
+	int i;
+
+	for (i = 0; i < SB_FREEZE_LEVELS; i++)
+		percpu_free_rwsem(&s->s_writers.rw_sem[i]);
+	kfree(s);
+}
+
+static void destroy_super_rcu(struct rcu_head *head)
+{
+	struct super_block *s = container_of(head, struct super_block, rcu);
+	INIT_WORK(&s->destroy_work, destroy_super_work);
+	schedule_work(&s->destroy_work);
+}
+
 /**
 *	destroy_super	-	frees a superblock
 *	@s: superblock to free
@@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 */
 static void destroy_super(struct super_block *s)
 {
-	int i;
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
-	for (i = 0; i < SB_FREEZE_LEVELS; i++)
-		percpu_counter_destroy(&s->s_writers.counter[i]);
 	security_sb_free(s);
 	WARN_ON(!list_empty(&s->s_mounts));
 	kfree(s->s_subtype);
 	kfree(s->s_options);
-	kfree_rcu(s, rcu);
+	call_rcu(&s->rcu, destroy_super_rcu);
 }
 
 /**
@@ -178,19 +193,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 		goto fail;
 
 	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-		if (percpu_counter_init(&s->s_writers.counter[i], 0,
-					GFP_KERNEL) < 0)
+		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
+					sb_writers_name[i],
+					&type->s_writers_key[i]))
 			goto fail;
-		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-				 &type->s_writers_key[i], 0);
 	}
-	init_waitqueue_head(&s->s_writers.wait);
 	init_waitqueue_head(&s->s_writers.wait_unfrozen);
 	s->s_bdi = &noop_backing_dev_info;
 	s->s_flags = flags;
 	INIT_HLIST_NODE(&s->s_instances);
 	INIT_HLIST_BL_HEAD(&s->s_anon);
+	mutex_init(&s->s_sync_lock);
 	INIT_LIST_HEAD(&s->s_inodes);
+	spin_lock_init(&s->s_inode_list_lock);
 
 	if (list_lru_init_memcg(&s->s_dentry_lru))
 		goto fail;
@@ -399,7 +414,7 @@ void generic_shutdown_super(struct super_block *sb)
 		sync_filesystem(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 
-		fsnotify_unmount_inodes(&sb->s_inodes);
+		fsnotify_unmount_inodes(sb);
 
 		evict_inodes(sb);
@@ -1146,72 +1161,46 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
 */
 void __sb_end_write(struct super_block *sb, int level)
 {
-	percpu_counter_dec(&sb->s_writers.counter[level-1]);
-	/*
-	 * Make sure s_writers are updated before we wake up waiters in
-	 * freeze_super().
-	 */
-	smp_mb();
-	if (waitqueue_active(&sb->s_writers.wait))
-		wake_up(&sb->s_writers.wait);
-	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+	percpu_up_read(sb->s_writers.rw_sem + level-1);
 }
 EXPORT_SYMBOL(__sb_end_write);
 
-#ifdef CONFIG_LOCKDEP
-/*
- * We want lockdep to tell us about possible deadlocks with freezing but
- * it's a bit tricky to properly instrument it. Getting a freeze protection
- * works as getting a read lock but there are subtle problems. XFS for example
- * gets freeze protection on internal level twice in some cases, which is OK
- * only because we already hold a freeze protection also on higher level. Due
- * to these cases we have to tell lockdep we are doing trylock when we
- * already hold a freeze protection for a higher freeze level.
- */
-static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
-				unsigned long ip)
-{
-	int i;
-
-	if (!trylock) {
-		for (i = 0; i < level - 1; i++)
-			if (lock_is_held(&sb->s_writers.lock_map[i])) {
-				trylock = true;
-				break;
-			}
-	}
-	rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
-}
-#endif
-
 /*
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 * instead.
 */
 int __sb_start_write(struct super_block *sb, int level, bool wait)
 {
-retry:
-	if (unlikely(sb->s_writers.frozen >= level)) {
-		if (!wait)
-			return 0;
-		wait_event(sb->s_writers.wait_unfrozen,
-			   sb->s_writers.frozen < level);
-	}
+	bool force_trylock = false;
+	int ret = 1;
 
 #ifdef CONFIG_LOCKDEP
-	acquire_freeze_lock(sb, level, !wait, _RET_IP_);
-#endif
-	percpu_counter_inc(&sb->s_writers.counter[level-1]);
 	/*
-	 * Make sure counter is updated before we check for frozen.
-	 * freeze_super() first sets frozen and then checks the counter.
+	 * We want lockdep to tell us about possible deadlocks with freezing
+	 * but it's a bit tricky to properly instrument it. Getting a freeze
+	 * protection works as getting a read lock but there are subtle
+	 * problems. XFS for example gets freeze protection on internal level
+	 * twice in some cases, which is OK only because we already hold a
+	 * freeze protection also on higher level. Due to these cases we have
+	 * to use wait == F (trylock mode) which must not fail.
	 */
-	smp_mb();
-	if (unlikely(sb->s_writers.frozen >= level)) {
-		__sb_end_write(sb, level);
-		goto retry;
+	if (wait) {
+		int i;
+
+		for (i = 0; i < level - 1; i++)
+			if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
+				force_trylock = true;
+				break;
+			}
 	}
-	return 1;
+#endif
+	if (wait && !force_trylock)
+		percpu_down_read(sb->s_writers.rw_sem + level-1);
+	else
+		ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
+	WARN_ON(force_trylock & !ret);
+	return ret;
 }
 EXPORT_SYMBOL(__sb_start_write);
@@ -1221,37 +1210,33 @@ EXPORT_SYMBOL(__sb_start_write);
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
- * system. Caller of this function should make sure there can be no new writers
- * of type @level before calling this function. Otherwise this function can
- * livelock.
+ * system.
 */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-	s64 writers;
+	percpu_down_write(sb->s_writers.rw_sem + level-1);
 	/*
-	 * We just cycle-through lockdep here so that it does not complain
-	 * about returning with lock to userspace
+	 * We are going to return to userspace and forget about this lock, the
+	 * ownership goes to the caller of thaw_super() which does unlock.
+	 *
+	 * FIXME: we should do this before return from freeze_super() after we
+	 * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
+	 * should re-acquire these locks before s_op->unfreeze_fs(sb). However
+	 * this leads to lockdep false-positives, so currently we do the early
+	 * release right after acquire.
 	 */
-	rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
-	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
-
-	do {
-		DEFINE_WAIT(wait);
+	percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
+}
 
-		/*
-		 * We use a barrier in prepare_to_wait() to separate setting
-		 * of frozen and checking of the counter
-		 */
-		prepare_to_wait(&sb->s_writers.wait, &wait,
-				TASK_UNINTERRUPTIBLE);
+static void sb_freeze_unlock(struct super_block *sb)
+{
+	int level;
 
-		writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
-		if (writers)
-			schedule();
+	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
+		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
 
-		finish_wait(&sb->s_writers.wait, &wait);
-	} while (writers);
+	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+		percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
 /**
@@ -1310,20 +1295,14 @@ int freeze_super(struct super_block *sb)
 		return 0;
 	}
 
-	/* From now on, no new normal writers can start */
 	sb->s_writers.frozen = SB_FREEZE_WRITE;
-	smp_wmb();
-
 	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
 	up_write(&sb->s_umount);
-
 	sb_wait_write(sb, SB_FREEZE_WRITE);
+	down_write(&sb->s_umount);
 
 	/* Now we go and block page faults... */
-	down_write(&sb->s_umount);
 	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
-	smp_wmb();
 	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
 
 	/* All writers are done so after syncing there won't be dirty data */
@@ -1331,7 +1310,6 @@ int freeze_super(struct super_block *sb)
 
 	/* Now wait for internal filesystem counter */
 	sb->s_writers.frozen = SB_FREEZE_FS;
-	smp_wmb();
 	sb_wait_write(sb, SB_FREEZE_FS);
 
 	if (sb->s_op->freeze_fs) {
@@ -1340,7 +1318,7 @@ int freeze_super(struct super_block *sb)
 			printk(KERN_ERR
 				"VFS:Filesystem freeze failed\n");
 			sb->s_writers.frozen = SB_UNFROZEN;
-			smp_wmb();
+			sb_freeze_unlock(sb);
 			wake_up(&sb->s_writers.wait_unfrozen);
 			deactivate_locked_super(sb);
 			return ret;
@@ -1372,8 +1350,10 @@ int thaw_super(struct super_block *sb)
 		return -EINVAL;
 	}
 
-	if (sb->s_flags & MS_RDONLY)
+	if (sb->s_flags & MS_RDONLY) {
+		sb->s_writers.frozen = SB_UNFROZEN;
 		goto out;
+	}
 
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
@@ -1385,12 +1365,11 @@ int thaw_super(struct super_block *sb)
 		}
 	}
 
-out:
 	sb->s_writers.frozen = SB_UNFROZEN;
-	smp_wmb();
+	sb_freeze_unlock(sb);
+out:
 	wake_up(&sb->s_writers.wait_unfrozen);
 	deactivate_locked_super(sb);
 	return 0;
 }
 EXPORT_SYMBOL(thaw_super);
@@ -5,5 +5,5 @@
 obj-$(CONFIG_UFS_FS) += ufs.o
 
 ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
-	    namei.o super.o symlink.o truncate.o util.o
+	    namei.o super.o symlink.o util.o
 
 ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
@@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	if (oldcount == 0) {
 		result = ufs_alloc_fragments (inode, cgno, goal, count, err);
 		if (result) {
+			write_seqlock(&UFS_I(inode)->meta_lock);
 			ufs_cpu_to_data_ptr(sb, p, result);
+			write_sequnlock(&UFS_I(inode)->meta_lock);
 			*err = 0;
 			UFS_I(inode)->i_lastfrag =
 				max(UFS_I(inode)->i_lastfrag, fragment + count);
@@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 		ufs_change_blocknr(inode, fragment - oldcount, oldcount,
 				   uspi->s_sbbase + tmp,
 				   uspi->s_sbbase + result, locked_page);
+		write_seqlock(&UFS_I(inode)->meta_lock);
 		ufs_cpu_to_data_ptr(sb, p, result);
+		write_sequnlock(&UFS_I(inode)->meta_lock);
 		*err = 0;
 		UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
 					       fragment + count);
...
...@@ -94,22 +94,6 @@ ...@@ -94,22 +94,6 @@
#include "swab.h" #include "swab.h"
#include "util.h" #include "util.h"
void lock_ufs(struct super_block *sb)
{
struct ufs_sb_info *sbi = UFS_SB(sb);
mutex_lock(&sbi->mutex);
sbi->mutex_owner = current;
}
void unlock_ufs(struct super_block *sb)
{
struct ufs_sb_info *sbi = UFS_SB(sb);
sbi->mutex_owner = NULL;
mutex_unlock(&sbi->mutex);
}
static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
{ {
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
...@@ -694,7 +678,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ...@@ -694,7 +678,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
struct ufs_super_block_third * usb3; struct ufs_super_block_third * usb3;
unsigned flags; unsigned flags;
lock_ufs(sb);
mutex_lock(&UFS_SB(sb)->s_lock); mutex_lock(&UFS_SB(sb)->s_lock);
UFSD("ENTER\n"); UFSD("ENTER\n");
...@@ -714,7 +697,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ...@@ -714,7 +697,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
UFSD("EXIT\n"); UFSD("EXIT\n");
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0; return 0;
} }
...@@ -758,7 +740,6 @@ static void ufs_put_super(struct super_block *sb) ...@@ -758,7 +740,6 @@ static void ufs_put_super(struct super_block *sb)
ubh_brelse_uspi (sbi->s_uspi); ubh_brelse_uspi (sbi->s_uspi);
kfree (sbi->s_uspi); kfree (sbi->s_uspi);
mutex_destroy(&sbi->mutex);
kfree (sbi); kfree (sbi);
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
UFSD("EXIT\n"); UFSD("EXIT\n");
...@@ -801,7 +782,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -801,7 +782,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
mutex_init(&sbi->mutex);
mutex_init(&sbi->s_lock); mutex_init(&sbi->s_lock);
spin_lock_init(&sbi->work_lock); spin_lock_init(&sbi->work_lock);
INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
...@@ -1257,7 +1237,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1257,7 +1237,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
return 0; return 0;
failed: failed:
mutex_destroy(&sbi->mutex);
if (ubh) if (ubh)
ubh_brelse_uspi (uspi); ubh_brelse_uspi (uspi);
kfree (uspi); kfree (uspi);
...@@ -1280,7 +1259,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ...@@ -1280,7 +1259,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
unsigned flags; unsigned flags;
sync_filesystem(sb); sync_filesystem(sb);
lock_ufs(sb);
mutex_lock(&UFS_SB(sb)->s_lock); mutex_lock(&UFS_SB(sb)->s_lock);
uspi = UFS_SB(sb)->s_uspi; uspi = UFS_SB(sb)->s_uspi;
flags = UFS_SB(sb)->s_flags; flags = UFS_SB(sb)->s_flags;
...@@ -1296,7 +1274,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ...@@ -1296,7 +1274,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufs_set_opt (new_mount_opt, ONERROR_LOCK); ufs_set_opt (new_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options (data, &new_mount_opt)) { if (!ufs_parse_options (data, &new_mount_opt)) {
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL; return -EINVAL;
} }
if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
...@@ -1304,14 +1281,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ...@@ -1304,14 +1281,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
pr_err("ufstype can't be changed during remount\n"); pr_err("ufstype can't be changed during remount\n");
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL; return -EINVAL;
} }
if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
UFS_SB(sb)->s_mount_opt = new_mount_opt; UFS_SB(sb)->s_mount_opt = new_mount_opt;
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0; return 0;
} }
...@@ -1335,7 +1310,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ...@@ -1335,7 +1310,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#ifndef CONFIG_UFS_FS_WRITE #ifndef CONFIG_UFS_FS_WRITE
pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL; return -EINVAL;
#else #else
if (ufstype != UFS_MOUNT_UFSTYPE_SUN && if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
...@@ -1345,13 +1319,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ...@@ -1345,13 +1319,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_UFS2) { ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
pr_err("this ufstype is read-only supported\n"); pr_err("this ufstype is read-only supported\n");
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL; return -EINVAL;
} }
if (!ufs_read_cylinder_structures(sb)) { if (!ufs_read_cylinder_structures(sb)) {
pr_err("failed during remounting\n"); pr_err("failed during remounting\n");
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EPERM; return -EPERM;
} }
sb->s_flags &= ~MS_RDONLY; sb->s_flags &= ~MS_RDONLY;
...@@ -1359,7 +1331,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ...@@ -1359,7 +1331,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
} }
UFS_SB(sb)->s_mount_opt = new_mount_opt; UFS_SB(sb)->s_mount_opt = new_mount_opt;
mutex_unlock(&UFS_SB(sb)->s_lock); mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0; return 0;
} }
...@@ -1391,8 +1362,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -1391,8 +1362,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct ufs_super_block_third *usb3; struct ufs_super_block_third *usb3;
u64 id = huge_encode_dev(sb->s_bdev->bd_dev); u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
lock_ufs(sb); mutex_lock(&UFS_SB(sb)->s_lock);
usb3 = ubh_get_usb_third(uspi); usb3 = ubh_get_usb_third(uspi);
if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
...@@ -1413,7 +1383,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -1413,7 +1383,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32); buf->f_fsid.val[1] = (u32)(id >> 32);
unlock_ufs(sb); mutex_unlock(&UFS_SB(sb)->s_lock);
return 0; return 0;
} }
...@@ -1429,6 +1399,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) ...@@ -1429,6 +1399,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
return NULL; return NULL;
ei->vfs_inode.i_version = 1; ei->vfs_inode.i_version = 1;
seqlock_init(&ei->meta_lock);
mutex_init(&ei->truncate_mutex);
return &ei->vfs_inode; return &ei->vfs_inode;
} }
......
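The seqlock_t meta_lock initialised in ufs_alloc_inode() above lets readers sample block-mapping metadata such as i_lastfrag without taking the new per-inode truncate_mutex. A minimal sketch of the retry pattern, assuming the fs/ufs context; the helper names are hypothetical, not part of this commit:

	/* Reader: sample i_lastfrag consistently, retrying if a writer raced. */
	static u64 my_read_lastfrag(struct ufs_inode_info *ufsi)
	{
		unsigned seq;
		u64 lastfrag;

		do {
			seq = read_seqbegin(&ufsi->meta_lock);
			lastfrag = ufsi->i_lastfrag;
		} while (read_seqretry(&ufsi->meta_lock, seq));
		return lastfrag;
	}

	/* Writer: publish an update under the write side of the seqlock. */
	static void my_set_lastfrag(struct ufs_inode_info *ufsi, u64 frag)
	{
		write_seqlock(&ufsi->meta_lock);
		ufsi->i_lastfrag = frag;
		write_sequnlock(&ufsi->meta_lock);
	}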
...@@ -24,8 +24,6 @@ struct ufs_sb_info { ...@@ -24,8 +24,6 @@ struct ufs_sb_info {
unsigned s_cgno[UFS_MAX_GROUP_LOADED]; unsigned s_cgno[UFS_MAX_GROUP_LOADED];
unsigned short s_cg_loaded; unsigned short s_cg_loaded;
unsigned s_mount_opt; unsigned s_mount_opt;
struct mutex mutex;
struct task_struct *mutex_owner;
struct super_block *sb; struct super_block *sb;
int work_queued; /* non-zero if the delayed work is queued */ int work_queued; /* non-zero if the delayed work is queued */
struct delayed_work sync_work; /* FS sync delayed work */ struct delayed_work sync_work; /* FS sync delayed work */
...@@ -46,6 +44,8 @@ struct ufs_inode_info { ...@@ -46,6 +44,8 @@ struct ufs_inode_info {
__u32 i_oeftflag; __u32 i_oeftflag;
__u16 i_osync; __u16 i_osync;
__u64 i_lastfrag; __u64 i_lastfrag;
seqlock_t meta_lock;
struct mutex truncate_mutex;
__u32 i_dir_start_lookup; __u32 i_dir_start_lookup;
struct inode vfs_inode; struct inode vfs_inode;
}; };
...@@ -122,7 +122,7 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long); ...@@ -122,7 +122,7 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_write_inode (struct inode *, struct writeback_control *);
extern int ufs_sync_inode (struct inode *); extern int ufs_sync_inode (struct inode *);
extern void ufs_evict_inode (struct inode *); extern void ufs_evict_inode (struct inode *);
extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
/* namei.c */ /* namei.c */
extern const struct file_operations ufs_dir_operations; extern const struct file_operations ufs_dir_operations;
...@@ -140,10 +140,6 @@ void ufs_mark_sb_dirty(struct super_block *sb); ...@@ -140,10 +140,6 @@ void ufs_mark_sb_dirty(struct super_block *sb);
extern const struct inode_operations ufs_fast_symlink_inode_operations; extern const struct inode_operations ufs_fast_symlink_inode_operations;
extern const struct inode_operations ufs_symlink_inode_operations; extern const struct inode_operations ufs_symlink_inode_operations;
/* truncate.c */
extern int ufs_truncate (struct inode *, loff_t);
extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
{ {
return sb->s_fs_info; return sb->s_fs_info;
...@@ -170,7 +166,4 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b) ...@@ -170,7 +166,4 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b)
return do_div(b, uspi->s_fpg); return do_div(b, uspi->s_fpg);
} }
extern void lock_ufs(struct super_block *sb);
extern void unlock_ufs(struct super_block *sb);
#endif /* _UFS_UFS_H */ #endif /* _UFS_UFS_H */
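With the per-sb mutex and the lock_ufs()/unlock_ufs() pair gone from ufs_sb_info, serialisation of one inode's block-mapping changes falls to the new truncate_mutex, so allocation and truncation exclude each other without stalling the whole filesystem. A hedged sketch of the intended shape (hypothetical helper, not this commit's code):

	static int my_extend_inode(struct inode *inode, u64 new_lastfrag)
	{
		struct ufs_inode_info *ufsi = UFS_I(inode);

		mutex_lock(&ufsi->truncate_mutex);
		/* allocate fragments here, then publish via meta_lock */
		write_seqlock(&ufsi->meta_lock);
		ufsi->i_lastfrag = new_lastfrag;
		write_sequnlock(&ufsi->meta_lock);
		mutex_unlock(&ufsi->truncate_mutex);
		return 0;
	}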
...@@ -119,8 +119,7 @@ xfs_setfilesize_trans_alloc( ...@@ -119,8 +119,7 @@ xfs_setfilesize_trans_alloc(
* We may pass freeze protection with a transaction. So tell lockdep * We may pass freeze protection with a transaction. So tell lockdep
* we released it. * we released it.
*/ */
rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
1, _THIS_IP_);
/* /*
* We hand off the transaction to the completion thread now, so * We hand off the transaction to the completion thread now, so
* clear the flag here. * clear the flag here.
...@@ -171,8 +170,7 @@ xfs_setfilesize_ioend( ...@@ -171,8 +170,7 @@ xfs_setfilesize_ioend(
* Similarly for freeze protection. * Similarly for freeze protection.
*/ */
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
0, 1, _THIS_IP_);
return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
} }
......
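Both XFS hunks above replace open-coded lockdep calls with the new __sb_writers_release()/__sb_writers_acquired() helpers to hand freeze protection from the submitting task to a completion worker. A sketch of that handoff pattern under stated assumptions — my_ioend, my_submit and my_complete are hypothetical names, not XFS code:

	struct my_ioend {
		struct super_block *sb;
		struct work_struct work;
	};

	static void my_complete(struct work_struct *work)
	{
		struct my_ioend *io = container_of(work, struct my_ioend, work);

		/* Tell lockdep this worker now holds the freeze protection. */
		__sb_writers_acquired(io->sb, SB_FREEZE_FS);
		/* ... transactional completion work runs here ... */
		sb_end_intwrite(io->sb);	/* drop the protection for real */
		kfree(io);
	}

	static void my_submit(struct my_ioend *io)
	{
		sb_start_intwrite(io->sb);	/* take SB_FREEZE_FS protection */
		/* The worker inherits it; appease lockdep in this task. */
		__sb_writers_release(io->sb, SB_FREEZE_FS);
		INIT_WORK(&io->work, my_complete);
		queue_work(system_wq, &io->work);
	}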
#ifndef _LINUX_FS_H #ifndef _LINUX_FS_H
#define _LINUX_FS_H #define _LINUX_FS_H
#include <linux/linkage.h> #include <linux/linkage.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/kdev_t.h> #include <linux/kdev_t.h>
...@@ -30,6 +29,8 @@ ...@@ -30,6 +29,8 @@
#include <linux/lockdep.h> #include <linux/lockdep.h>
#include <linux/percpu-rwsem.h> #include <linux/percpu-rwsem.h>
#include <linux/blk_types.h> #include <linux/blk_types.h>
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
#include <uapi/linux/fs.h> #include <uapi/linux/fs.h>
...@@ -636,7 +637,7 @@ struct inode { ...@@ -636,7 +637,7 @@ struct inode {
unsigned long dirtied_time_when; unsigned long dirtied_time_when;
struct hlist_node i_hash; struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_io_list; /* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *i_wb; /* the associated cgroup wb */ struct bdi_writeback *i_wb; /* the associated cgroup wb */
...@@ -1281,16 +1282,9 @@ enum { ...@@ -1281,16 +1282,9 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers { struct sb_writers {
/* Counters for counting writers at each level */
struct percpu_counter counter[SB_FREEZE_LEVELS];
wait_queue_head_t wait; /* queue for waiting for
writers / faults to finish */
int frozen; /* Is sb frozen? */ int frozen; /* Is sb frozen? */
wait_queue_head_t wait_unfrozen; /* queue for waiting for wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */
sb to be thawed */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map lock_map[SB_FREEZE_LEVELS];
#endif
}; };
struct super_block { struct super_block {
...@@ -1316,7 +1310,6 @@ struct super_block { ...@@ -1316,7 +1310,6 @@ struct super_block {
#endif #endif
const struct xattr_handler **s_xattr; const struct xattr_handler **s_xattr;
struct list_head s_inodes; /* all inodes */
struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev; struct block_device *s_bdev;
...@@ -1382,11 +1375,18 @@ struct super_block { ...@@ -1382,11 +1375,18 @@ struct super_block {
struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
struct rcu_head rcu; struct rcu_head rcu;
struct work_struct destroy_work;
struct mutex s_sync_lock; /* sync serialisation lock */
/* /*
* Indicates how deep in a filesystem stack this SB is * Indicates how deep in a filesystem stack this SB is
*/ */
int s_stack_depth; int s_stack_depth;
/* s_inode_list_lock protects s_inodes */
spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp;
struct list_head s_inodes; /* all inodes */
}; };
extern struct timespec current_fs_time(struct super_block *sb); extern struct timespec current_fs_time(struct super_block *sb);
...@@ -1398,6 +1398,11 @@ extern struct timespec current_fs_time(struct super_block *sb); ...@@ -1398,6 +1398,11 @@ extern struct timespec current_fs_time(struct super_block *sb);
void __sb_end_write(struct super_block *sb, int level); void __sb_end_write(struct super_block *sb, int level);
int __sb_start_write(struct super_block *sb, int level, bool wait); int __sb_start_write(struct super_block *sb, int level, bool wait);
#define __sb_writers_acquired(sb, lev) \
percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev) \
percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
/** /**
* sb_end_write - drop write access to a superblock * sb_end_write - drop write access to a superblock
* @sb: the super we wrote to * @sb: the super we wrote to
...@@ -2614,7 +2619,7 @@ static inline void insert_inode_hash(struct inode *inode) ...@@ -2614,7 +2619,7 @@ static inline void insert_inode_hash(struct inode *inode)
extern void __remove_inode_hash(struct inode *); extern void __remove_inode_hash(struct inode *);
static inline void remove_inode_hash(struct inode *inode) static inline void remove_inode_hash(struct inode *inode)
{ {
if (!inode_unhashed(inode)) if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
__remove_inode_hash(inode); __remove_inode_hash(inode);
} }
......
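The fs.h changes above move s_inodes to the tail of struct super_block next to its new dedicated s_inode_list_lock, keeping each filesystem's inode-list traffic off other superblocks' cache lines. A minimal sketch of a walk under the per-sb lock (hypothetical function):

	static void my_walk_inodes(struct super_block *sb)
	{
		struct inode *inode;

		spin_lock(&sb->s_inode_list_lock);
		list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
			/* take inode->i_lock before inspecting i_state, etc. */
		}
		spin_unlock(&sb->s_inode_list_lock);
	}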
...@@ -368,7 +368,7 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un ...@@ -368,7 +368,7 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_get_mark(struct fsnotify_mark *mark);
extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_unmount_inodes(struct list_head *list); extern void fsnotify_unmount_inodes(struct super_block *sb);
/* put here because inotify does some weird stuff when destroying watches */ /* put here because inotify does some weird stuff when destroying watches */
extern void fsnotify_init_event(struct fsnotify_event *event, extern void fsnotify_init_event(struct fsnotify_event *event,
...@@ -404,7 +404,7 @@ static inline u32 fsnotify_get_cookie(void) ...@@ -404,7 +404,7 @@ static inline u32 fsnotify_get_cookie(void)
return 0; return 0;
} }
static inline void fsnotify_unmount_inodes(struct list_head *list) static inline void fsnotify_unmount_inodes(struct super_block *sb)
{} {}
#endif /* CONFIG_FSNOTIFY */ #endif /* CONFIG_FSNOTIFY */
......
...@@ -672,6 +672,11 @@ static inline void hlist_add_fake(struct hlist_node *n) ...@@ -672,6 +672,11 @@ static inline void hlist_add_fake(struct hlist_node *n)
n->pprev = &n->next; n->pprev = &n->next;
} }
static inline bool hlist_fake(struct hlist_node *h)
{
return h->pprev == &h->next;
}
/* /*
* Move a list from one list head to another. Fixup the pprev * Move a list from one list head to another. Fixup the pprev
* reference of the first entry if it exists. * reference of the first entry if it exists.
......
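hlist_fake() detects a node that hlist_add_fake() has pointed at itself, which is how remove_inode_hash() above can skip never-hashed inodes without touching the inode hash lock during evict. A tiny sketch (hypothetical helper):

	static void my_mark_never_hashed(struct hlist_node *n)
	{
		hlist_add_fake(n);	/* n->pprev = &n->next */
		WARN_ON(!hlist_fake(n));	/* detectable lock-free */
	}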
...@@ -16,6 +16,7 @@ struct percpu_rw_semaphore { ...@@ -16,6 +16,7 @@ struct percpu_rw_semaphore {
}; };
extern void percpu_down_read(struct percpu_rw_semaphore *); extern void percpu_down_read(struct percpu_rw_semaphore *);
extern int percpu_down_read_trylock(struct percpu_rw_semaphore *);
extern void percpu_up_read(struct percpu_rw_semaphore *); extern void percpu_up_read(struct percpu_rw_semaphore *);
extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *);
...@@ -31,4 +32,23 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); ...@@ -31,4 +32,23 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
__percpu_init_rwsem(brw, #brw, &rwsem_key); \ __percpu_init_rwsem(brw, #brw, &rwsem_key); \
}) })
#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_release(&sem->rw_sem.dep_map, 1, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
sem->rw_sem.owner = NULL;
#endif
}
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
}
#endif #endif
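percpu_rwsem_release() and percpu_rwsem_acquire() only adjust lockdep (and, for writers, owner) state; the semaphore itself stays held, which is what lets ownership migrate between tasks. A sketch of a read-side handoff, with hypothetical function names:

	static void task_a_start(struct percpu_rw_semaphore *sem)
	{
		percpu_down_read(sem);
		/* still held, but lockdep no longer charges it to task A */
		percpu_rwsem_release(sem, true, _THIS_IP_);
	}

	static void task_b_finish(struct percpu_rw_semaphore *sem)
	{
		/* lockdep: task B adopts the read lock, then drops it */
		percpu_rwsem_acquire(sem, true, _THIS_IP_);
		percpu_up_read(sem);
	}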
...@@ -927,7 +927,6 @@ config NUMA_BALANCING_DEFAULT_ENABLED ...@@ -927,7 +927,6 @@ config NUMA_BALANCING_DEFAULT_ENABLED
menuconfig CGROUPS menuconfig CGROUPS
bool "Control Group support" bool "Control Group support"
select KERNFS select KERNFS
select PERCPU_RWSEM
help help
This option adds support for grouping sets of processes together, for This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory use with process control subsystems such as Cpusets, CFS, memory
......
obj-y += mutex.o semaphore.o rwsem.o obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
...@@ -24,6 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o ...@@ -24,6 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
...@@ -88,6 +88,19 @@ void percpu_down_read(struct percpu_rw_semaphore *brw) ...@@ -88,6 +88,19 @@ void percpu_down_read(struct percpu_rw_semaphore *brw)
__up_read(&brw->rw_sem); __up_read(&brw->rw_sem);
} }
int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
{
if (unlikely(!update_fast_ctr(brw, +1))) {
if (!__down_read_trylock(&brw->rw_sem))
return 0;
atomic_inc(&brw->slow_read_ctr);
__up_read(&brw->rw_sem);
}
rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 1, _RET_IP_);
return 1;
}
void percpu_up_read(struct percpu_rw_semaphore *brw) void percpu_up_read(struct percpu_rw_semaphore *brw)
{ {
rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
......
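percpu_down_read_trylock() mirrors percpu_down_read(): fast path through the per-cpu counter, slow path bouncing through the writer rwsem, but it backs off instead of sleeping. A sketch of a caller in the style of a non-waiting __sb_start_write() (hypothetical function):

	static bool my_try_read_section(struct percpu_rw_semaphore *sem)
	{
		if (!percpu_down_read_trylock(sem))
			return false;	/* a writer (e.g. freeze) is active */
		/* ... short read-side critical section ... */
		percpu_up_read(sem);
		return true;
	}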
...@@ -53,9 +53,6 @@ config GENERIC_IO ...@@ -53,9 +53,6 @@ config GENERIC_IO
config STMP_DEVICE config STMP_DEVICE
bool bool
config PERCPU_RWSEM
bool
config ARCH_USE_CMPXCHG_LOCKREF config ARCH_USE_CMPXCHG_LOCKREF
bool bool
......
...@@ -55,13 +55,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) ...@@ -55,13 +55,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock); spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_wb_list) list_for_each_entry(inode, &wb->b_dirty, i_io_list)
nr_dirty++; nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_wb_list) list_for_each_entry(inode, &wb->b_io, i_io_list)
nr_io++; nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_wb_list) list_for_each_entry(inode, &wb->b_more_io, i_io_list)
nr_more_io++; nr_more_io++;
list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list) list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
if (inode->i_state & I_DIRTY_TIME) if (inode->i_state & I_DIRTY_TIME)
nr_dirty_time++; nr_dirty_time++;
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
......
...@@ -2143,11 +2143,10 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) ...@@ -2143,11 +2143,10 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
if (S_ISBLK(inode->i_mode)) { if (S_ISBLK(inode->i_mode)) {
p->bdev = bdgrab(I_BDEV(inode)); p->bdev = bdgrab(I_BDEV(inode));
error = blkdev_get(p->bdev, error = blkdev_get(p->bdev,
FMODE_READ | FMODE_WRITE | FMODE_EXCL, FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
sys_swapon);
if (error < 0) { if (error < 0) {
p->bdev = NULL; p->bdev = NULL;
return -EINVAL; return error;
} }
p->old_block_size = block_size(p->bdev); p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE); error = set_blocksize(p->bdev, PAGE_SIZE);
...@@ -2348,7 +2347,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2348,7 +2347,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
struct filename *name; struct filename *name;
struct file *swap_file = NULL; struct file *swap_file = NULL;
struct address_space *mapping; struct address_space *mapping;
int i;
int prio; int prio;
int error; int error;
union swap_header *swap_header; union swap_header *swap_header;
...@@ -2388,19 +2386,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2388,19 +2386,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->swap_file = swap_file; p->swap_file = swap_file;
mapping = swap_file->f_mapping; mapping = swap_file->f_mapping;
for (i = 0; i < nr_swapfiles; i++) {
struct swap_info_struct *q = swap_info[i];
if (q == p || !q->swap_file)
continue;
if (mapping == q->swap_file->f_mapping) {
error = -EBUSY;
goto bad_swap;
}
}
inode = mapping->host; inode = mapping->host;
/* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */ /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
error = claim_swapfile(p, inode); error = claim_swapfile(p, inode);
if (unlikely(error)) if (unlikely(error))
...@@ -2433,6 +2420,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2433,6 +2420,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
goto bad_swap; goto bad_swap;
} }
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
int cpu;
p->flags |= SWP_SOLIDSTATE; p->flags |= SWP_SOLIDSTATE;
/* /*
* select a random position to start with to help wear leveling * select a random position to start with to help wear leveling
...@@ -2451,9 +2440,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2451,9 +2440,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = -ENOMEM; error = -ENOMEM;
goto bad_swap; goto bad_swap;
} }
for_each_possible_cpu(i) { for_each_possible_cpu(cpu) {
struct percpu_cluster *cluster; struct percpu_cluster *cluster;
cluster = per_cpu_ptr(p->percpu_cluster, i); cluster = per_cpu_ptr(p->percpu_cluster, cpu);
cluster_set_null(&cluster->index); cluster_set_null(&cluster->index);
} }
} }
......
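Passing p as the exclusive holder to blkdev_get() means a second swapon of the same block device should now fail inside blkdev_get() with -EBUSY (and that error is propagated instead of a blanket -EINVAL), which is what made the racy duplicate-mapping scan above removable. A sketch of the claim pattern, with a hypothetical wrapper:

	static int my_claim_bdev(struct block_device *bdev, void *holder)
	{
		int error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE |
				       FMODE_EXCL, holder);
		return error < 0 ? error : 0;
	}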