Commit 31b7a57c authored by Linus Torvalds

Merge branch 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "This is an assortment of fixes.  Most of the commits are from Filipe
  (fsync, the inode allocation cache and a few others).  Mark kicked in
  a series fixing corners in the extent sharing ioctls, and everyone
  else fixed up on assorted other problems"

* 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix wrong check for btrfs_force_chunk_alloc()
  Btrfs: fix warning of bytes_may_use
  Btrfs: fix hang when failing to submit bio of directIO
  Btrfs: fix a comment in inode.c:evict_inode_truncate_pages()
  Btrfs: fix memory corruption on failure to submit bio for direct IO
  btrfs: don't update mtime/ctime on deduped inodes
  btrfs: allow dedupe of same inode
  btrfs: fix deadlock with extent-same and readpage
  btrfs: pass unaligned length to btrfs_cmp_data()
  Btrfs: fix fsync after truncate when no_holes feature is enabled
  Btrfs: fix fsync xattr loss in the fast fsync path
  Btrfs: fix fsync data loss after append write
  Btrfs: fix crash on close_ctree() if cleaner starts new transaction
  Btrfs: fix race between caching kthread and returning inode to inode cache
  Btrfs: use kmem_cache_free when freeing entry in inode cache
  Btrfs: fix race between balance and unused block group deletion
  btrfs: add error handling for scrub_workers_get()
  btrfs: cleanup noused initialization of dev in btrfs_end_bio()
  btrfs: qgroup: allow user to clear the limitation on qgroup
parents 84e3e9d0 9689457b
@@ -44,6 +44,8 @@
 #define BTRFS_INODE_IN_DELALLOC_LIST 9
 #define BTRFS_INODE_READDIO_NEED_LOCK 10
 #define BTRFS_INODE_HAS_PROPS 11
+/* DIO is ready to submit */
+#define BTRFS_INODE_DIO_READY 12
 /*
  * The following 3 bits are meant only for the btree inode.
  * When any of them is set, it means an error happened while writing an
...
@@ -1778,6 +1778,7 @@ struct btrfs_fs_info {
 	spinlock_t unused_bgs_lock;
 	struct list_head unused_bgs;
 	struct mutex unused_bg_unpin_mutex;
+	struct mutex delete_unused_bgs_mutex;
 	/* For btrfs to record security options */
 	struct security_mnt_opts security_opts;
...
@@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg)
 {
 	struct btrfs_root *root = arg;
 	int again;
+	struct btrfs_trans_handle *trans;
 	do {
 		again = 0;
@@ -1772,7 +1773,6 @@ static int cleaner_kthread(void *arg)
 		}
 		btrfs_run_delayed_iputs(root);
-		btrfs_delete_unused_bgs(root->fs_info);
 		again = btrfs_clean_one_deleted_snapshot(root);
 		mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -1781,6 +1781,16 @@ static int cleaner_kthread(void *arg)
 		 * needn't do anything special here.
 		 */
 		btrfs_run_defrag_inodes(root->fs_info);
+		/*
+		 * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+		 * with relocation (btrfs_relocate_chunk) and relocation
+		 * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+		 * after acquiring fs_info->delete_unused_bgs_mutex. So we
+		 * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+		 * unused block groups.
+		 */
+		btrfs_delete_unused_bgs(root->fs_info);
 sleep:
 		if (!try_to_freeze() && !again) {
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -1789,6 +1799,34 @@ static int cleaner_kthread(void *arg)
 			__set_current_state(TASK_RUNNING);
 		}
 	} while (!kthread_should_stop());
+	/*
+	 * Transaction kthread is stopped before us and wakes us up.
+	 * However we might have started a new transaction and COWed some
+	 * tree blocks when deleting unused block groups for example. So
+	 * make sure we commit the transaction we started to have a clean
+	 * shutdown when evicting the btree inode - if it has dirty pages
+	 * when we do the final iput() on it, eviction will trigger a
+	 * writeback for it which will fail with null pointer dereferences
+	 * since work queues and other resources were already released and
+	 * destroyed by the time the iput/eviction/writeback is made.
+	 */
+	trans = btrfs_attach_transaction(root);
+	if (IS_ERR(trans)) {
+		if (PTR_ERR(trans) != -ENOENT)
+			btrfs_err(root->fs_info,
+				  "cleaner transaction attach returned %ld",
+				  PTR_ERR(trans));
+	} else {
+		int ret;
+		ret = btrfs_commit_transaction(trans, root);
+		if (ret)
+			btrfs_err(root->fs_info,
+				  "cleaner open transaction commit returned %d",
+				  ret);
+	}
 	return 0;
 }
@@ -2492,6 +2530,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->unused_bg_unpin_mutex);
+	mutex_init(&fs_info->delete_unused_bgs_mutex);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
 	seqlock_init(&fs_info->profiles_lock);
...
@@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		}
 		spin_unlock(&fs_info->unused_bgs_lock);
+		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
 		/* Don't want to race with allocators so take the groups_sem */
 		down_write(&space_info->groups_sem);
 		spin_lock(&block_group->lock);
@@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
end_trans:
 		btrfs_end_transaction(trans, root);
next:
+		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		btrfs_put_block_group(block_group);
 		spin_lock(&fs_info->unused_bgs_lock);
 	}
...
@@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 {
 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
+	spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
 	struct btrfs_free_space *info;
 	struct rb_node *n;
 	u64 count;
@@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 		return;
 	while (1) {
+		bool add_to_ctl = true;
+		spin_lock(rbroot_lock);
 		n = rb_first(rbroot);
-		if (!n)
+		if (!n) {
+			spin_unlock(rbroot_lock);
 			break;
+		}
 		info = rb_entry(n, struct btrfs_free_space, offset_index);
 		BUG_ON(info->bitmap); /* Logic error */
 		if (info->offset > root->ino_cache_progress)
-			goto free;
+			add_to_ctl = false;
 		else if (info->offset + info->bytes > root->ino_cache_progress)
 			count = root->ino_cache_progress - info->offset + 1;
 		else
 			count = info->bytes;
-		__btrfs_add_free_space(ctl, info->offset, count);
-free:
 		rb_erase(&info->offset_index, rbroot);
-		kfree(info);
+		spin_unlock(rbroot_lock);
+		if (add_to_ctl)
+			__btrfs_add_free_space(ctl, info->offset, count);
+		kmem_cache_free(btrfs_free_space_cachep, info);
 	}
 }
...
@@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
 	/*
 	 * Keep looping until we have no more ranges in the io tree.
 	 * We can have ongoing bios started by readpages (called from readahead)
-	 * that didn't get their end io callbacks called yet or they are still
-	 * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+	 * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+	 * still in progress (unlocked the pages in the bio but did not yet
+	 * unlocked the ranges in the io tree). Therefore this means some
	 * ranges can still be locked and eviction started because before
 	 * submitting those bios, which are executed by a separate task (work
 	 * queue kthread), inode references (inode->i_count) were not taken
@@ -7546,6 +7547,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 		current->journal_info = outstanding_extents;
 		btrfs_free_reserved_data_space(inode, len);
+		set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
 	}
 	/*
@@ -7871,8 +7873,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 	struct bio *dio_bio;
 	int ret;
-	if (err)
-		goto out_done;
again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
@@ -7895,7 +7895,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 		ordered = NULL;
 		goto again;
 	}
-out_done:
 	dio_bio = dip->dio_bio;
 	kfree(dip);
@@ -8163,9 +8162,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 				struct inode *inode, loff_t file_offset)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_dio_private *dip;
-	struct bio *io_bio;
+	struct btrfs_dio_private *dip = NULL;
+	struct bio *io_bio = NULL;
 	struct btrfs_io_bio *btrfs_bio;
 	int skip_sum;
 	int write = rw & REQ_WRITE;
@@ -8182,7 +8180,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 	dip = kzalloc(sizeof(*dip), GFP_NOFS);
 	if (!dip) {
 		ret = -ENOMEM;
-		goto free_io_bio;
+		goto free_ordered;
 	}
 	dip->private = dio_bio->bi_private;
@@ -8210,25 +8208,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 	if (btrfs_bio->end_io)
 		btrfs_bio->end_io(btrfs_bio, ret);
-free_io_bio:
-	bio_put(io_bio);
free_ordered:
 	/*
-	 * If this is a write, we need to clean up the reserved space and kill
-	 * the ordered extent.
+	 * If we arrived here it means either we failed to submit the dip
+	 * or we either failed to clone the dio_bio or failed to allocate the
+	 * dip. If we cloned the dio_bio and allocated the dip, we can just
+	 * call bio_endio against our io_bio so that we get proper resource
+	 * cleanup if we fail to submit the dip, otherwise, we must do the
+	 * same as btrfs_endio_direct_[write|read] because we can't call these
+	 * callbacks - they require an allocated dip and a clone of dio_bio.
 	 */
-	if (write) {
-		struct btrfs_ordered_extent *ordered;
-		ordered = btrfs_lookup_ordered_extent(inode, file_offset);
-		if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
-			btrfs_free_reserved_extent(root, ordered->start,
-						   ordered->disk_len, 1);
-		btrfs_put_ordered_extent(ordered);
-		btrfs_put_ordered_extent(ordered);
+	if (io_bio && dip) {
+		bio_endio(io_bio, ret);
+		/*
+		 * The end io callbacks free our dip, do the final put on io_bio
+		 * and all the cleanup and final put for dio_bio (through
+		 * dio_end_io()).
+		 */
+		dip = NULL;
+		io_bio = NULL;
+	} else {
+		if (write) {
+			struct btrfs_ordered_extent *ordered;
+			ordered = btrfs_lookup_ordered_extent(inode,
+							      file_offset);
+			set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+			/*
+			 * Decrements our ref on the ordered extent and removes
+			 * the ordered extent from the inode's ordered tree,
+			 * doing all the proper resource cleanup such as for the
+			 * reserved space and waking up any waiters for this
+			 * ordered extent (through btrfs_remove_ordered_extent).
+			 */
+			btrfs_finish_ordered_io(ordered);
+		} else {
+			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+				      file_offset + dio_bio->bi_iter.bi_size - 1);
+		}
+		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+		/*
+		 * Releases and cleans up our dio_bio, no need to bio_put()
+		 * nor bio_endio()/bio_io_error() against dio_bio.
+		 */
+		dio_end_io(dio_bio, ret);
 	}
-	bio_endio(dio_bio, ret);
+	if (io_bio)
+		bio_put(io_bio);
+	kfree(dip);
 }
 static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
@@ -8330,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 				   btrfs_submit_direct, flags);
 	if (iov_iter_rw(iter) == WRITE) {
 		current->journal_info = NULL;
-		if (ret < 0 && ret != -EIOCBQUEUED)
-			btrfs_delalloc_release_space(inode, count);
-		else if (ret >= 0 && (size_t)ret < count)
+		if (ret < 0 && ret != -EIOCBQUEUED) {
+			/*
+			 * If the error comes from submitting stage,
+			 * btrfs_get_blocsk_direct() has free'd data space,
+			 * and metadata space will be handled by
+			 * finish_ordered_fn, don't do that again to make
+			 * sure bytes_may_use is correct.
+			 */
+			if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
+						&BTRFS_I(inode)->runtime_flags))
+				btrfs_delalloc_release_space(inode, count);
+		} else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode,
 						     count - (size_t)ret);
 	}
...
@@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 	trace_btrfs_ordered_extent_put(entry->inode, entry);
 	if (atomic_dec_and_test(&entry->refs)) {
+		ASSERT(list_empty(&entry->log_list));
+		ASSERT(list_empty(&entry->trans_list));
+		ASSERT(list_empty(&entry->root_extent_list));
+		ASSERT(RB_EMPTY_NODE(&entry->rb_node));
 		if (entry->inode)
 			btrfs_add_delayed_iput(entry->inode);
 		while (!list_empty(&entry->list)) {
@@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	spin_lock_irq(&tree->lock);
 	node = &entry->rb_node;
 	rb_erase(node, &tree->tree);
+	RB_CLEAR_NODE(node);
 	if (tree->last == node)
 		tree->last = NULL;
 	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
...
@@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
+	/* Sometimes we would want to clear the limit on this qgroup.
+	 * To meet this requirement, we treat the -1 as a special value
+	 * which tell kernel to clear the limit on this qgroup.
+	 */
+	const u64 CLEAR_VALUE = -1;
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	quota_root = fs_info->quota_root;
@@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 	}
 	spin_lock(&fs_info->qgroup_lock);
-	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
-		qgroup->max_rfer = limit->max_rfer;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
-		qgroup->max_excl = limit->max_excl;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
-		qgroup->rsv_rfer = limit->rsv_rfer;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
-		qgroup->rsv_excl = limit->rsv_excl;
+	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
+		if (limit->max_rfer == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+			qgroup->max_rfer = 0;
+		} else {
+			qgroup->max_rfer = limit->max_rfer;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
+		if (limit->max_excl == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+			qgroup->max_excl = 0;
+		} else {
+			qgroup->max_excl = limit->max_excl;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
+		if (limit->rsv_rfer == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+			qgroup->rsv_rfer = 0;
+		} else {
+			qgroup->rsv_rfer = limit->rsv_rfer;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
+		if (limit->rsv_excl == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+			qgroup->rsv_excl = 0;
+		} else {
+			qgroup->rsv_excl = limit->rsv_excl;
+		}
+	}
 	qgroup->lim_flags |= limit->flags;
 	spin_unlock(&fs_info->qgroup_lock);
...
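For context, the qgroup change above lets userspace clear a previously set limit by passing the special value -1 (all ones) for that field. Below is a minimal userspace sketch of driving it, assuming the UAPI definitions from <linux/btrfs.h>; the mount point and qgroup id are placeholders, not part of the patch.

/* Illustrative only: ask the kernel to clear the max_rfer limit on a qgroup
 * by passing -1, per the btrfs_limit_qgroup() change above. The mount point
 * "/mnt/btrfs" and qgroup id 257 are placeholders. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

int main(void)
{
	struct btrfs_ioctl_qgroup_limit_args args = { 0 };
	int fd = open("/mnt/btrfs", O_RDONLY);	/* any fd on the filesystem */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	args.qgroupid = 257;				/* placeholder qgroup (0/257) */
	args.lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER;	/* which limit to touch */
	args.lim.max_rfer = (__u64)-1;			/* -1 asks the kernel to clear it */
	if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0)
		perror("BTRFS_IOC_QGROUP_LIMIT");
	close(fd);
	return 0;
}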
@@ -4049,7 +4049,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 	if (trans && progress && err == -ENOSPC) {
 		ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
 					      rc->block_group->flags);
-		if (ret == 0) {
+		if (ret == 1) {
 			err = 0;
 			progress = 0;
 			goto restart;
...
@@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 						int is_dev_replace)
 {
-	int ret = 0;
 	unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
 	int max_active = fs_info->thread_pool_size;
@@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 		fs_info->scrub_workers =
 			btrfs_alloc_workqueue("btrfs-scrub", flags,
 					      max_active, 4);
-		if (!fs_info->scrub_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_workers)
+			goto fail_scrub_workers;
 		fs_info->scrub_wr_completion_workers =
 			btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
 					      max_active, 2);
-		if (!fs_info->scrub_wr_completion_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_wr_completion_workers)
+			goto fail_scrub_wr_completion_workers;
 		fs_info->scrub_nocow_workers =
 			btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
-		if (!fs_info->scrub_nocow_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_nocow_workers)
+			goto fail_scrub_nocow_workers;
 		fs_info->scrub_parity_workers =
 			btrfs_alloc_workqueue("btrfs-scrubparity", flags,
 					      max_active, 2);
-		if (!fs_info->scrub_parity_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_parity_workers)
+			goto fail_scrub_parity_workers;
 	}
 	++fs_info->scrub_workers_refcnt;
-out:
-	return ret;
+	return 0;
+fail_scrub_parity_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+fail_scrub_nocow_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+fail_scrub_wr_completion_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_workers);
+fail_scrub_workers:
+	return -ENOMEM;
 }
 static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
...
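The scrub_workers_get() rework above replaces a single "out" label with a chain of labels that tears down already-created workqueues in reverse order of creation. A standalone sketch of the same error-unwinding pattern, outside the kernel and with hypothetical resources a/b/c instead of btrfs workqueues, looks like this:

/* Illustrative only: reverse-order goto unwinding, as used in
 * scrub_workers_get() above. Names and resources are hypothetical. */
#include <stdlib.h>

int setup(void **a, void **b, void **c)
{
	*a = malloc(16);
	if (!*a)
		goto fail_a;
	*b = malloc(16);
	if (!*b)
		goto fail_b;
	*c = malloc(16);
	if (!*c)
		goto fail_c;
	return 0;

fail_c:
	free(*b);
fail_b:
	free(*a);
fail_a:
	return -1;	/* -ENOMEM in the kernel version */
}

Each failure label only undoes the allocations that already succeeded, so no cleanup state variable is needed.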
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
 	return 0;
 }
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If a xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if a xattr is deleted, the inode is fsynced and a power failure
+ * happens, causing the log to be replayed the next time the fs is mounted,
+ * we want the xattr to not exist anymore (same behaviour as other filesystems
+ * with a journal, ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct inode *inode,
+				struct btrfs_path *path,
+				struct btrfs_path *dst_path)
+{
+	int ret;
+	struct btrfs_key key;
+	const u64 ino = btrfs_ino(inode);
+	int ins_nr = 0;
+	int start_slot = 0;
+	key.objectid = ino;
+	key.type = BTRFS_XATTR_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+	while (true) {
+		int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		int nritems = btrfs_header_nritems(leaf);
+		if (slot >= nritems) {
+			if (ins_nr > 0) {
+				u64 last_extent = 0;
+				ret = copy_items(trans, inode, dst_path, path,
+						 &last_extent, start_slot,
+						 ins_nr, 1, 0);
+				/* can't be 1, extent items aren't processed */
+				ASSERT(ret <= 0);
+				if (ret < 0)
+					return ret;
+				ins_nr = 0;
+			}
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				return ret;
+			else if (ret > 0)
+				break;
+			continue;
+		}
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+			break;
+		if (ins_nr == 0)
+			start_slot = slot;
+		ins_nr++;
+		path->slots[0]++;
+		cond_resched();
+	}
+	if (ins_nr > 0) {
+		u64 last_extent = 0;
+		ret = copy_items(trans, inode, dst_path, path,
+				 &last_extent, start_slot,
+				 ins_nr, 1, 0);
+		/* can't be 1, extent items aren't processed */
+		ASSERT(ret <= 0);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+/*
+ * If the no holes feature is enabled we need to make sure any hole between the
+ * last extent and the i_size of our inode is explicitly marked in the log. This
+ * is to make sure that doing something like:
+ *
+ * 1) create file with 128Kb of data
+ * 2) truncate file to 64Kb
+ * 3) truncate file to 256Kb
+ * 4) fsync file
+ * 5) <crash/power failure>
+ * 6) mount fs and trigger log replay
+ *
+ * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+ * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+ * file correspond to a hole. The presence of explicit holes in a log tree is
+ * what guarantees that log replay will remove/adjust file extent items in the
+ * fs/subvol tree.
+ *
+ * Here we do not need to care about holes between extents, that is already done
+ * by copy_items(). We also only need to do this in the full sync path, where we
+ * lookup for extents from the fs/subvol tree only. In the fast path case, we
+ * lookup the list of modified extent maps and if any represents a hole, we
+ * insert a corresponding extent representing a hole in the log tree.
+ */
+static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct inode *inode,
+				   struct btrfs_path *path)
+{
+	int ret;
+	struct btrfs_key key;
+	u64 hole_start;
+	u64 hole_size;
+	struct extent_buffer *leaf;
+	struct btrfs_root *log = root->log_root;
+	const u64 ino = btrfs_ino(inode);
+	const u64 i_size = i_size_read(inode);
+	if (!btrfs_fs_incompat(root->fs_info, NO_HOLES))
+		return 0;
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = (u64)-1;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	ASSERT(ret != 0);
+	if (ret < 0)
+		return ret;
+	ASSERT(path->slots[0] > 0);
+	path->slots[0]--;
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+		/* inode does not have any extents */
+		hole_start = 0;
+		hole_size = i_size;
+	} else {
+		struct btrfs_file_extent_item *extent;
+		u64 len;
+		/*
+		 * If there's an extent beyond i_size, an explicit hole was
+		 * already inserted by copy_items().
+		 */
+		if (key.offset >= i_size)
+			return 0;
+		extent = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, extent) ==
+		    BTRFS_FILE_EXTENT_INLINE) {
+			len = btrfs_file_extent_inline_len(leaf,
+							   path->slots[0],
+							   extent);
+			ASSERT(len == i_size);
+			return 0;
+		}
+		len = btrfs_file_extent_num_bytes(leaf, extent);
+		/* Last extent goes beyond i_size, no need to log a hole. */
+		if (key.offset + len > i_size)
+			return 0;
+		hole_start = key.offset + len;
+		hole_size = i_size - hole_start;
+	}
+	btrfs_release_path(path);
+	/* Last extent ends at i_size. */
+	if (hole_size == 0)
+		return 0;
+	hole_size = ALIGN(hole_size, root->sectorsize);
+	ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+				       hole_size, 0, hole_size, 0, 0, 0);
+	return ret;
+}
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	u64 ino = btrfs_ino(inode);
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	u64 logged_isize = 0;
+	bool need_log_inode_item = true;
 	path = btrfs_alloc_path();
 	if (!path)
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	} else {
 		if (inode_only == LOG_INODE_ALL)
 			fast_search = true;
-		ret = log_inode_item(trans, log, dst_path, inode);
-		if (ret) {
-			err = ret;
-			goto out_unlock;
-		}
 		goto log_extents;
 	}
@@ -4290,6 +4467,28 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		if (min_key.type > max_key.type)
 			break;
+		if (min_key.type == BTRFS_INODE_ITEM_KEY)
+			need_log_inode_item = false;
+		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+			if (ins_nr == 0)
+				goto next_slot;
+			ret = copy_items(trans, inode, dst_path, path,
+					 &last_extent, ins_start_slot,
+					 ins_nr, inode_only, logged_isize);
+			if (ret < 0) {
+				err = ret;
+				goto out_unlock;
+			}
+			ins_nr = 0;
+			if (ret) {
+				btrfs_release_path(path);
+				continue;
+			}
+			goto next_slot;
+		}
 		src = path->nodes[0];
 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
 			ins_nr++;
@@ -4357,9 +4556,26 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		ins_nr = 0;
 	}
+	btrfs_release_path(path);
+	btrfs_release_path(dst_path);
+	err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+	if (err)
+		goto out_unlock;
+	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+		btrfs_release_path(path);
+		btrfs_release_path(dst_path);
+		err = btrfs_log_trailing_hole(trans, root, inode, path);
+		if (err)
+			goto out_unlock;
+	}
log_extents:
 	btrfs_release_path(path);
 	btrfs_release_path(dst_path);
+	if (need_log_inode_item) {
+		err = log_inode_item(trans, log, dst_path, inode);
+		if (err)
+			goto out_unlock;
+	}
 	if (fast_search) {
 		/*
 		 * Some ordered extents started by fsync might have completed
...
@@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 	root = root->fs_info->chunk_root;
 	extent_root = root->fs_info->extent_root;
+	/*
+	 * Prevent races with automatic removal of unused block groups.
+	 * After we relocate and before we remove the chunk with offset
+	 * chunk_offset, automatic removal of the block group can kick in,
+	 * resulting in a failure when calling btrfs_remove_chunk() below.
+	 *
+	 * Make sure to acquire this mutex before doing a tree search (dev
+	 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+	 * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+	 * we release the path used to search the chunk/dev tree and before
+	 * the current task acquires this mutex and calls us.
+	 */
+	ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
 	ret = btrfs_can_relocate(extent_root, chunk_offset);
 	if (ret)
 		return -ENOSPC;
@@ -2814,13 +2828,18 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
 	key.type = BTRFS_CHUNK_ITEM_KEY;
 	while (1) {
+		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			goto error;
+		}
 		BUG_ON(ret == 0); /* Corruption */
 		ret = btrfs_previous_item(chunk_root, path, key.objectid,
 					  key.type);
+		if (ret)
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (ret < 0)
 			goto error;
 		if (ret > 0)
@@ -2843,6 +2862,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
 			else
 				BUG_ON(ret);
 		}
+		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (found_key.offset == 0)
 			break;
@@ -3299,9 +3319,12 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 			goto error;
 		}
+		mutex_lock(&fs_info->delete_unused_bgs_mutex);
 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			goto error;
+		}
 		/*
 		 * this shouldn't happen, it means the last relocate
@@ -3313,6 +3336,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 		ret = btrfs_previous_item(chunk_root, path, 0,
 					  BTRFS_CHUNK_ITEM_KEY);
 		if (ret) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			ret = 0;
 			break;
 		}
@@ -3321,8 +3345,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 		slot = path->slots[0];
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
-		if (found_key.objectid != key.objectid)
+		if (found_key.objectid != key.objectid) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			break;
+		}
 		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -3335,10 +3361,13 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 		ret = should_balance_chunk(chunk_root, leaf, chunk,
 					   found_key.offset);
 		btrfs_release_path(path);
-		if (!ret)
+		if (!ret) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			goto loop;
+		}
 		if (counting) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			spin_lock(&fs_info->balance_lock);
 			bctl->stat.expected++;
 			spin_unlock(&fs_info->balance_lock);
@@ -3348,6 +3377,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 		ret = btrfs_relocate_chunk(chunk_root,
 					   found_key.objectid,
 					   found_key.offset);
+		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 		if (ret && ret != -ENOSPC)
 			goto error;
 		if (ret == -ENOSPC) {
@@ -4087,11 +4117,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 	key.type = BTRFS_DEV_EXTENT_KEY;
 	do {
+		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			goto done;
+		}
 		ret = btrfs_previous_item(root, path, 0, key.type);
+		if (ret)
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (ret < 0)
 			goto done;
 		if (ret) {
@@ -4105,6 +4140,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 		if (key.objectid != device->devid) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			btrfs_release_path(path);
 			break;
 		}
@@ -4113,6 +4149,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 		length = btrfs_dev_extent_length(l, dev_extent);
 		if (key.offset + length <= new_size) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			btrfs_release_path(path);
 			break;
 		}
@@ -4122,6 +4159,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 		btrfs_release_path(path);
 		ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
+		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (ret && ret != -ENOSPC)
 			goto done;
 		if (ret == -ENOSPC)
@@ -5715,7 +5753,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
 static void btrfs_end_bio(struct bio *bio, int err)
 {
 	struct btrfs_bio *bbio = bio->bi_private;
-	struct btrfs_device *dev = bbio->stripes[0].dev;
 	int is_orig_bio = 0;
 	if (err) {
@@ -5723,6 +5760,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
 		if (err == -EIO || err == -EREMOTEIO) {
 			unsigned int stripe_index =
 				btrfs_io_bio(bio)->stripe_index;
+			struct btrfs_device *dev;
 			BUG_ON(stripe_index >= bbio->num_stripes);
 			dev = bbio->stripes[stripe_index].dev;
...