Commit ba929b66 authored by Linus Torvalds

Merge branch 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This pull is dedicated to Josef's enospc rework, which we've been
  testing for a few releases now.  It fixes some early enospc problems
  and is dramatically faster.

  This also includes an updated fix for the delalloc accounting that
  happens after a fault in copy_from_user.  My patch in v4.7 was almost
  but not quite enough"

* 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix delalloc accounting after copy_from_user faults
  Btrfs: avoid deadlocks during reservations in btrfs_truncate_block
  Btrfs: use FLUSH_LIMIT for relocation in reserve_metadata_bytes
  Btrfs: fill relocation block rsv after allocation
  Btrfs: always use trans->block_rsv for orphans
  Btrfs: change how we calculate the global block rsv
  Btrfs: use root when checking need_async_flush
  Btrfs: don't bother kicking async if there's nothing to reclaim
  Btrfs: fix release reserved extents trace points
  Btrfs: add fsid to some tracepoints
  Btrfs: add tracepoints for flush events
  Btrfs: fix delalloc reservation amount tracepoint
  Btrfs: trace pinned extents
  Btrfs: introduce ticketed enospc infrastructure
  Btrfs: add tracepoint for adding block groups
  Btrfs: warn_on for unaccounted spaces
  Btrfs: change delayed reservation fallback behavior
  Btrfs: always reserve metadata for delalloc extents
  Btrfs: fix callers of btrfs_block_rsv_migrate
  Btrfs: add bytes_readonly to the spaceinfo at once
parents c9b95e59 8b8b08cb
...@@ -439,6 +439,8 @@ struct btrfs_space_info { ...@@ -439,6 +439,8 @@ struct btrfs_space_info {
struct list_head list; struct list_head list;
/* Protected by the spinlock 'lock'. */ /* Protected by the spinlock 'lock'. */
struct list_head ro_bgs; struct list_head ro_bgs;
struct list_head priority_tickets;
struct list_head tickets;
struct rw_semaphore groups_sem; struct rw_semaphore groups_sem;
/* for block groups in our same type */ /* for block groups in our same type */
...@@ -2624,6 +2626,15 @@ enum btrfs_reserve_flush_enum { ...@@ -2624,6 +2626,15 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_ALL, BTRFS_RESERVE_FLUSH_ALL,
}; };
/*
 * Steps handed to flush_space() when metadata space has to be reclaimed.
 *
 * The reclaim loops begin at FLUSH_DELAYED_ITEMS_NR and move up one step at
 * a time; COMMIT_TRANS is the last step.  The current step is also reported
 * through trace_btrfs_flush_space().
 */
enum btrfs_flush_state {
	FLUSH_DELAYED_ITEMS_NR	= 1,	/* first step of every reclaim pass */
	FLUSH_DELAYED_ITEMS	= 2,
	FLUSH_DELALLOC		= 3,	/* skipped by priority flushers */
	FLUSH_DELALLOC_WAIT	= 4,	/* skipped by priority flushers */
	ALLOC_CHUNK		= 5,
	COMMIT_TRANS		= 6,	/* last step */
};
int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len); int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes); int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len); void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
...@@ -2661,8 +2672,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, ...@@ -2661,8 +2672,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 min_reserved, struct btrfs_block_rsv *block_rsv, u64 min_reserved,
enum btrfs_reserve_flush_enum flush); enum btrfs_reserve_flush_enum flush);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv, struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
u64 num_bytes); int update_size);
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes, struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor); int min_factor);
......
...@@ -553,7 +553,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, ...@@ -553,7 +553,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
dst_rsv = &root->fs_info->delayed_block_rsv; dst_rsv = &root->fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(root, 1); num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
if (!ret) { if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_item", trace_btrfs_space_reservation(root->fs_info, "delayed_item",
item->key.objectid, item->key.objectid,
...@@ -597,6 +597,29 @@ static int btrfs_delayed_inode_reserve_metadata( ...@@ -597,6 +597,29 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes = btrfs_calc_trans_metadata_size(root, 1); num_bytes = btrfs_calc_trans_metadata_size(root, 1);
/*
* If our block_rsv is the delalloc block reserve then check and see if
* we have our extra reservation for updating the inode. If not fall
* through and try to reserve space quickly.
*
* We used to try and steal from the delalloc block rsv or the global
* reserve, but we'd steal a full reservation, which isn't kind. We are
* here through delalloc which means we've likely just cowed down close
* to the leaf that contains the inode, so we would steal less just
* doing the fallback inode update, so if we do end up having to steal
* from the global block rsv we hopefully only steal one or two blocks
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&BTRFS_I(inode)->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
release = true;
else
src_rsv = NULL;
spin_unlock(&BTRFS_I(inode)->lock);
}
/* /*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction * btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we * which doesn't reserve space for speed. This is a problem since we
...@@ -626,51 +649,10 @@ static int btrfs_delayed_inode_reserve_metadata( ...@@ -626,51 +649,10 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes, 1); num_bytes, 1);
} }
return ret; return ret;
} else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&BTRFS_I(inode)->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
spin_unlock(&BTRFS_I(inode)->lock);
release = true;
goto migrate;
} }
spin_unlock(&BTRFS_I(inode)->lock);
/* Ok we didn't have space pre-reserved. This shouldn't happen ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
* too often but it can happen if we do delalloc to an existing
* inode which gets dirtied because of the time update, and then
* isn't touched again until after the transaction commits and
* then we try to write out the data. First try to be nice and
* reserve something strictly for us. If not be a pain and try
* to steal from the delalloc block rsv.
*/
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
if (!ret)
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
if (!ret)
goto out;
if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
btrfs_debug(root->fs_info,
"block rsv migrate returned %d", ret);
WARN_ON(1);
}
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
*/
ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
dst_rsv, num_bytes);
goto out;
}
migrate:
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
out:
/* /*
* Migrate only takes a reservation, it doesn't touch the size of the * Migrate only takes a reservation, it doesn't touch the size of the
* block_rsv. This is to simplify people who don't normally have things * block_rsv. This is to simplify people who don't normally have things
......
...@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, ...@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes); u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root, int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int reserved); u64 bytenr, u64 num_bytes, int reserved);
static int __reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes);
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes);
static noinline int static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache) block_group_cache_done(struct btrfs_block_group_cache *cache)
...@@ -3913,6 +3923,7 @@ static const char *alloc_name(u64 flags) ...@@ -3913,6 +3923,7 @@ static const char *alloc_name(u64 flags)
static int update_space_info(struct btrfs_fs_info *info, u64 flags, static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used, u64 total_bytes, u64 bytes_used,
u64 bytes_readonly,
struct btrfs_space_info **space_info) struct btrfs_space_info **space_info)
{ {
struct btrfs_space_info *found; struct btrfs_space_info *found;
...@@ -3933,8 +3944,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, ...@@ -3933,8 +3944,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->disk_total += total_bytes * factor; found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used; found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor; found->disk_used += bytes_used * factor;
found->bytes_readonly += bytes_readonly;
if (total_bytes > 0) if (total_bytes > 0)
found->full = 0; found->full = 0;
space_info_add_new_bytes(info, found, total_bytes -
bytes_used - bytes_readonly);
spin_unlock(&found->lock); spin_unlock(&found->lock);
*space_info = found; *space_info = found;
return 0; return 0;
...@@ -3960,7 +3974,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, ...@@ -3960,7 +3974,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->disk_used = bytes_used * factor; found->disk_used = bytes_used * factor;
found->bytes_pinned = 0; found->bytes_pinned = 0;
found->bytes_reserved = 0; found->bytes_reserved = 0;
found->bytes_readonly = 0; found->bytes_readonly = bytes_readonly;
found->bytes_may_use = 0; found->bytes_may_use = 0;
found->full = 0; found->full = 0;
found->max_extent_size = 0; found->max_extent_size = 0;
...@@ -3969,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, ...@@ -3969,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->flush = 0; found->flush = 0;
init_waitqueue_head(&found->wait); init_waitqueue_head(&found->wait);
INIT_LIST_HEAD(&found->ro_bgs); INIT_LIST_HEAD(&found->ro_bgs);
INIT_LIST_HEAD(&found->tickets);
INIT_LIST_HEAD(&found->priority_tickets);
ret = kobject_init_and_add(&found->kobj, &space_info_ktype, ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
info->space_info_kobj, "%s", info->space_info_kobj, "%s",
...@@ -4470,7 +4486,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, ...@@ -4470,7 +4486,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info = __find_space_info(extent_root->fs_info, flags); space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) { if (!space_info) {
ret = update_space_info(extent_root->fs_info, flags, ret = update_space_info(extent_root->fs_info, flags,
0, 0, &space_info); 0, 0, 0, &space_info);
BUG_ON(ret); /* -ENOMEM */ BUG_ON(ret); /* -ENOMEM */
} }
BUG_ON(!space_info); /* Logic error */ BUG_ON(!space_info); /* Logic error */
...@@ -4582,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root, ...@@ -4582,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root,
struct btrfs_space_info *space_info, u64 bytes, struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush) enum btrfs_reserve_flush_enum flush)
{ {
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; struct btrfs_block_rsv *global_rsv;
u64 profile = btrfs_get_alloc_profile(root, 0); u64 profile;
u64 space_size; u64 space_size;
u64 avail; u64 avail;
u64 used; u64 used;
/* Don't overcommit when in mixed mode. */
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
return 0;
BUG_ON(root->fs_info == NULL);
global_rsv = &root->fs_info->global_block_rsv;
profile = btrfs_get_alloc_profile(root, 0);
used = space_info->bytes_used + space_info->bytes_reserved + used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly; space_info->bytes_pinned + space_info->bytes_readonly;
...@@ -4739,6 +4762,11 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, ...@@ -4739,6 +4762,11 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
break; break;
} }
if (list_empty(&space_info->tickets) &&
list_empty(&space_info->priority_tickets)) {
spin_unlock(&space_info->lock);
break;
}
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
loops++; loops++;
...@@ -4807,13 +4835,11 @@ static int may_commit_transaction(struct btrfs_root *root, ...@@ -4807,13 +4835,11 @@ static int may_commit_transaction(struct btrfs_root *root,
return btrfs_commit_transaction(trans, root); return btrfs_commit_transaction(trans, root);
} }
enum flush_state { struct reserve_ticket {
FLUSH_DELAYED_ITEMS_NR = 1, u64 bytes;
FLUSH_DELAYED_ITEMS = 2, int error;
FLUSH_DELALLOC = 3, struct list_head list;
FLUSH_DELALLOC_WAIT = 4, wait_queue_head_t wait;
ALLOC_CHUNK = 5,
COMMIT_TRANS = 6,
}; };
static int flush_space(struct btrfs_root *root, static int flush_space(struct btrfs_root *root,
...@@ -4866,6 +4892,8 @@ static int flush_space(struct btrfs_root *root, ...@@ -4866,6 +4892,8 @@ static int flush_space(struct btrfs_root *root,
break; break;
} }
trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
orig_bytes, state, ret);
return ret; return ret;
} }
...@@ -4873,17 +4901,22 @@ static inline u64 ...@@ -4873,17 +4901,22 @@ static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
struct btrfs_space_info *space_info) struct btrfs_space_info *space_info)
{ {
struct reserve_ticket *ticket;
u64 used; u64 used;
u64 expected; u64 expected;
u64 to_reclaim; u64 to_reclaim = 0;
to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
spin_lock(&space_info->lock);
if (can_overcommit(root, space_info, to_reclaim, if (can_overcommit(root, space_info, to_reclaim,
BTRFS_RESERVE_FLUSH_ALL)) { BTRFS_RESERVE_FLUSH_ALL))
to_reclaim = 0; return 0;
goto out;
} list_for_each_entry(ticket, &space_info->tickets, list)
to_reclaim += ticket->bytes;
list_for_each_entry(ticket, &space_info->priority_tickets, list)
to_reclaim += ticket->bytes;
if (to_reclaim)
return to_reclaim;
used = space_info->bytes_used + space_info->bytes_reserved + used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_pinned + space_info->bytes_readonly +
...@@ -4899,14 +4932,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, ...@@ -4899,14 +4932,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
to_reclaim = 0; to_reclaim = 0;
to_reclaim = min(to_reclaim, space_info->bytes_may_use + to_reclaim = min(to_reclaim, space_info->bytes_may_use +
space_info->bytes_reserved); space_info->bytes_reserved);
out:
spin_unlock(&space_info->lock);
return to_reclaim; return to_reclaim;
} }
static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
struct btrfs_fs_info *fs_info, u64 used) struct btrfs_root *root, u64 used)
{ {
u64 thresh = div_factor_fine(space_info->total_bytes, 98); u64 thresh = div_factor_fine(space_info->total_bytes, 98);
...@@ -4914,73 +4944,177 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, ...@@ -4914,73 +4944,177 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0; return 0;
return (used >= thresh && !btrfs_fs_closing(fs_info) && if (!btrfs_calc_reclaim_metadata_size(root, space_info))
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); return 0;
return (used >= thresh && !btrfs_fs_closing(root->fs_info) &&
!test_bit(BTRFS_FS_STATE_REMOUNTING,
&root->fs_info->fs_state));
} }
static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, static void wake_all_tickets(struct list_head *head)
struct btrfs_fs_info *fs_info,
int flush_state)
{ {
u64 used; struct reserve_ticket *ticket;
spin_lock(&space_info->lock);
/*
* We run out of space and have not got any free space via flush_space,
* so don't bother doing async reclaim.
*/
if (flush_state > COMMIT_TRANS && space_info->full) {
spin_unlock(&space_info->lock);
return 0;
}
used = space_info->bytes_used + space_info->bytes_reserved + while (!list_empty(head)) {
space_info->bytes_pinned + space_info->bytes_readonly + ticket = list_first_entry(head, struct reserve_ticket, list);
space_info->bytes_may_use; list_del_init(&ticket->list);
if (need_do_async_reclaim(space_info, fs_info, used)) { ticket->error = -ENOSPC;
spin_unlock(&space_info->lock); wake_up(&ticket->wait);
return 1;
} }
spin_unlock(&space_info->lock);
return 0;
} }
/*
* This is for normal flushers, we can wait all goddamned day if we want to. We
* will loop and continuously try to flush as long as we are making progress.
* We count progress as clearing off tickets each time we have to loop.
*/
static void btrfs_async_reclaim_metadata_space(struct work_struct *work) static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{ {
struct reserve_ticket *last_ticket = NULL;
struct btrfs_fs_info *fs_info; struct btrfs_fs_info *fs_info;
struct btrfs_space_info *space_info; struct btrfs_space_info *space_info;
u64 to_reclaim; u64 to_reclaim;
int flush_state; int flush_state;
int commit_cycles = 0;
fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info); space_info);
if (!to_reclaim) if (!to_reclaim) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
return; return;
}
last_ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
spin_unlock(&space_info->lock);
flush_state = FLUSH_DELAYED_ITEMS_NR; flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
struct reserve_ticket *ticket;
int ret;
ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
to_reclaim, flush_state);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
return;
}
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info);
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
if (last_ticket == ticket) {
flush_state++;
} else {
last_ticket = ticket;
flush_state = FLUSH_DELAYED_ITEMS_NR;
if (commit_cycles)
commit_cycles--;
}
if (flush_state > COMMIT_TRANS) {
commit_cycles++;
if (commit_cycles > 2) {
wake_all_tickets(&space_info->tickets);
space_info->flush = 0;
} else {
flush_state = FLUSH_DELAYED_ITEMS_NR;
}
}
spin_unlock(&space_info->lock);
} while (flush_state <= COMMIT_TRANS);
}
/*
 * Initialise @work so that running it invokes
 * btrfs_async_reclaim_metadata_space().
 *
 * That worker looks up its filesystem with container_of(work,
 * struct btrfs_fs_info, async_reclaim_work), so @work is expected to be the
 * async_reclaim_work member embedded in a struct btrfs_fs_info.
 */
void btrfs_init_async_reclaim_work(struct work_struct *work)
{
INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}
static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
u64 to_reclaim;
int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info);
if (!to_reclaim) {
spin_unlock(&space_info->lock);
return;
}
spin_unlock(&space_info->lock);
do { do {
flush_space(fs_info->fs_root, space_info, to_reclaim, flush_space(fs_info->fs_root, space_info, to_reclaim,
to_reclaim, flush_state); to_reclaim, flush_state);
flush_state++; flush_state++;
if (!btrfs_need_do_async_reclaim(space_info, fs_info, spin_lock(&space_info->lock);
flush_state)) if (ticket->bytes == 0) {
spin_unlock(&space_info->lock);
return; return;
}
spin_unlock(&space_info->lock);
/*
* Priority flushers can't wait on delalloc without
* deadlocking.
*/
if (flush_state == FLUSH_DELALLOC ||
flush_state == FLUSH_DELALLOC_WAIT)
flush_state = ALLOC_CHUNK;
} while (flush_state < COMMIT_TRANS); } while (flush_state < COMMIT_TRANS);
} }
void btrfs_init_async_reclaim_work(struct work_struct *work) static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
struct reserve_ticket *ticket, u64 orig_bytes)
{ {
INIT_WORK(work, btrfs_async_reclaim_metadata_space); DEFINE_WAIT(wait);
int ret = 0;
spin_lock(&space_info->lock);
while (ticket->bytes > 0 && ticket->error == 0) {
ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
if (ret) {
ret = -EINTR;
break;
}
spin_unlock(&space_info->lock);
schedule();
finish_wait(&ticket->wait, &wait);
spin_lock(&space_info->lock);
}
if (!ret)
ret = ticket->error;
if (!list_empty(&ticket->list))
list_del_init(&ticket->list);
if (ticket->bytes && ticket->bytes < orig_bytes) {
u64 num_bytes = orig_bytes - ticket->bytes;
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
}
spin_unlock(&space_info->lock);
return ret;
} }
/** /**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for * @root - the root we're allocating for
* @block_rsv - the block_rsv we're allocating for * @space_info - the space info we want to allocate from
* @orig_bytes - the number of bytes we want * @orig_bytes - the number of bytes we want
* @flush - whether or not we can flush to make our reservation * @flush - whether or not we can flush to make our reservation
* *
...@@ -4991,81 +5125,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work) ...@@ -4991,81 +5125,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
* regain reservations will be made and this will fail if there is not enough * regain reservations will be made and this will fail if there is not enough
* space already. * space already.
*/ */
static int reserve_metadata_bytes(struct btrfs_root *root, static int __reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, struct btrfs_space_info *space_info,
u64 orig_bytes, u64 orig_bytes,
enum btrfs_reserve_flush_enum flush) enum btrfs_reserve_flush_enum flush)
{ {
struct btrfs_space_info *space_info = block_rsv->space_info; struct reserve_ticket ticket;
u64 used; u64 used;
u64 num_bytes = orig_bytes;
int flush_state = FLUSH_DELAYED_ITEMS_NR;
int ret = 0; int ret = 0;
bool flushing = false;
again: ASSERT(orig_bytes);
ret = 0; ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
spin_lock(&space_info->lock);
/*
* We only want to wait if somebody other than us is flushing and we
* are actually allowed to flush all things.
*/
while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
space_info->flush) {
spin_unlock(&space_info->lock);
/*
* If we have a trans handle we can't wait because the flusher
* may have to commit the transaction, which would mean we would
* deadlock since we are waiting for the flusher to finish, but
* hold the current transaction open.
*/
if (current->journal_info)
return -EAGAIN;
ret = wait_event_killable(space_info->wait, !space_info->flush);
/* Must have been killed, return */
if (ret)
return -EINTR;
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
}
ret = -ENOSPC; ret = -ENOSPC;
used = space_info->bytes_used + space_info->bytes_reserved + used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use; space_info->bytes_may_use;
/* /*
* The idea here is that we've not already over-reserved the block group * If we have enough space then hooray, make our reservation and carry
* then we can go ahead and save our reservation first and then start * on. If not see if we can overcommit, and if we can, hooray carry on.
* flushing if we need to. Otherwise if we've already overcommitted * If not things get more complicated.
* lets start flushing stuff first and then come back and try to make
* our reservation.
*/ */
if (used <= space_info->total_bytes) {
if (used + orig_bytes <= space_info->total_bytes) { if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes; space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info, trace_btrfs_space_reservation(root->fs_info, "space_info",
"space_info", space_info->flags, orig_bytes, 1); space_info->flags, orig_bytes,
1);
ret = 0; ret = 0;
} else { } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
/*
* Ok set num_bytes to orig_bytes since we aren't
* overocmmitted, this way we only try and reclaim what
* we need.
*/
num_bytes = orig_bytes;
}
} else {
/*
* Ok we're over committed, set num_bytes to the overcommitted
* amount plus the amount of bytes that we need for this
* reservation.
*/
num_bytes = used - space_info->total_bytes +
(orig_bytes * 2);
}
if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
space_info->bytes_may_use += orig_bytes; space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info", trace_btrfs_space_reservation(root->fs_info, "space_info",
space_info->flags, orig_bytes, space_info->flags, orig_bytes,
...@@ -5074,16 +5163,31 @@ static int reserve_metadata_bytes(struct btrfs_root *root, ...@@ -5074,16 +5163,31 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
} }
/* /*
* Couldn't make our reservation, save our place so while we're trying * If we couldn't make a reservation then setup our reservation ticket
* to reclaim space we can actually use it instead of somebody else * and kick the async worker if it's not already running.
* stealing it from us.
* *
* We make the other tasks wait for the flush only when we can flush * If we are a priority flusher then we just need to add our ticket to
* all things. * the list and we will do our own flushing further down.
*/ */
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
flushing = true; ticket.bytes = orig_bytes;
ticket.error = 0;
init_waitqueue_head(&ticket.wait);
if (flush == BTRFS_RESERVE_FLUSH_ALL) {
list_add_tail(&ticket.list, &space_info->tickets);
if (!space_info->flush) {
space_info->flush = 1; space_info->flush = 1;
trace_btrfs_trigger_flush(root->fs_info,
space_info->flags,
orig_bytes, flush,
"enospc");
queue_work(system_unbound_wq,
&root->fs_info->async_reclaim_work);
}
} else {
list_add_tail(&ticket.list,
&space_info->priority_tickets);
}
} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
used += orig_bytes; used += orig_bytes;
/* /*
...@@ -5092,39 +5196,67 @@ static int reserve_metadata_bytes(struct btrfs_root *root, ...@@ -5092,39 +5196,67 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
* the async reclaim as we will panic. * the async reclaim as we will panic.
*/ */
if (!root->fs_info->log_root_recovering && if (!root->fs_info->log_root_recovering &&
need_do_async_reclaim(space_info, root->fs_info, used) && need_do_async_reclaim(space_info, root, used) &&
!work_busy(&root->fs_info->async_reclaim_work)) !work_busy(&root->fs_info->async_reclaim_work)) {
trace_btrfs_trigger_flush(root->fs_info,
space_info->flags,
orig_bytes, flush,
"preempt");
queue_work(system_unbound_wq, queue_work(system_unbound_wq,
&root->fs_info->async_reclaim_work); &root->fs_info->async_reclaim_work);
} }
}
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
goto out; return ret;
ret = flush_space(root, space_info, num_bytes, orig_bytes, if (flush == BTRFS_RESERVE_FLUSH_ALL)
flush_state); return wait_reserve_ticket(root->fs_info, space_info, &ticket,
flush_state++; orig_bytes);
/* ret = 0;
* If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
* would happen. So skip delalloc flush. spin_lock(&space_info->lock);
*/ if (ticket.bytes) {
if (flush == BTRFS_RESERVE_FLUSH_LIMIT && if (ticket.bytes < orig_bytes) {
(flush_state == FLUSH_DELALLOC || u64 num_bytes = orig_bytes - ticket.bytes;
flush_state == FLUSH_DELALLOC_WAIT)) space_info->bytes_may_use -= num_bytes;
flush_state = ALLOC_CHUNK; trace_btrfs_space_reservation(root->fs_info,
"space_info", space_info->flags,
num_bytes, 0);
if (!ret) }
goto again; list_del_init(&ticket.list);
else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && ret = -ENOSPC;
flush_state < COMMIT_TRANS) }
goto again; spin_unlock(&space_info->lock);
else if (flush == BTRFS_RESERVE_FLUSH_ALL && ASSERT(list_empty(&ticket.list));
flush_state <= COMMIT_TRANS) return ret;
goto again; }
out: /**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
* @block_rsv - the block_rsv we're allocating for
* @orig_bytes - the number of bytes we want
* @flush - whether or not we can flush to make our reservation
*
* This will reserve orig_bytes number of bytes from the space info associated
* with the block_rsv. If there is not enough space it will make an attempt to
* flush out space to make room. It will do this by flushing delalloc if
* possible or committing the transaction. If flush is 0 then no attempts to
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
{
int ret;
ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
flush);
if (ret == -ENOSPC && if (ret == -ENOSPC &&
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
struct btrfs_block_rsv *global_rsv = struct btrfs_block_rsv *global_rsv =
...@@ -5137,13 +5269,8 @@ static int reserve_metadata_bytes(struct btrfs_root *root, ...@@ -5137,13 +5269,8 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
if (ret == -ENOSPC) if (ret == -ENOSPC)
trace_btrfs_space_reservation(root->fs_info, trace_btrfs_space_reservation(root->fs_info,
"space_info:enospc", "space_info:enospc",
space_info->flags, orig_bytes, 1); block_rsv->space_info->flags,
if (flushing) { orig_bytes, 1);
spin_lock(&space_info->lock);
space_info->flush = 0;
wake_up_all(&space_info->wait);
spin_unlock(&space_info->lock);
}
return ret; return ret;
} }
...@@ -5219,6 +5346,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, ...@@ -5219,6 +5346,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
return 0; return 0;
} }
/*
 * Hand back @num_bytes that are already counted in
 * space_info->bytes_may_use (i.e. space being returned from a block_rsv).
 *
 * Instead of simply dropping the bytes, they are first offered to waiting
 * reservation tickets: the priority_tickets queue is served first, then the
 * regular tickets queue.  A ticket that is completely covered is unlinked,
 * zeroed and its waiter is woken; a ticket that is only partly covered has
 * its remaining byte count reduced and stays queued.  Whatever is left after
 * both queues have been visited is subtracted from bytes_may_use.
 *
 * @fs_info:    filesystem the space info belongs to
 * @space_info: space info whose accounting is updated; its lock is taken
 *              and released here
 * @num_bytes:  number of previously accounted bytes being returned
 */
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes)
{
	struct list_head *queue = &space_info->priority_tickets;
	enum btrfs_reserve_flush_enum mode = BTRFS_RESERVE_NO_FLUSH;
	struct reserve_ticket *waiter;
	bool over_limit = false;
	u64 in_use;

	spin_lock(&space_info->lock);

	/*
	 * If the space info is still at or above its total size even without
	 * the returned bytes, tickets may only be filled while overcommit is
	 * allowed; otherwise the bytes are just released.
	 */
	in_use = space_info->bytes_used + space_info->bytes_reserved +
		 space_info->bytes_pinned + space_info->bytes_readonly +
		 space_info->bytes_may_use;
	if (in_use - num_bytes >= space_info->total_bytes)
		over_limit = true;

	for (;;) {
		while (!list_empty(queue) && num_bytes) {
			waiter = list_first_entry(queue, struct reserve_ticket,
						  list);

			/*
			 * Ask about 0 extra bytes: this space is already
			 * reserved, so adding the ticket size would count it
			 * twice.
			 */
			if (over_limit &&
			    !can_overcommit(fs_info->extent_root, space_info,
					    0, mode))
				break;

			if (num_bytes < waiter->bytes) {
				/* Partial fill: ticket keeps waiting. */
				waiter->bytes -= num_bytes;
				num_bytes = 0;
				continue;
			}

			/* Ticket fully satisfied: dequeue and wake it. */
			list_del_init(&waiter->list);
			num_bytes -= waiter->bytes;
			waiter->bytes = 0;
			wake_up(&waiter->wait);
		}

		/* Only fall through to the regular queue once. */
		if (!num_bytes || queue != &space_info->priority_tickets)
			break;

		queue = &space_info->tickets;
		mode = BTRFS_RESERVE_FLUSH_ALL;
	}

	/* Nobody wanted the remainder, drop it from the accounting. */
	space_info->bytes_may_use -= num_bytes;
	trace_btrfs_space_reservation(fs_info, "space_info",
				      space_info->flags, num_bytes, 0);
	spin_unlock(&space_info->lock);
}
/*
 * Distribute @num_bytes of freshly available space that is NOT yet counted
 * in space_info->bytes_may_use (e.g. after allocating a chunk or unpinning
 * an extent) to waiting reservation tickets.
 *
 * Priority tickets are served before regular tickets.  Every byte granted to
 * a ticket is added to bytes_may_use and traced; bytes that no ticket wants
 * are left alone (this function does not account for them).
 *
 * This function takes no lock itself.
 * NOTE(review): the caller is presumably expected to hold space_info->lock -
 * confirm against all call sites.
 */
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes)
{
	struct reserve_ticket *waiter;
	struct list_head *queue;
	u64 grant;
	int pass;

	/* Pass 0 serves the priority tickets, pass 1 the regular tickets. */
	for (pass = 0; pass < 2 && num_bytes; pass++) {
		queue = pass ? &space_info->tickets :
			       &space_info->priority_tickets;

		while (num_bytes && !list_empty(queue)) {
			waiter = list_first_entry(queue, struct reserve_ticket,
						  list);

			/* Give the ticket all it needs, or all we have left. */
			grant = num_bytes < waiter->bytes ? num_bytes :
							    waiter->bytes;

			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      grant, 1);
			space_info->bytes_may_use += grant;
			num_bytes -= grant;
			waiter->bytes -= grant;

			/* Fully satisfied: dequeue the ticket and wake its owner. */
			if (!waiter->bytes) {
				list_del_init(&waiter->list);
				wake_up(&waiter->wait);
			}
		}
	}
}
static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv, struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes) struct btrfs_block_rsv *dest, u64 num_bytes)
...@@ -5253,18 +5482,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, ...@@ -5253,18 +5482,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} }
spin_unlock(&dest->lock); spin_unlock(&dest->lock);
} }
if (num_bytes) { if (num_bytes)
spin_lock(&space_info->lock); space_info_add_old_bytes(fs_info, space_info,
space_info->bytes_may_use -= num_bytes; num_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
spin_unlock(&space_info->lock);
}
} }
} }
static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
struct btrfs_block_rsv *dst, u64 num_bytes) struct btrfs_block_rsv *dst, u64 num_bytes,
int update_size)
{ {
int ret; int ret;
...@@ -5272,7 +5498,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, ...@@ -5272,7 +5498,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
if (ret) if (ret)
return ret; return ret;
block_rsv_add_bytes(dst, num_bytes, 1); block_rsv_add_bytes(dst, num_bytes, update_size);
return 0; return 0;
} }
...@@ -5379,13 +5605,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, ...@@ -5379,13 +5605,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret; return ret;
} }
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes)
{
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
void btrfs_block_rsv_release(struct btrfs_root *root, void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, struct btrfs_block_rsv *block_rsv,
u64 num_bytes) u64 num_bytes)
...@@ -5398,48 +5617,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root, ...@@ -5398,48 +5617,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
num_bytes); num_bytes);
} }
/*
 * Compute the desired size of the global block reservation.
 *
 * The estimate is twice the checksum bytes needed for the used data blocks
 * plus 1/50th of the used data and metadata bytes, capped at one third of
 * the used metadata bytes, and finally aligned up to (nodesize << 10).
 *
 * Each space_info's bytes_used is sampled under that space_info's lock.
 */
static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
{
	int csum_size = btrfs_super_csum_size(fs_info->super_copy);
	struct btrfs_space_info *data_sinfo;
	struct btrfs_space_info *meta_sinfo;
	u64 data_bytes;
	u64 meta_bytes;
	u64 target;

	data_sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
	spin_lock(&data_sinfo->lock);
	data_bytes = data_sinfo->bytes_used;
	spin_unlock(&data_sinfo->lock);

	meta_sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	spin_lock(&meta_sinfo->lock);
	/*
	 * If the metadata space_info also carries the DATA flag, ignore the
	 * data figure (presumably mixed block groups, where it would be
	 * counted twice - TODO confirm).
	 */
	if (meta_sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
		data_bytes = 0;
	meta_bytes = meta_sinfo->bytes_used;
	spin_unlock(&meta_sinfo->lock);

	/* Two checksums' worth of space per used data block ... */
	target = (data_bytes >> fs_info->sb->s_blocksize_bits) *
		 csum_size * 2;
	/* ... plus 2% of everything currently in use. */
	target += div_u64(data_bytes + meta_bytes, 50);

	/* Never ask for more than a third of the used metadata. */
	if (target * 3 > meta_bytes)
		target = div_u64(meta_bytes, 3);

	return ALIGN(target, fs_info->extent_root->nodesize << 10);
}
static void update_global_block_rsv(struct btrfs_fs_info *fs_info) static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{ {
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
struct btrfs_space_info *sinfo = block_rsv->space_info; struct btrfs_space_info *sinfo = block_rsv->space_info;
u64 num_bytes; u64 num_bytes;
num_bytes = calc_global_metadata_size(fs_info); /*
* The global block rsv is based on the size of the extent tree, the
* checksum tree and the root tree. If the fs is empty we want to set
* it to a minimal amount for safety.
*/
num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
btrfs_root_used(&fs_info->csum_root->root_item) +
btrfs_root_used(&fs_info->tree_root->root_item);
num_bytes = max_t(u64, num_bytes, SZ_16M);
spin_lock(&sinfo->lock); spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock); spin_lock(&block_rsv->lock);
...@@ -5554,7 +5746,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, ...@@ -5554,7 +5746,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode) struct inode *inode)
{ {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); /*
* We always use trans->block_rsv here as we will have reserved space
* for our orphan when starting the transaction, using get_block_rsv()
* here will sometimes make us choose the wrong block rsv as we could be
* doing a reloc inode for a non refcounted root.
*/
struct btrfs_block_rsv *src_rsv = trans->block_rsv;
struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
/* /*
...@@ -5565,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, ...@@ -5565,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
trace_btrfs_space_reservation(root->fs_info, "orphan", trace_btrfs_space_reservation(root->fs_info, "orphan",
btrfs_ino(inode), num_bytes, 1); btrfs_ino(inode), num_bytes, 1);
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
} }
void btrfs_orphan_release_metadata(struct inode *inode) void btrfs_orphan_release_metadata(struct inode *inode)
...@@ -5620,7 +5818,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, ...@@ -5620,7 +5818,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
BTRFS_RESERVE_FLUSH_ALL); BTRFS_RESERVE_FLUSH_ALL);
if (ret == -ENOSPC && use_global_rsv) if (ret == -ENOSPC && use_global_rsv)
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
if (ret && *qgroup_reserved) if (ret && *qgroup_reserved)
btrfs_qgroup_free_meta(root, *qgroup_reserved); btrfs_qgroup_free_meta(root, *qgroup_reserved);
...@@ -5730,21 +5928,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -5730,21 +5928,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
u64 to_reserve = 0; u64 to_reserve = 0;
u64 csum_bytes; u64 csum_bytes;
unsigned nr_extents = 0; unsigned nr_extents = 0;
int extra_reserve = 0;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0; int ret = 0;
bool delalloc_lock = true; bool delalloc_lock = true;
u64 to_free = 0; u64 to_free = 0;
unsigned dropped; unsigned dropped;
bool release_extra = false;
/* If we are a free space inode we need to not flush since we will be in /* If we are a free space inode we need to not flush since we will be in
* the middle of a transaction commit. We also don't need the delalloc * the middle of a transaction commit. We also don't need the delalloc
* mutex since we won't race with anybody. We need this mostly to make * mutex since we won't race with anybody. We need this mostly to make
* lockdep shut its filthy mouth. * lockdep shut its filthy mouth.
*
* If we have a transaction open (can happen if we call truncate_block
* from truncate), then we need FLUSH_LIMIT so we don't deadlock.
*/ */
if (btrfs_is_free_space_inode(inode)) { if (btrfs_is_free_space_inode(inode)) {
flush = BTRFS_RESERVE_NO_FLUSH; flush = BTRFS_RESERVE_NO_FLUSH;
delalloc_lock = false; delalloc_lock = false;
} else if (current->journal_info) {
flush = BTRFS_RESERVE_FLUSH_LIMIT;
} }
if (flush != BTRFS_RESERVE_NO_FLUSH && if (flush != BTRFS_RESERVE_NO_FLUSH &&
...@@ -5761,24 +5964,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -5761,24 +5964,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE); BTRFS_MAX_EXTENT_SIZE);
BTRFS_I(inode)->outstanding_extents += nr_extents; BTRFS_I(inode)->outstanding_extents += nr_extents;
nr_extents = 0;
nr_extents = 0;
if (BTRFS_I(inode)->outstanding_extents > if (BTRFS_I(inode)->outstanding_extents >
BTRFS_I(inode)->reserved_extents) BTRFS_I(inode)->reserved_extents)
nr_extents = BTRFS_I(inode)->outstanding_extents - nr_extents += BTRFS_I(inode)->outstanding_extents -
BTRFS_I(inode)->reserved_extents; BTRFS_I(inode)->reserved_extents;
/* /* We always want to reserve a slot for updating the inode. */
* Add an item to reserve for updating the inode when we complete the to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
* delalloc io.
*/
if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
nr_extents++;
extra_reserve = 1;
}
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
csum_bytes = BTRFS_I(inode)->csum_bytes; csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
...@@ -5790,17 +5984,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -5790,17 +5984,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
goto out_fail; goto out_fail;
} }
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
if (unlikely(ret)) { if (unlikely(ret)) {
btrfs_qgroup_free_meta(root, nr_extents * root->nodesize); btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
goto out_fail; goto out_fail;
} }
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
if (extra_reserve) { if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, &BTRFS_I(inode)->runtime_flags)) {
&BTRFS_I(inode)->runtime_flags); to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
nr_extents--; release_extra = true;
} }
BTRFS_I(inode)->reserved_extents += nr_extents; BTRFS_I(inode)->reserved_extents += nr_extents;
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
...@@ -5811,8 +6005,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -5811,8 +6005,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (to_reserve) if (to_reserve)
trace_btrfs_space_reservation(root->fs_info, "delalloc", trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_reserve, 1); btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1); if (release_extra)
btrfs_block_rsv_release(root, block_rsv,
btrfs_calc_trans_metadata_size(root,
1));
return 0; return 0;
out_fail: out_fail:
...@@ -6044,6 +6240,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, ...@@ -6044,6 +6240,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
trace_btrfs_space_reservation(root->fs_info, "pinned",
cache->space_info->flags,
num_bytes, 1);
set_extent_dirty(info->pinned_extents, set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1, bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL); GFP_NOFS | __GFP_NOFAIL);
...@@ -6118,10 +6317,10 @@ static int pin_down_extent(struct btrfs_root *root, ...@@ -6118,10 +6317,10 @@ static int pin_down_extent(struct btrfs_root *root,
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
trace_btrfs_space_reservation(root->fs_info, "pinned",
cache->space_info->flags, num_bytes, 1);
set_extent_dirty(root->fs_info->pinned_extents, bytenr, set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
if (reserved)
trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
return 0; return 0;
} }
...@@ -6476,6 +6675,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, ...@@ -6476,6 +6675,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
spin_lock(&cache->lock); spin_lock(&cache->lock);
cache->pinned -= len; cache->pinned -= len;
space_info->bytes_pinned -= len; space_info->bytes_pinned -= len;
trace_btrfs_space_reservation(fs_info, "pinned",
space_info->flags, len, 0);
space_info->max_extent_size = 0; space_info->max_extent_size = 0;
percpu_counter_add(&space_info->total_bytes_pinned, -len); percpu_counter_add(&space_info->total_bytes_pinned, -len);
if (cache->ro) { if (cache->ro) {
...@@ -6483,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, ...@@ -6483,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
readonly = true; readonly = true;
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
if (!readonly && global_rsv->space_info == space_info) { if (!readonly && return_free_space &&
global_rsv->space_info == space_info) {
u64 to_add = len;
WARN_ON(!return_free_space);
spin_lock(&global_rsv->lock); spin_lock(&global_rsv->lock);
if (!global_rsv->full) { if (!global_rsv->full) {
len = min(len, global_rsv->size - to_add = min(len, global_rsv->size -
global_rsv->reserved); global_rsv->reserved);
global_rsv->reserved += len; global_rsv->reserved += to_add;
space_info->bytes_may_use += len; space_info->bytes_may_use += to_add;
if (global_rsv->reserved >= global_rsv->size) if (global_rsv->reserved >= global_rsv->size)
global_rsv->full = 1; global_rsv->full = 1;
trace_btrfs_space_reservation(fs_info,
"space_info",
space_info->flags,
to_add, 1);
len -= to_add;
} }
spin_unlock(&global_rsv->lock); spin_unlock(&global_rsv->lock);
/* Add to any tickets we may have */
if (len)
space_info_add_new_bytes(fs_info, space_info,
len);
} }
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
} }
...@@ -7782,12 +7996,10 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, ...@@ -7782,12 +7996,10 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
ret = btrfs_discard_extent(root, start, len, NULL); ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len); btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
trace_btrfs_reserved_extent_free(root, start, len);
} }
btrfs_put_block_group(cache); btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, start, len);
return ret; return ret;
} }
...@@ -9791,13 +10003,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ...@@ -9791,13 +10003,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info = list_entry(info->space_info.next, space_info = list_entry(info->space_info.next,
struct btrfs_space_info, struct btrfs_space_info,
list); list);
if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
/*
* Do not hide this behind enospc_debug, this is actually
* important and indicates a real bug if this happens.
*/
if (WARN_ON(space_info->bytes_pinned > 0 || if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 || space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0)) { space_info->bytes_may_use > 0))
dump_space_info(space_info, 0, 0); dump_space_info(space_info, 0, 0);
}
}
list_del(&space_info->list); list_del(&space_info->list);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
struct kobject *kobj; struct kobject *kobj;
...@@ -10005,9 +10219,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) ...@@ -10005,9 +10219,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
goto error; goto error;
} }
trace_btrfs_add_block_group(root->fs_info, cache, 0);
ret = update_space_info(info, cache->flags, found_key.offset, ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item), btrfs_block_group_used(&cache->item),
&space_info); cache->bytes_super, &space_info);
if (ret) { if (ret) {
btrfs_remove_free_space_cache(cache); btrfs_remove_free_space_cache(cache);
spin_lock(&info->block_group_cache_lock); spin_lock(&info->block_group_cache_lock);
...@@ -10020,9 +10235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) ...@@ -10020,9 +10235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
} }
cache->space_info = space_info; cache->space_info = space_info;
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock);
__link_block_group(space_info, cache); __link_block_group(space_info, cache);
...@@ -10114,7 +10326,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ...@@ -10114,7 +10326,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int ret; int ret;
struct btrfs_root *extent_root; struct btrfs_root *extent_root;
struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *cache;
extent_root = root->fs_info->extent_root; extent_root = root->fs_info->extent_root;
btrfs_set_log_full_commit(root->fs_info, trans); btrfs_set_log_full_commit(root->fs_info, trans);
...@@ -10160,7 +10371,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ...@@ -10160,7 +10371,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* assigned to our block group, but don't update its counters just yet. * assigned to our block group, but don't update its counters just yet.
* We want our bg to be added to the rbtree with its ->space_info set. * We want our bg to be added to the rbtree with its ->space_info set.
*/ */
ret = update_space_info(root->fs_info, cache->flags, 0, 0, ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0,
&cache->space_info); &cache->space_info);
if (ret) { if (ret) {
btrfs_remove_free_space_cache(cache); btrfs_remove_free_space_cache(cache);
...@@ -10179,8 +10390,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ...@@ -10179,8 +10390,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* Now that our block group has its ->space_info set and is inserted in * Now that our block group has its ->space_info set and is inserted in
* the rbtree, update the space info's counters. * the rbtree, update the space info's counters.
*/ */
trace_btrfs_add_block_group(root->fs_info, cache, 1);
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info); cache->bytes_super, &cache->space_info);
if (ret) { if (ret) {
btrfs_remove_free_space_cache(cache); btrfs_remove_free_space_cache(cache);
spin_lock(&root->fs_info->block_group_cache_lock); spin_lock(&root->fs_info->block_group_cache_lock);
...@@ -10193,16 +10405,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ...@@ -10193,16 +10405,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
} }
update_global_block_rsv(root->fs_info); update_global_block_rsv(root->fs_info);
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock);
__link_block_group(cache->space_info, cache); __link_block_group(cache->space_info, cache);
list_add_tail(&cache->bg_list, &trans->new_bgs); list_add_tail(&cache->bg_list, &trans->new_bgs);
set_avail_alloc_bits(extent_root->fs_info, type); set_avail_alloc_bits(extent_root->fs_info, type);
return 0; return 0;
} }
...@@ -10747,21 +10954,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) ...@@ -10747,21 +10954,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
mixed = 1; mixed = 1;
flags = BTRFS_BLOCK_GROUP_SYSTEM; flags = BTRFS_BLOCK_GROUP_SYSTEM;
ret = update_space_info(fs_info, flags, 0, 0, &space_info); ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
if (ret) if (ret)
goto out; goto out;
if (mixed) { if (mixed) {
flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
ret = update_space_info(fs_info, flags, 0, 0, &space_info); ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
} else { } else {
flags = BTRFS_BLOCK_GROUP_METADATA; flags = BTRFS_BLOCK_GROUP_METADATA;
ret = update_space_info(fs_info, flags, 0, 0, &space_info); ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
if (ret) if (ret)
goto out; goto out;
flags = BTRFS_BLOCK_GROUP_DATA; flags = BTRFS_BLOCK_GROUP_DATA;
ret = update_space_info(fs_info, flags, 0, 0, &space_info); ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
} }
out: out:
return ret; return ret;
......
...@@ -1629,13 +1629,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1629,13 +1629,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy. * managed to copy.
*/ */
if (num_sectors > dirty_sectors) { if (num_sectors > dirty_sectors) {
/*
* we round down because we don't want to count /* release everything except the sectors we dirtied */
* any partial blocks actually sent through the release_bytes -= dirty_sectors <<
* IO machines root->fs_info->sb->s_blocksize_bits;
*/
release_bytes = round_down(release_bytes - copied,
root->sectorsize);
if (copied > 0) { if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++; BTRFS_I(inode)->outstanding_extents++;
...@@ -2479,7 +2477,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) ...@@ -2479,7 +2477,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
} }
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
min_size); min_size, 0);
BUG_ON(ret); BUG_ON(ret);
trans->block_rsv = rsv; trans->block_rsv = rsv;
...@@ -2522,7 +2520,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) ...@@ -2522,7 +2520,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
} }
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
rsv, min_size); rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */ BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv; trans->block_rsv = rsv;
......
...@@ -5263,7 +5263,7 @@ void btrfs_evict_inode(struct inode *inode) ...@@ -5263,7 +5263,7 @@ void btrfs_evict_inode(struct inode *inode)
if (steal_from_global) { if (steal_from_global) {
if (!btrfs_check_space_for_delayed_refs(trans, root)) if (!btrfs_check_space_for_delayed_refs(trans, root))
ret = btrfs_block_rsv_migrate(global_rsv, rsv, ret = btrfs_block_rsv_migrate(global_rsv, rsv,
min_size); min_size, 0);
else else
ret = -ENOSPC; ret = -ENOSPC;
} }
...@@ -9116,7 +9116,7 @@ static int btrfs_truncate(struct inode *inode) ...@@ -9116,7 +9116,7 @@ static int btrfs_truncate(struct inode *inode)
/* Migrate the slack space for the truncate to our reserve */ /* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
min_size); min_size, 0);
BUG_ON(ret); BUG_ON(ret);
/* /*
...@@ -9156,7 +9156,7 @@ static int btrfs_truncate(struct inode *inode) ...@@ -9156,7 +9156,7 @@ static int btrfs_truncate(struct inode *inode)
} }
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
rsv, min_size); rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */ BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv; trans->block_rsv = rsv;
} }
...@@ -9177,7 +9177,6 @@ static int btrfs_truncate(struct inode *inode) ...@@ -9177,7 +9177,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_end_transaction(trans, root); ret = btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root); btrfs_btree_balance_dirty(root);
} }
out: out:
btrfs_free_block_rsv(root, rsv); btrfs_free_block_rsv(root, rsv);
......
...@@ -2604,25 +2604,28 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, ...@@ -2604,25 +2604,28 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
trans->block_rsv = rc->block_rsv; trans->block_rsv = rc->block_rsv;
rc->reserved_bytes += num_bytes; rc->reserved_bytes += num_bytes;
/*
* We are under a transaction here so we can only do limited flushing.
* If we get an enospc just kick back -EAGAIN so we know to drop the
* transaction and try to refill when we can flush all the things.
*/
ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
BTRFS_RESERVE_FLUSH_ALL); BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) { if (ret) {
if (ret == -EAGAIN) { tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES;
tmp = rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
while (tmp <= rc->reserved_bytes) while (tmp <= rc->reserved_bytes)
tmp <<= 1; tmp <<= 1;
/* /*
* only one thread can access block_rsv at this point, * only one thread can access block_rsv at this point,
* so we don't need hold lock to protect block_rsv. * so we don't need hold lock to protect block_rsv.
* we expand more reservation size here to allow enough * we expand more reservation size here to allow enough
* space for relocation and we will return earlier in * space for relocation and we will return earlier in
* enospc case. * enospc case.
*/ */
rc->block_rsv->size = tmp + rc->extent_root->nodesize * rc->block_rsv->size = tmp + rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES; RELOCATION_RESERVED_NODES;
} return -EAGAIN;
return ret;
} }
return 0; return 0;
...@@ -3871,6 +3874,7 @@ static noinline_for_stack ...@@ -3871,6 +3874,7 @@ static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc) int prepare_to_relocate(struct reloc_control *rc)
{ {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
int ret;
rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
BTRFS_BLOCK_RSV_TEMP); BTRFS_BLOCK_RSV_TEMP);
...@@ -3885,6 +3889,11 @@ int prepare_to_relocate(struct reloc_control *rc) ...@@ -3885,6 +3889,11 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->reserved_bytes = 0; rc->reserved_bytes = 0;
rc->block_rsv->size = rc->extent_root->nodesize * rc->block_rsv->size = rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES; RELOCATION_RESERVED_NODES;
ret = btrfs_block_rsv_refill(rc->extent_root,
rc->block_rsv, rc->block_rsv->size,
BTRFS_RESERVE_FLUSH_ALL);
if (ret)
return ret;
rc->create_reloc_tree = 1; rc->create_reloc_tree = 1;
set_reloc_control(rc); set_reloc_control(rc);
...@@ -4643,7 +4652,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, ...@@ -4643,7 +4652,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
if (rc->merge_reloc_tree) { if (rc->merge_reloc_tree) {
ret = btrfs_block_rsv_migrate(&pending->block_rsv, ret = btrfs_block_rsv_migrate(&pending->block_rsv,
rc->block_rsv, rc->block_rsv,
rc->nodes_relocated); rc->nodes_relocated, 1);
if (ret) if (ret)
return ret; return ret;
} }
......
...@@ -440,6 +440,46 @@ TRACE_EVENT(btrfs_sync_fs, ...@@ -440,6 +440,46 @@ TRACE_EVENT(btrfs_sync_fs,
TP_printk("wait = %d", __entry->wait) TP_printk("wait = %d", __entry->wait)
); );
/*
 * Tracepoint describing a block group being added.
 *
 * Records the filesystem fsid, the block group key's objectid and offset
 * (reported as "offset" and "size"), the block group flags (printed both
 * numerically and symbolically via BTRFS_GROUP_FLAGS), the used bytes from
 * the block group item, bytes_super, and the caller-supplied 'create' value
 * (the callers in this change pass 0 from btrfs_read_block_groups() and 1
 * from btrfs_make_block_group()).
 */
TRACE_EVENT(btrfs_add_block_group,
TP_PROTO(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group, int create),
TP_ARGS(fs_info, block_group, create),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, offset )
__field( u64, size )
__field( u64, flags )
__field( u64, bytes_used )
__field( u64, bytes_super )
__field( int, create )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->offset = block_group->key.objectid;
__entry->size = block_group->key.offset;
__entry->flags = block_group->flags;
__entry->bytes_used =
btrfs_block_group_used(&block_group->item);
__entry->bytes_super = block_group->bytes_super;
__entry->create = create;
),
TP_printk("%pU: block_group offset = %llu, size = %llu, "
"flags = %llu(%s), bytes_used = %llu, bytes_super = %llu, "
"create = %d", __entry->fsid,
(unsigned long long)__entry->offset,
(unsigned long long)__entry->size,
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->bytes_used,
(unsigned long long)__entry->bytes_super, __entry->create)
);
#define show_ref_action(action) \ #define show_ref_action(action) \
__print_symbolic(action, \ __print_symbolic(action, \
{ BTRFS_ADD_DELAYED_REF, "ADD_DELAYED_REF" }, \ { BTRFS_ADD_DELAYED_REF, "ADD_DELAYED_REF" }, \
...@@ -744,6 +784,88 @@ TRACE_EVENT(btrfs_space_reservation, ...@@ -744,6 +784,88 @@ TRACE_EVENT(btrfs_space_reservation,
__entry->bytes) __entry->bytes)
); );
/*
 * Translate a BTRFS_RESERVE_* flush mode value into its symbolic name for
 * trace output, using __print_symbolic.
 */
#define show_flush_action(action) \
__print_symbolic(action, \
{ BTRFS_RESERVE_NO_FLUSH, "BTRFS_RESERVE_NO_FLUSH"}, \
{ BTRFS_RESERVE_FLUSH_LIMIT, "BTRFS_RESERVE_FLUSH_LIMIT"}, \
{ BTRFS_RESERVE_FLUSH_ALL, "BTRFS_RESERVE_FLUSH_ALL"})
/*
 * Tracepoint for flush trigger events.
 *
 * Records the filesystem UUID, the flags and byte count passed by the
 * caller, the flush mode, and a caller-supplied reason string.
 */
TRACE_EVENT(btrfs_trigger_flush,
TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
int flush, char *reason),
TP_ARGS(fs_info, flags, bytes, flush, reason),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, flags )
__field( u64, bytes )
__field( int, flush )
__string( reason, reason )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->flags = flags;
__entry->bytes = bytes;
__entry->flush = flush;
/*
 * Terminate explicitly like every other statement here instead of
 * relying on the macro expansion to provide the semicolon.
 */
__assign_str(reason, reason);
),
/*
 * flush is printed as a number and as its symbolic name; flags is printed
 * as a raw number and decoded via BTRFS_GROUP_FLAGS.
 */
TP_printk("%pU: %s: flush = %d(%s), flags = %llu(%s), bytes = %llu",
__entry->fsid, __get_str(reason), __entry->flush,
show_flush_action(__entry->flush),
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->bytes)
);
/*
 * Translate a flush state value (FLUSH_DELAYED_ITEMS_NR ... COMMIT_TRANS)
 * into its symbolic name for trace output, using __print_symbolic.
 */
#define show_flush_state(state) \
__print_symbolic(state, \
{ FLUSH_DELAYED_ITEMS_NR, "FLUSH_DELAYED_ITEMS_NR"}, \
{ FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS"}, \
{ FLUSH_DELALLOC, "FLUSH_DELALLOC"}, \
{ FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT"}, \
{ ALLOC_CHUNK, "ALLOC_CHUNK"}, \
{ COMMIT_TRANS, "COMMIT_TRANS"})
/*
 * Tracepoint for space flushing events.
 *
 * Records the filesystem UUID, the flags, the num_bytes and orig_bytes
 * values, the flush state and the result code passed by the caller.
 */
TRACE_EVENT(btrfs_flush_space,
TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
u64 orig_bytes, int state, int ret),
TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, flags )
__field( u64, num_bytes )
__field( u64, orig_bytes )
__field( int, state )
__field( int, ret )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->flags = flags;
__entry->num_bytes = num_bytes;
__entry->orig_bytes = orig_bytes;
__entry->state = state;
__entry->ret = ret;
),
/*
 * state is printed as a number and as its symbolic name; flags is printed
 * as a raw number and decoded via BTRFS_GROUP_FLAGS.
 */
TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
"orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
show_flush_state(__entry->state),
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->num_bytes,
(unsigned long long)__entry->orig_bytes, __entry->ret)
);
DECLARE_EVENT_CLASS(btrfs__reserved_extent, DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_PROTO(struct btrfs_root *root, u64 start, u64 len), TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
...@@ -751,18 +873,21 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent, ...@@ -751,18 +873,21 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_ARGS(root, start, len), TP_ARGS(root, start, len),
TP_STRUCT__entry( TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, root_objectid ) __field( u64, root_objectid )
__field( u64, start ) __field( u64, start )
__field( u64, len ) __field( u64, len )
), ),
TP_fast_assign( TP_fast_assign(
memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
__entry->root_objectid = root->root_key.objectid; __entry->root_objectid = root->root_key.objectid;
__entry->start = start; __entry->start = start;
__entry->len = len; __entry->len = len;
), ),
TP_printk("root = %llu(%s), start = %llu, len = %llu", TP_printk("%pU: root = %llu(%s), start = %llu, len = %llu",
__entry->fsid,
show_root_type(__entry->root_objectid), show_root_type(__entry->root_objectid),
(unsigned long long)__entry->start, (unsigned long long)__entry->start,
(unsigned long long)__entry->len) (unsigned long long)__entry->len)
...@@ -819,6 +944,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent, ...@@ -819,6 +944,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
TP_ARGS(root, block_group, start, len), TP_ARGS(root, block_group, start, len),
TP_STRUCT__entry( TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, root_objectid ) __field( u64, root_objectid )
__field( u64, bg_objectid ) __field( u64, bg_objectid )
__field( u64, flags ) __field( u64, flags )
...@@ -827,6 +953,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent, ...@@ -827,6 +953,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
), ),
TP_fast_assign( TP_fast_assign(
memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
__entry->root_objectid = root->root_key.objectid; __entry->root_objectid = root->root_key.objectid;
__entry->bg_objectid = block_group->key.objectid; __entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags; __entry->flags = block_group->flags;
...@@ -834,8 +961,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent, ...@@ -834,8 +961,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
__entry->len = len; __entry->len = len;
), ),
TP_printk("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), " TP_printk("%pU: root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
"start = %Lu, len = %Lu", "start = %Lu, len = %Lu", __entry->fsid,
show_root_type(__entry->root_objectid), __entry->bg_objectid, show_root_type(__entry->root_objectid), __entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags, __entry->flags, __print_flags((unsigned long)__entry->flags,
"|", BTRFS_GROUP_FLAGS), "|", BTRFS_GROUP_FLAGS),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment