Commit ba929b66 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This pull is dedicated to Josef's enospc rework, which we've been
  testing for a few releases now.  It fixes some early enospc problems
  and is dramatically faster.

  This also includes an updated fix for the delalloc accounting that
  happens after a fault in copy_from_user.  My patch in v4.7 was almost
  but not quite enough"

* 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix delalloc accounting after copy_from_user faults
  Btrfs: avoid deadlocks during reservations in btrfs_truncate_block
  Btrfs: use FLUSH_LIMIT for relocation in reserve_metadata_bytes
  Btrfs: fill relocation block rsv after allocation
  Btrfs: always use trans->block_rsv for orphans
  Btrfs: change how we calculate the global block rsv
  Btrfs: use root when checking need_async_flush
  Btrfs: don't bother kicking async if there's nothing to reclaim
  Btrfs: fix release reserved extents trace points
  Btrfs: add fsid to some tracepoints
  Btrfs: add tracepoints for flush events
  Btrfs: fix delalloc reservation amount tracepoint
  Btrfs: trace pinned extents
  Btrfs: introduce ticketed enospc infrastructure
  Btrfs: add tracepoint for adding block groups
  Btrfs: warn_on for unaccounted spaces
  Btrfs: change delayed reservation fallback behavior
  Btrfs: always reserve metadata for delalloc extents
  Btrfs: fix callers of btrfs_block_rsv_migrate
  Btrfs: add bytes_readonly to the spaceinfo at once
parents c9b95e59 8b8b08cb
......@@ -439,6 +439,8 @@ struct btrfs_space_info {
struct list_head list;
/* Protected by the spinlock 'lock'. */
struct list_head ro_bgs;
struct list_head priority_tickets;
struct list_head tickets;
struct rw_semaphore groups_sem;
/* for block groups in our same type */
......@@ -2624,6 +2626,15 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_ALL,
};
enum btrfs_flush_state {
FLUSH_DELAYED_ITEMS_NR = 1,
FLUSH_DELAYED_ITEMS = 2,
FLUSH_DELALLOC = 3,
FLUSH_DELALLOC_WAIT = 4,
ALLOC_CHUNK = 5,
COMMIT_TRANS = 6,
};
int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
......@@ -2661,8 +2672,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
enum btrfs_reserve_flush_enum flush);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
int update_size);
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor);
......
......@@ -553,7 +553,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
dst_rsv = &root->fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_item",
item->key.objectid,
......@@ -597,6 +597,29 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
/*
* If our block_rsv is the delalloc block reserve then check and see if
* we have our extra reservation for updating the inode. If not fall
* through and try to reserve space quickly.
*
* We used to try and steal from the delalloc block rsv or the global
* reserve, but we'd steal a full reservation, which isn't kind. We are
* here through delalloc which means we've likely just cowed down close
* to the leaf that contains the inode, so we would steal less just
* doing the fallback inode update, so if we do end up having to steal
* from the global block rsv we hopefully only steal one or two blocks
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&BTRFS_I(inode)->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
release = true;
else
src_rsv = NULL;
spin_unlock(&BTRFS_I(inode)->lock);
}
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
......@@ -626,51 +649,10 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes, 1);
}
return ret;
} else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&BTRFS_I(inode)->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
spin_unlock(&BTRFS_I(inode)->lock);
release = true;
goto migrate;
}
spin_unlock(&BTRFS_I(inode)->lock);
/* Ok we didn't have space pre-reserved. This shouldn't happen
* too often but it can happen if we do delalloc to an existing
* inode which gets dirtied because of the time update, and then
* isn't touched again until after the transaction commits and
* then we try to write out the data. First try to be nice and
* reserve something strictly for us. If not be a pain and try
* to steal from the delalloc block rsv.
*/
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
if (!ret)
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
if (!ret)
goto out;
if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
btrfs_debug(root->fs_info,
"block rsv migrate returned %d", ret);
WARN_ON(1);
}
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
*/
ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
dst_rsv, num_bytes);
goto out;
}
migrate:
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
out:
/*
* Migrate only takes a reservation, it doesn't touch the size of the
* block_rsv. This is to simplify people who don't normally have things
......
......@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int reserved);
static int __reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes);
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
......@@ -3913,6 +3923,7 @@ static const char *alloc_name(u64 flags)
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
u64 bytes_readonly,
struct btrfs_space_info **space_info)
{
struct btrfs_space_info *found;
......@@ -3933,8 +3944,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->bytes_readonly += bytes_readonly;
if (total_bytes > 0)
found->full = 0;
space_info_add_new_bytes(info, found, total_bytes -
bytes_used - bytes_readonly);
spin_unlock(&found->lock);
*space_info = found;
return 0;
......@@ -3960,7 +3974,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->disk_used = bytes_used * factor;
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_readonly = bytes_readonly;
found->bytes_may_use = 0;
found->full = 0;
found->max_extent_size = 0;
......@@ -3969,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->flush = 0;
init_waitqueue_head(&found->wait);
INIT_LIST_HEAD(&found->ro_bgs);
INIT_LIST_HEAD(&found->tickets);
INIT_LIST_HEAD(&found->priority_tickets);
ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
info->space_info_kobj, "%s",
......@@ -4470,7 +4486,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
ret = update_space_info(extent_root->fs_info, flags,
0, 0, &space_info);
0, 0, 0, &space_info);
BUG_ON(ret); /* -ENOMEM */
}
BUG_ON(!space_info); /* Logic error */
......@@ -4582,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root,
struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush)
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 profile = btrfs_get_alloc_profile(root, 0);
struct btrfs_block_rsv *global_rsv;
u64 profile;
u64 space_size;
u64 avail;
u64 used;
/* Don't overcommit when in mixed mode. */
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
return 0;
BUG_ON(root->fs_info == NULL);
global_rsv = &root->fs_info->global_block_rsv;
profile = btrfs_get_alloc_profile(root, 0);
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
......@@ -4739,6 +4762,11 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
spin_unlock(&space_info->lock);
break;
}
if (list_empty(&space_info->tickets) &&
list_empty(&space_info->priority_tickets)) {
spin_unlock(&space_info->lock);
break;
}
spin_unlock(&space_info->lock);
loops++;
......@@ -4807,13 +4835,11 @@ static int may_commit_transaction(struct btrfs_root *root,
return btrfs_commit_transaction(trans, root);
}
enum flush_state {
FLUSH_DELAYED_ITEMS_NR = 1,
FLUSH_DELAYED_ITEMS = 2,
FLUSH_DELALLOC = 3,
FLUSH_DELALLOC_WAIT = 4,
ALLOC_CHUNK = 5,
COMMIT_TRANS = 6,
struct reserve_ticket {
u64 bytes;
int error;
struct list_head list;
wait_queue_head_t wait;
};
static int flush_space(struct btrfs_root *root,
......@@ -4866,6 +4892,8 @@ static int flush_space(struct btrfs_root *root,
break;
}
trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
orig_bytes, state, ret);
return ret;
}
......@@ -4873,17 +4901,22 @@ static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
struct btrfs_space_info *space_info)
{
struct reserve_ticket *ticket;
u64 used;
u64 expected;
u64 to_reclaim;
u64 to_reclaim = 0;
to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
spin_lock(&space_info->lock);
if (can_overcommit(root, space_info, to_reclaim,
BTRFS_RESERVE_FLUSH_ALL)) {
to_reclaim = 0;
goto out;
}
BTRFS_RESERVE_FLUSH_ALL))
return 0;
list_for_each_entry(ticket, &space_info->tickets, list)
to_reclaim += ticket->bytes;
list_for_each_entry(ticket, &space_info->priority_tickets, list)
to_reclaim += ticket->bytes;
if (to_reclaim)
return to_reclaim;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
......@@ -4899,14 +4932,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
to_reclaim = 0;
to_reclaim = min(to_reclaim, space_info->bytes_may_use +
space_info->bytes_reserved);
out:
spin_unlock(&space_info->lock);
return to_reclaim;
}
static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
struct btrfs_fs_info *fs_info, u64 used)
struct btrfs_root *root, u64 used)
{
u64 thresh = div_factor_fine(space_info->total_bytes, 98);
......@@ -4914,73 +4944,177 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0;
return (used >= thresh && !btrfs_fs_closing(fs_info) &&
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
if (!btrfs_calc_reclaim_metadata_size(root, space_info))
return 0;
return (used >= thresh && !btrfs_fs_closing(root->fs_info) &&
!test_bit(BTRFS_FS_STATE_REMOUNTING,
&root->fs_info->fs_state));
}
static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
struct btrfs_fs_info *fs_info,
int flush_state)
static void wake_all_tickets(struct list_head *head)
{
u64 used;
spin_lock(&space_info->lock);
/*
* We run out of space and have not got any free space via flush_space,
* so don't bother doing async reclaim.
*/
if (flush_state > COMMIT_TRANS && space_info->full) {
spin_unlock(&space_info->lock);
return 0;
}
struct reserve_ticket *ticket;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
if (need_do_async_reclaim(space_info, fs_info, used)) {
spin_unlock(&space_info->lock);
return 1;
while (!list_empty(head)) {
ticket = list_first_entry(head, struct reserve_ticket, list);
list_del_init(&ticket->list);
ticket->error = -ENOSPC;
wake_up(&ticket->wait);
}
spin_unlock(&space_info->lock);
return 0;
}
/*
* This is for normal flushers, we can wait all goddamned day if we want to. We
* will loop and continuously try to flush as long as we are making progress.
* We count progress as clearing off tickets each time we have to loop.
*/
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
struct reserve_ticket *last_ticket = NULL;
struct btrfs_fs_info *fs_info;
struct btrfs_space_info *space_info;
u64 to_reclaim;
int flush_state;
int commit_cycles = 0;
fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info);
if (!to_reclaim)
if (!to_reclaim) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
return;
}
last_ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
spin_unlock(&space_info->lock);
flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
struct reserve_ticket *ticket;
int ret;
ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
to_reclaim, flush_state);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
return;
}
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info);
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
if (last_ticket == ticket) {
flush_state++;
} else {
last_ticket = ticket;
flush_state = FLUSH_DELAYED_ITEMS_NR;
if (commit_cycles)
commit_cycles--;
}
if (flush_state > COMMIT_TRANS) {
commit_cycles++;
if (commit_cycles > 2) {
wake_all_tickets(&space_info->tickets);
space_info->flush = 0;
} else {
flush_state = FLUSH_DELAYED_ITEMS_NR;
}
}
spin_unlock(&space_info->lock);
} while (flush_state <= COMMIT_TRANS);
}
void btrfs_init_async_reclaim_work(struct work_struct *work)
{
INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}
static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
u64 to_reclaim;
int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info);
if (!to_reclaim) {
spin_unlock(&space_info->lock);
return;
}
spin_unlock(&space_info->lock);
do {
flush_space(fs_info->fs_root, space_info, to_reclaim,
to_reclaim, flush_state);
flush_state++;
if (!btrfs_need_do_async_reclaim(space_info, fs_info,
flush_state))
spin_lock(&space_info->lock);
if (ticket->bytes == 0) {
spin_unlock(&space_info->lock);
return;
}
spin_unlock(&space_info->lock);
/*
* Priority flushers can't wait on delalloc without
* deadlocking.
*/
if (flush_state == FLUSH_DELALLOC ||
flush_state == FLUSH_DELALLOC_WAIT)
flush_state = ALLOC_CHUNK;
} while (flush_state < COMMIT_TRANS);
}
void btrfs_init_async_reclaim_work(struct work_struct *work)
static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
struct reserve_ticket *ticket, u64 orig_bytes)
{
INIT_WORK(work, btrfs_async_reclaim_metadata_space);
DEFINE_WAIT(wait);
int ret = 0;
spin_lock(&space_info->lock);
while (ticket->bytes > 0 && ticket->error == 0) {
ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
if (ret) {
ret = -EINTR;
break;
}
spin_unlock(&space_info->lock);
schedule();
finish_wait(&ticket->wait, &wait);
spin_lock(&space_info->lock);
}
if (!ret)
ret = ticket->error;
if (!list_empty(&ticket->list))
list_del_init(&ticket->list);
if (ticket->bytes && ticket->bytes < orig_bytes) {
u64 num_bytes = orig_bytes - ticket->bytes;
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
}
spin_unlock(&space_info->lock);
return ret;
}
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
* @block_rsv - the block_rsv we're allocating for
* @space_info - the space info we want to allocate from
* @orig_bytes - the number of bytes we want
* @flush - whether or not we can flush to make our reservation
*
......@@ -4991,81 +5125,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
static int __reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
struct reserve_ticket ticket;
u64 used;
u64 num_bytes = orig_bytes;
int flush_state = FLUSH_DELAYED_ITEMS_NR;
int ret = 0;
bool flushing = false;
again:
ret = 0;
spin_lock(&space_info->lock);
/*
* We only want to wait if somebody other than us is flushing and we
* are actually allowed to flush all things.
*/
while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
space_info->flush) {
spin_unlock(&space_info->lock);
/*
* If we have a trans handle we can't wait because the flusher
* may have to commit the transaction, which would mean we would
* deadlock since we are waiting for the flusher to finish, but
* hold the current transaction open.
*/
if (current->journal_info)
return -EAGAIN;
ret = wait_event_killable(space_info->wait, !space_info->flush);
/* Must have been killed, return */
if (ret)
return -EINTR;
ASSERT(orig_bytes);
ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
spin_lock(&space_info->lock);
}
ret = -ENOSPC;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
/*
* The idea here is that we've not already over-reserved the block group
* then we can go ahead and save our reservation first and then start
* flushing if we need to. Otherwise if we've already overcommitted
* lets start flushing stuff first and then come back and try to make
* our reservation.
* If we have enough space then hooray, make our reservation and carry
* on. If not see if we can overcommit, and if we can, hooray carry on.
* If not things get more complicated.
*/
if (used <= space_info->total_bytes) {
if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info,
"space_info", space_info->flags, orig_bytes, 1);
trace_btrfs_space_reservation(root->fs_info, "space_info",
space_info->flags, orig_bytes,
1);
ret = 0;
} else {
/*
* Ok set num_bytes to orig_bytes since we aren't
* overocmmitted, this way we only try and reclaim what
* we need.
*/
num_bytes = orig_bytes;
}
} else {
/*
* Ok we're over committed, set num_bytes to the overcommitted
* amount plus the amount of bytes that we need for this
* reservation.
*/
num_bytes = used - space_info->total_bytes +
(orig_bytes * 2);
}
if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
} else if (can_overcommit(root, space_info, orig_bytes, flush)) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
space_info->flags, orig_bytes,
......@@ -5074,16 +5163,31 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
}
/*
* Couldn't make our reservation, save our place so while we're trying
* to reclaim space we can actually use it instead of somebody else
* stealing it from us.
* If we couldn't make a reservation then setup our reservation ticket
* and kick the async worker if it's not already running.
*
* We make the other tasks wait for the flush only when we can flush
* all things.
* If we are a priority flusher then we just need to add our ticket to
* the list and we will do our own flushing further down.
*/
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
flushing = true;
ticket.bytes = orig_bytes;
ticket.error = 0;
init_waitqueue_head(&ticket.wait);
if (flush == BTRFS_RESERVE_FLUSH_ALL) {
list_add_tail(&ticket.list, &space_info->tickets);
if (!space_info->flush) {
space_info->flush = 1;
trace_btrfs_trigger_flush(root->fs_info,
space_info->flags,
orig_bytes, flush,
"enospc");
queue_work(system_unbound_wq,
&root->fs_info->async_reclaim_work);
}
} else {
list_add_tail(&ticket.list,
&space_info->priority_tickets);
}
} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
used += orig_bytes;
/*
......@@ -5092,39 +5196,67 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
* the async reclaim as we will panic.
*/
if (!root->fs_info->log_root_recovering &&
need_do_async_reclaim(space_info, root->fs_info, used) &&
!work_busy(&root->fs_info->async_reclaim_work))
need_do_async_reclaim(space_info, root, used) &&
!work_busy(&root->fs_info->async_reclaim_work)) {
trace_btrfs_trigger_flush(root->fs_info,
space_info->flags,
orig_bytes, flush,
"preempt");
queue_work(system_unbound_wq,
&root->fs_info->async_reclaim_work);
}
}
spin_unlock(&space_info->lock);
if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
goto out;
return ret;
ret = flush_space(root, space_info, num_bytes, orig_bytes,
flush_state);
flush_state++;
if (flush == BTRFS_RESERVE_FLUSH_ALL)
return wait_reserve_ticket(root->fs_info, space_info, &ticket,
orig_bytes);
/*
* If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
* would happen. So skip delalloc flush.
*/
if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
(flush_state == FLUSH_DELALLOC ||
flush_state == FLUSH_DELALLOC_WAIT))
flush_state = ALLOC_CHUNK;
ret = 0;
priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
spin_lock(&space_info->lock);
if (ticket.bytes) {
if (ticket.bytes < orig_bytes) {
u64 num_bytes = orig_bytes - ticket.bytes;
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(root->fs_info,
"space_info", space_info->flags,
num_bytes, 0);
if (!ret)
goto again;
else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
flush_state < COMMIT_TRANS)
goto again;
else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
flush_state <= COMMIT_TRANS)
goto again;
}
list_del_init(&ticket.list);
ret = -ENOSPC;
}
spin_unlock(&space_info->lock);
ASSERT(list_empty(&ticket.list));
return ret;
}
out:
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
* @block_rsv - the block_rsv we're allocating for
* @orig_bytes - the number of bytes we want
* @flush - whether or not we can flush to make our reservation
*
* This will reserve orgi_bytes number of bytes from the space info associated
* with the block_rsv. If there is not enough space it will make an attempt to
* flush out space to make room. It will do this by flushing delalloc if
* possible or committing the transaction. If flush is 0 then no attempts to
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
{
int ret;
ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
flush);
if (ret == -ENOSPC &&
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
struct btrfs_block_rsv *global_rsv =
......@@ -5137,13 +5269,8 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
if (ret == -ENOSPC)
trace_btrfs_space_reservation(root->fs_info,
"space_info:enospc",
space_info->flags, orig_bytes, 1);
if (flushing) {
spin_lock(&space_info->lock);
space_info->flush = 0;
wake_up_all(&space_info->wait);
spin_unlock(&space_info->lock);
}
block_rsv->space_info->flags,
orig_bytes, 1);
return ret;
}
......@@ -5219,6 +5346,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
return 0;
}
/*
* This is for space we already have accounted in space_info->bytes_may_use, so
* basically when we're returning space from block_rsv's.
*/
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes)
{
struct reserve_ticket *ticket;
struct list_head *head;
u64 used;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
bool check_overcommit = false;
spin_lock(&space_info->lock);
head = &space_info->priority_tickets;
/*
* If we are over our limit then we need to check and see if we can
* overcommit, and if we can't then we just need to free up our space
* and not satisfy any requests.
*/
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
if (used - num_bytes >= space_info->total_bytes)
check_overcommit = true;
again:
while (!list_empty(head) && num_bytes) {
ticket = list_first_entry(head, struct reserve_ticket,
list);
/*
* We use 0 bytes because this space is already reserved, so
* adding the ticket space would be a double count.
*/
if (check_overcommit &&
!can_overcommit(fs_info->extent_root, space_info, 0,
flush))
break;
if (num_bytes >= ticket->bytes) {
list_del_init(&ticket->list);
num_bytes -= ticket->bytes;
ticket->bytes = 0;
wake_up(&ticket->wait);
} else {
ticket->bytes -= num_bytes;
num_bytes = 0;
}
}
if (num_bytes && head == &space_info->priority_tickets) {
head = &space_info->tickets;
flush = BTRFS_RESERVE_FLUSH_ALL;
goto again;
}
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
spin_unlock(&space_info->lock);
}
/*
* This is for newly allocated space that isn't accounted in
* space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
* we use this helper.
*/
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes)
{
struct reserve_ticket *ticket;
struct list_head *head = &space_info->priority_tickets;
again:
while (!list_empty(head) && num_bytes) {
ticket = list_first_entry(head, struct reserve_ticket,
list);
if (num_bytes >= ticket->bytes) {
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags,
ticket->bytes, 1);
list_del_init(&ticket->list);
num_bytes -= ticket->bytes;
space_info->bytes_may_use += ticket->bytes;
ticket->bytes = 0;
wake_up(&ticket->wait);
} else {
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags,
num_bytes, 1);
space_info->bytes_may_use += num_bytes;
ticket->bytes -= num_bytes;
num_bytes = 0;
}
}
if (num_bytes && head == &space_info->priority_tickets) {
head = &space_info->tickets;
goto again;
}
}
static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes)
......@@ -5253,18 +5482,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
}
spin_unlock(&dest->lock);
}
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
spin_unlock(&space_info->lock);
}
if (num_bytes)
space_info_add_old_bytes(fs_info, space_info,
num_bytes);
}
}
static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
struct btrfs_block_rsv *dst, u64 num_bytes)
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
struct btrfs_block_rsv *dst, u64 num_bytes,
int update_size)
{
int ret;
......@@ -5272,7 +5498,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
if (ret)
return ret;
block_rsv_add_bytes(dst, num_bytes, 1);
block_rsv_add_bytes(dst, num_bytes, update_size);
return 0;
}
......@@ -5379,13 +5605,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes)
{
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
......@@ -5398,48 +5617,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
num_bytes);
}
/*
* helper to calculate size of global block reservation.
* the desired value is sum of space used by extent tree,
* checksum tree and root tree
*/
static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *sinfo;
u64 num_bytes;
u64 meta_used;
u64 data_used;
int csum_size = btrfs_super_csum_size(fs_info->super_copy);
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
spin_lock(&sinfo->lock);
data_used = sinfo->bytes_used;
spin_unlock(&sinfo->lock);
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&sinfo->lock);
if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
data_used = 0;
meta_used = sinfo->bytes_used;
spin_unlock(&sinfo->lock);
num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
csum_size * 2;
num_bytes += div_u64(data_used + meta_used, 50);
if (num_bytes * 3 > meta_used)
num_bytes = div_u64(meta_used, 3);
return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
}
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
struct btrfs_space_info *sinfo = block_rsv->space_info;
u64 num_bytes;
num_bytes = calc_global_metadata_size(fs_info);
/*
* The global block rsv is based on the size of the extent tree, the
* checksum tree and the root tree. If the fs is empty we want to set
* it to a minimal amount for safety.
*/
num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
btrfs_root_used(&fs_info->csum_root->root_item) +
btrfs_root_used(&fs_info->tree_root->root_item);
num_bytes = max_t(u64, num_bytes, SZ_16M);
spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock);
......@@ -5554,7 +5746,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
/*
* We always use trans->block_rsv here as we will have reserved space
* for our orphan when starting the transaction, using get_block_rsv()
* here will sometimes make us choose the wrong block rsv as we could be
* doing a reloc inode for a non refcounted root.
*/
struct btrfs_block_rsv *src_rsv = trans->block_rsv;
struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
/*
......@@ -5565,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
trace_btrfs_space_reservation(root->fs_info, "orphan",
btrfs_ino(inode), num_bytes, 1);
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}
void btrfs_orphan_release_metadata(struct inode *inode)
......@@ -5620,7 +5818,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
BTRFS_RESERVE_FLUSH_ALL);
if (ret == -ENOSPC && use_global_rsv)
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
if (ret && *qgroup_reserved)
btrfs_qgroup_free_meta(root, *qgroup_reserved);
......@@ -5730,21 +5928,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
u64 to_reserve = 0;
u64 csum_bytes;
unsigned nr_extents = 0;
int extra_reserve = 0;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
bool delalloc_lock = true;
u64 to_free = 0;
unsigned dropped;
bool release_extra = false;
/* If we are a free space inode we need to not flush since we will be in
* the middle of a transaction commit. We also don't need the delalloc
* mutex since we won't race with anybody. We need this mostly to make
* lockdep shut its filthy mouth.
*
* If we have a transaction open (can happen if we call truncate_block
* from truncate), then we need FLUSH_LIMIT so we don't deadlock.
*/
if (btrfs_is_free_space_inode(inode)) {
flush = BTRFS_RESERVE_NO_FLUSH;
delalloc_lock = false;
} else if (current->journal_info) {
flush = BTRFS_RESERVE_FLUSH_LIMIT;
}
if (flush != BTRFS_RESERVE_NO_FLUSH &&
......@@ -5761,24 +5964,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
BTRFS_I(inode)->outstanding_extents += nr_extents;
nr_extents = 0;
nr_extents = 0;
if (BTRFS_I(inode)->outstanding_extents >
BTRFS_I(inode)->reserved_extents)
nr_extents = BTRFS_I(inode)->outstanding_extents -
nr_extents += BTRFS_I(inode)->outstanding_extents -
BTRFS_I(inode)->reserved_extents;
/*
* Add an item to reserve for updating the inode when we complete the
* delalloc io.
*/
if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
nr_extents++;
extra_reserve = 1;
}
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
/* We always want to reserve a slot for updating the inode. */
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
......@@ -5790,17 +5984,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
goto out_fail;
}
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
if (unlikely(ret)) {
btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
goto out_fail;
}
spin_lock(&BTRFS_I(inode)->lock);
if (extra_reserve) {
set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags);
nr_extents--;
if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
release_extra = true;
}
BTRFS_I(inode)->reserved_extents += nr_extents;
spin_unlock(&BTRFS_I(inode)->lock);
......@@ -5811,8 +6005,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (to_reserve)
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
if (release_extra)
btrfs_block_rsv_release(root, block_rsv,
btrfs_calc_trans_metadata_size(root,
1));
return 0;
out_fail:
......@@ -6044,6 +6240,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
trace_btrfs_space_reservation(root->fs_info, "pinned",
cache->space_info->flags,
num_bytes, 1);
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
......@@ -6118,10 +6317,10 @@ static int pin_down_extent(struct btrfs_root *root,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
trace_btrfs_space_reservation(root->fs_info, "pinned",
cache->space_info->flags, num_bytes, 1);
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
if (reserved)
trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
return 0;
}
......@@ -6476,6 +6675,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
spin_lock(&cache->lock);
cache->pinned -= len;
space_info->bytes_pinned -= len;
trace_btrfs_space_reservation(fs_info, "pinned",
space_info->flags, len, 0);
space_info->max_extent_size = 0;
percpu_counter_add(&space_info->total_bytes_pinned, -len);
if (cache->ro) {
......@@ -6483,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
readonly = true;
}
spin_unlock(&cache->lock);
if (!readonly && global_rsv->space_info == space_info) {
if (!readonly && return_free_space &&
global_rsv->space_info == space_info) {
u64 to_add = len;
WARN_ON(!return_free_space);
spin_lock(&global_rsv->lock);
if (!global_rsv->full) {
len = min(len, global_rsv->size -
to_add = min(len, global_rsv->size -
global_rsv->reserved);
global_rsv->reserved += len;
space_info->bytes_may_use += len;
global_rsv->reserved += to_add;
space_info->bytes_may_use += to_add;
if (global_rsv->reserved >= global_rsv->size)
global_rsv->full = 1;
trace_btrfs_space_reservation(fs_info,
"space_info",
space_info->flags,
to_add, 1);
len -= to_add;
}
spin_unlock(&global_rsv->lock);
/* Add to any tickets we may have */
if (len)
space_info_add_new_bytes(fs_info, space_info,
len);
}
spin_unlock(&space_info->lock);
}
......@@ -7782,12 +7996,10 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
trace_btrfs_reserved_extent_free(root, start, len);
}
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, start, len);
return ret;
}
......@@ -9791,13 +10003,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
/*
* Do not hide this behind enospc_debug, this is actually
* important and indicates a real bug if this happens.
*/
if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0)) {
space_info->bytes_may_use > 0))
dump_space_info(space_info, 0, 0);
}
}
list_del(&space_info->list);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
struct kobject *kobj;
......@@ -10005,9 +10219,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
goto error;
}
trace_btrfs_add_block_group(root->fs_info, cache, 0);
ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item),
&space_info);
cache->bytes_super, &space_info);
if (ret) {
btrfs_remove_free_space_cache(cache);
spin_lock(&info->block_group_cache_lock);
......@@ -10020,9 +10235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
}
cache->space_info = space_info;
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock);
__link_block_group(space_info, cache);
......@@ -10114,7 +10326,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_root *extent_root;
struct btrfs_block_group_cache *cache;
extent_root = root->fs_info->extent_root;
btrfs_set_log_full_commit(root->fs_info, trans);
......@@ -10160,7 +10371,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* assigned to our block group, but don't update its counters just yet.
* We want our bg to be added to the rbtree with its ->space_info set.
*/
ret = update_space_info(root->fs_info, cache->flags, 0, 0,
ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0,
&cache->space_info);
if (ret) {
btrfs_remove_free_space_cache(cache);
......@@ -10179,8 +10390,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* Now that our block group has its ->space_info set and is inserted in
* the rbtree, update the space info's counters.
*/
trace_btrfs_add_block_group(root->fs_info, cache, 1);
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
cache->bytes_super, &cache->space_info);
if (ret) {
btrfs_remove_free_space_cache(cache);
spin_lock(&root->fs_info->block_group_cache_lock);
......@@ -10193,16 +10405,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
}
update_global_block_rsv(root->fs_info);
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
spin_unlock(&cache->space_info->lock);
__link_block_group(cache->space_info, cache);
list_add_tail(&cache->bg_list, &trans->new_bgs);
set_avail_alloc_bits(extent_root->fs_info, type);
return 0;
}
......@@ -10747,21 +10954,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
mixed = 1;
flags = BTRFS_BLOCK_GROUP_SYSTEM;
ret = update_space_info(fs_info, flags, 0, 0, &space_info);
ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
if (ret)
goto out;
if (mixed) {
flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
ret = update_space_info(fs_info, flags, 0, 0, &space_info);
ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
} else {
flags = BTRFS_BLOCK_GROUP_METADATA;
ret = update_space_info(fs_info, flags, 0, 0, &space_info);
ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
if (ret)
goto out;
flags = BTRFS_BLOCK_GROUP_DATA;
ret = update_space_info(fs_info, flags, 0, 0, &space_info);
ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
}
out:
return ret;
......
......@@ -1629,13 +1629,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy.
*/
if (num_sectors > dirty_sectors) {
/*
* we round down because we don't want to count
* any partial blocks actually sent through the
* IO machines
*/
release_bytes = round_down(release_bytes - copied,
root->sectorsize);
/* release everything except the sectors we dirtied */
release_bytes -= dirty_sectors <<
root->fs_info->sb->s_blocksize_bits;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
......@@ -2479,7 +2477,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
min_size);
min_size, 0);
BUG_ON(ret);
trans->block_rsv = rsv;
......@@ -2522,7 +2520,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
rsv, min_size);
rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
......
......@@ -5263,7 +5263,7 @@ void btrfs_evict_inode(struct inode *inode)
if (steal_from_global) {
if (!btrfs_check_space_for_delayed_refs(trans, root))
ret = btrfs_block_rsv_migrate(global_rsv, rsv,
min_size);
min_size, 0);
else
ret = -ENOSPC;
}
......@@ -9116,7 +9116,7 @@ static int btrfs_truncate(struct inode *inode)
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
min_size);
min_size, 0);
BUG_ON(ret);
/*
......@@ -9156,7 +9156,7 @@ static int btrfs_truncate(struct inode *inode)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
rsv, min_size);
rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
}
......@@ -9177,7 +9177,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
}
out:
btrfs_free_block_rsv(root, rsv);
......
......@@ -2604,25 +2604,28 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
trans->block_rsv = rc->block_rsv;
rc->reserved_bytes += num_bytes;
/*
* We are under a transaction here so we can only do limited flushing.
* If we get an enospc just kick back -EAGAIN so we know to drop the
* transaction and try to refill when we can flush all the things.
*/
ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
BTRFS_RESERVE_FLUSH_ALL);
BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) {
if (ret == -EAGAIN) {
tmp = rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES;
while (tmp <= rc->reserved_bytes)
tmp <<= 1;
/*
* only one thread can access block_rsv at this point,
* so we don't need hold lock to protect block_rsv.
* we expand more reservation size here to allow enough
* space for relocation and we will return earlier in
* space for relocation and we will return eailer in
* enospc case.
*/
rc->block_rsv->size = tmp + rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
}
return ret;
return -EAGAIN;
}
return 0;
......@@ -3871,6 +3874,7 @@ static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{
struct btrfs_trans_handle *trans;
int ret;
rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
BTRFS_BLOCK_RSV_TEMP);
......@@ -3885,6 +3889,11 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->reserved_bytes = 0;
rc->block_rsv->size = rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
ret = btrfs_block_rsv_refill(rc->extent_root,
rc->block_rsv, rc->block_rsv->size,
BTRFS_RESERVE_FLUSH_ALL);
if (ret)
return ret;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
......@@ -4643,7 +4652,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
if (rc->merge_reloc_tree) {
ret = btrfs_block_rsv_migrate(&pending->block_rsv,
rc->block_rsv,
rc->nodes_relocated);
rc->nodes_relocated, 1);
if (ret)
return ret;
}
......
......@@ -440,6 +440,46 @@ TRACE_EVENT(btrfs_sync_fs,
TP_printk("wait = %d", __entry->wait)
);
TRACE_EVENT(btrfs_add_block_group,
TP_PROTO(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group, int create),
TP_ARGS(fs_info, block_group, create),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, offset )
__field( u64, size )
__field( u64, flags )
__field( u64, bytes_used )
__field( u64, bytes_super )
__field( int, create )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->offset = block_group->key.objectid;
__entry->size = block_group->key.offset;
__entry->flags = block_group->flags;
__entry->bytes_used =
btrfs_block_group_used(&block_group->item);
__entry->bytes_super = block_group->bytes_super;
__entry->create = create;
),
TP_printk("%pU: block_group offset = %llu, size = %llu, "
"flags = %llu(%s), bytes_used = %llu, bytes_super = %llu, "
"create = %d", __entry->fsid,
(unsigned long long)__entry->offset,
(unsigned long long)__entry->size,
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->bytes_used,
(unsigned long long)__entry->bytes_super, __entry->create)
);
#define show_ref_action(action) \
__print_symbolic(action, \
{ BTRFS_ADD_DELAYED_REF, "ADD_DELAYED_REF" }, \
......@@ -744,6 +784,88 @@ TRACE_EVENT(btrfs_space_reservation,
__entry->bytes)
);
#define show_flush_action(action) \
__print_symbolic(action, \
{ BTRFS_RESERVE_NO_FLUSH, "BTRFS_RESERVE_NO_FLUSH"}, \
{ BTRFS_RESERVE_FLUSH_LIMIT, "BTRFS_RESERVE_FLUSH_LIMIT"}, \
{ BTRFS_RESERVE_FLUSH_ALL, "BTRFS_RESERVE_FLUSH_ALL"})
TRACE_EVENT(btrfs_trigger_flush,
TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
int flush, char *reason),
TP_ARGS(fs_info, flags, bytes, flush, reason),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, flags )
__field( u64, bytes )
__field( int, flush )
__string( reason, reason )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->flags = flags;
__entry->bytes = bytes;
__entry->flush = flush;
__assign_str(reason, reason)
),
TP_printk("%pU: %s: flush = %d(%s), flags = %llu(%s), bytes = %llu",
__entry->fsid, __get_str(reason), __entry->flush,
show_flush_action(__entry->flush),
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->bytes)
);
#define show_flush_state(state) \
__print_symbolic(state, \
{ FLUSH_DELAYED_ITEMS_NR, "FLUSH_DELAYED_ITEMS_NR"}, \
{ FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS"}, \
{ FLUSH_DELALLOC, "FLUSH_DELALLOC"}, \
{ FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT"}, \
{ ALLOC_CHUNK, "ALLOC_CHUNK"}, \
{ COMMIT_TRANS, "COMMIT_TRANS"})
TRACE_EVENT(btrfs_flush_space,
TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
u64 orig_bytes, int state, int ret),
TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, flags )
__field( u64, num_bytes )
__field( u64, orig_bytes )
__field( int, state )
__field( int, ret )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->flags = flags;
__entry->num_bytes = num_bytes;
__entry->orig_bytes = orig_bytes;
__entry->state = state;
__entry->ret = ret;
),
TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
"orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
show_flush_state(__entry->state),
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->num_bytes,
(unsigned long long)__entry->orig_bytes, __entry->ret)
);
DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
......@@ -751,18 +873,21 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_ARGS(root, start, len),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, root_objectid )
__field( u64, start )
__field( u64, len )
),
TP_fast_assign(
memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
__entry->root_objectid = root->root_key.objectid;
__entry->start = start;
__entry->len = len;
),
TP_printk("root = %llu(%s), start = %llu, len = %llu",
TP_printk("%pU: root = %llu(%s), start = %llu, len = %llu",
__entry->fsid,
show_root_type(__entry->root_objectid),
(unsigned long long)__entry->start,
(unsigned long long)__entry->len)
......@@ -819,6 +944,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
TP_ARGS(root, block_group, start, len),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, root_objectid )
__field( u64, bg_objectid )
__field( u64, flags )
......@@ -827,6 +953,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
),
TP_fast_assign(
memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
__entry->root_objectid = root->root_key.objectid;
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
......@@ -834,8 +961,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
__entry->len = len;
),
TP_printk("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
"start = %Lu, len = %Lu",
TP_printk("%pU: root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
"start = %Lu, len = %Lu", __entry->fsid,
show_root_type(__entry->root_objectid), __entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags,
"|", BTRFS_GROUP_FLAGS),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment