Commit ba929b66 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This pull is dedicated to Josef's enospc rework, which we've been
  testing for a few releases now.  It fixes some early enospc problems
  and is dramatically faster.

  This also includes an updated fix for the delalloc accounting that
  happens after a fault in copy_from_user.  My patch in v4.7 was almost
  but not quite enough"

* 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix delalloc accounting after copy_from_user faults
  Btrfs: avoid deadlocks during reservations in btrfs_truncate_block
  Btrfs: use FLUSH_LIMIT for relocation in reserve_metadata_bytes
  Btrfs: fill relocation block rsv after allocation
  Btrfs: always use trans->block_rsv for orphans
  Btrfs: change how we calculate the global block rsv
  Btrfs: use root when checking need_async_flush
  Btrfs: don't bother kicking async if there's nothing to reclaim
  Btrfs: fix release reserved extents trace points
  Btrfs: add fsid to some tracepoints
  Btrfs: add tracepoints for flush events
  Btrfs: fix delalloc reservation amount tracepoint
  Btrfs: trace pinned extents
  Btrfs: introduce ticketed enospc infrastructure
  Btrfs: add tracepoint for adding block groups
  Btrfs: warn_on for unaccounted spaces
  Btrfs: change delayed reservation fallback behavior
  Btrfs: always reserve metadata for delalloc extents
  Btrfs: fix callers of btrfs_block_rsv_migrate
  Btrfs: add bytes_readonly to the spaceinfo at once
parents c9b95e59 8b8b08cb
......@@ -439,6 +439,8 @@ struct btrfs_space_info {
struct list_head list;
/* Protected by the spinlock 'lock'. */
struct list_head ro_bgs;
struct list_head priority_tickets;
struct list_head tickets;
struct rw_semaphore groups_sem;
/* for block groups in our same type */
......@@ -2624,6 +2626,15 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_ALL,
};
enum btrfs_flush_state {
FLUSH_DELAYED_ITEMS_NR = 1,
FLUSH_DELAYED_ITEMS = 2,
FLUSH_DELALLOC = 3,
FLUSH_DELALLOC_WAIT = 4,
ALLOC_CHUNK = 5,
COMMIT_TRANS = 6,
};
int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
......@@ -2661,8 +2672,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
enum btrfs_reserve_flush_enum flush);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
int update_size);
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor);
......
......@@ -553,7 +553,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
dst_rsv = &root->fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_item",
item->key.objectid,
......@@ -597,6 +597,29 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
/*
* If our block_rsv is the delalloc block reserve then check and see if
* we have our extra reservation for updating the inode. If not fall
* through and try to reserve space quickly.
*
* We used to try and steal from the delalloc block rsv or the global
* reserve, but we'd steal a full reservation, which isn't kind. We are
* here through delalloc which means we've likely just cowed down close
* to the leaf that contains the inode, so we would steal less just
* doing the fallback inode update, so if we do end up having to steal
* from the global block rsv we hopefully only steal one or two blocks
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&BTRFS_I(inode)->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
release = true;
else
src_rsv = NULL;
spin_unlock(&BTRFS_I(inode)->lock);
}
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
......@@ -626,51 +649,10 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes, 1);
}
return ret;
} else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&BTRFS_I(inode)->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
spin_unlock(&BTRFS_I(inode)->lock);
release = true;
goto migrate;
}
spin_unlock(&BTRFS_I(inode)->lock);
/* Ok we didn't have space pre-reserved. This shouldn't happen
* too often but it can happen if we do delalloc to an existing
* inode which gets dirtied because of the time update, and then
* isn't touched again until after the transaction commits and
* then we try to write out the data. First try to be nice and
* reserve something strictly for us. If not be a pain and try
* to steal from the delalloc block rsv.
*/
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
if (!ret)
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
if (!ret)
goto out;
if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
btrfs_debug(root->fs_info,
"block rsv migrate returned %d", ret);
WARN_ON(1);
}
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
*/
ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
dst_rsv, num_bytes);
goto out;
}
migrate:
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
out:
/*
* Migrate only takes a reservation, it doesn't touch the size of the
* block_rsv. This is to simplify people who don't normally have things
......
This diff is collapsed.
......@@ -1629,13 +1629,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy.
*/
if (num_sectors > dirty_sectors) {
/*
* we round down because we don't want to count
* any partial blocks actually sent through the
* IO machines
*/
release_bytes = round_down(release_bytes - copied,
root->sectorsize);
/* release everything except the sectors we dirtied */
release_bytes -= dirty_sectors <<
root->fs_info->sb->s_blocksize_bits;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
......@@ -2479,7 +2477,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
min_size);
min_size, 0);
BUG_ON(ret);
trans->block_rsv = rsv;
......@@ -2522,7 +2520,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
rsv, min_size);
rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
......
......@@ -5263,7 +5263,7 @@ void btrfs_evict_inode(struct inode *inode)
if (steal_from_global) {
if (!btrfs_check_space_for_delayed_refs(trans, root))
ret = btrfs_block_rsv_migrate(global_rsv, rsv,
min_size);
min_size, 0);
else
ret = -ENOSPC;
}
......@@ -9116,7 +9116,7 @@ static int btrfs_truncate(struct inode *inode)
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
min_size);
min_size, 0);
BUG_ON(ret);
/*
......@@ -9156,7 +9156,7 @@ static int btrfs_truncate(struct inode *inode)
}
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
rsv, min_size);
rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
}
......@@ -9177,7 +9177,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
}
out:
btrfs_free_block_rsv(root, rsv);
......
......@@ -2604,25 +2604,28 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
trans->block_rsv = rc->block_rsv;
rc->reserved_bytes += num_bytes;
/*
* We are under a transaction here so we can only do limited flushing.
* If we get an enospc just kick back -EAGAIN so we know to drop the
* transaction and try to refill when we can flush all the things.
*/
ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
BTRFS_RESERVE_FLUSH_ALL);
BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) {
if (ret == -EAGAIN) {
tmp = rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES;
while (tmp <= rc->reserved_bytes)
tmp <<= 1;
/*
* only one thread can access block_rsv at this point,
* so we don't need hold lock to protect block_rsv.
* we expand more reservation size here to allow enough
* space for relocation and we will return earlier in
* space for relocation and we will return eailer in
* enospc case.
*/
rc->block_rsv->size = tmp + rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
}
return ret;
return -EAGAIN;
}
return 0;
......@@ -3871,6 +3874,7 @@ static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{
struct btrfs_trans_handle *trans;
int ret;
rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
BTRFS_BLOCK_RSV_TEMP);
......@@ -3885,6 +3889,11 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->reserved_bytes = 0;
rc->block_rsv->size = rc->extent_root->nodesize *
RELOCATION_RESERVED_NODES;
ret = btrfs_block_rsv_refill(rc->extent_root,
rc->block_rsv, rc->block_rsv->size,
BTRFS_RESERVE_FLUSH_ALL);
if (ret)
return ret;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
......@@ -4643,7 +4652,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
if (rc->merge_reloc_tree) {
ret = btrfs_block_rsv_migrate(&pending->block_rsv,
rc->block_rsv,
rc->nodes_relocated);
rc->nodes_relocated, 1);
if (ret)
return ret;
}
......
......@@ -440,6 +440,46 @@ TRACE_EVENT(btrfs_sync_fs,
TP_printk("wait = %d", __entry->wait)
);
TRACE_EVENT(btrfs_add_block_group,
TP_PROTO(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group, int create),
TP_ARGS(fs_info, block_group, create),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, offset )
__field( u64, size )
__field( u64, flags )
__field( u64, bytes_used )
__field( u64, bytes_super )
__field( int, create )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->offset = block_group->key.objectid;
__entry->size = block_group->key.offset;
__entry->flags = block_group->flags;
__entry->bytes_used =
btrfs_block_group_used(&block_group->item);
__entry->bytes_super = block_group->bytes_super;
__entry->create = create;
),
TP_printk("%pU: block_group offset = %llu, size = %llu, "
"flags = %llu(%s), bytes_used = %llu, bytes_super = %llu, "
"create = %d", __entry->fsid,
(unsigned long long)__entry->offset,
(unsigned long long)__entry->size,
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->bytes_used,
(unsigned long long)__entry->bytes_super, __entry->create)
);
#define show_ref_action(action) \
__print_symbolic(action, \
{ BTRFS_ADD_DELAYED_REF, "ADD_DELAYED_REF" }, \
......@@ -744,6 +784,88 @@ TRACE_EVENT(btrfs_space_reservation,
__entry->bytes)
);
#define show_flush_action(action) \
__print_symbolic(action, \
{ BTRFS_RESERVE_NO_FLUSH, "BTRFS_RESERVE_NO_FLUSH"}, \
{ BTRFS_RESERVE_FLUSH_LIMIT, "BTRFS_RESERVE_FLUSH_LIMIT"}, \
{ BTRFS_RESERVE_FLUSH_ALL, "BTRFS_RESERVE_FLUSH_ALL"})
TRACE_EVENT(btrfs_trigger_flush,
TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
int flush, char *reason),
TP_ARGS(fs_info, flags, bytes, flush, reason),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, flags )
__field( u64, bytes )
__field( int, flush )
__string( reason, reason )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->flags = flags;
__entry->bytes = bytes;
__entry->flush = flush;
__assign_str(reason, reason)
),
TP_printk("%pU: %s: flush = %d(%s), flags = %llu(%s), bytes = %llu",
__entry->fsid, __get_str(reason), __entry->flush,
show_flush_action(__entry->flush),
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->bytes)
);
#define show_flush_state(state) \
__print_symbolic(state, \
{ FLUSH_DELAYED_ITEMS_NR, "FLUSH_DELAYED_ITEMS_NR"}, \
{ FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS"}, \
{ FLUSH_DELALLOC, "FLUSH_DELALLOC"}, \
{ FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT"}, \
{ ALLOC_CHUNK, "ALLOC_CHUNK"}, \
{ COMMIT_TRANS, "COMMIT_TRANS"})
TRACE_EVENT(btrfs_flush_space,
TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
u64 orig_bytes, int state, int ret),
TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, flags )
__field( u64, num_bytes )
__field( u64, orig_bytes )
__field( int, state )
__field( int, ret )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__entry->flags = flags;
__entry->num_bytes = num_bytes;
__entry->orig_bytes = orig_bytes;
__entry->state = state;
__entry->ret = ret;
),
TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
"orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
show_flush_state(__entry->state),
(unsigned long long)__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
(unsigned long long)__entry->num_bytes,
(unsigned long long)__entry->orig_bytes, __entry->ret)
);
DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
......@@ -751,18 +873,21 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_ARGS(root, start, len),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, root_objectid )
__field( u64, start )
__field( u64, len )
),
TP_fast_assign(
memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
__entry->root_objectid = root->root_key.objectid;
__entry->start = start;
__entry->len = len;
),
TP_printk("root = %llu(%s), start = %llu, len = %llu",
TP_printk("%pU: root = %llu(%s), start = %llu, len = %llu",
__entry->fsid,
show_root_type(__entry->root_objectid),
(unsigned long long)__entry->start,
(unsigned long long)__entry->len)
......@@ -819,6 +944,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
TP_ARGS(root, block_group, start, len),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__field( u64, root_objectid )
__field( u64, bg_objectid )
__field( u64, flags )
......@@ -827,6 +953,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
),
TP_fast_assign(
memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
__entry->root_objectid = root->root_key.objectid;
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
......@@ -834,8 +961,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
__entry->len = len;
),
TP_printk("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
"start = %Lu, len = %Lu",
TP_printk("%pU: root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
"start = %Lu, len = %Lu", __entry->fsid,
show_root_type(__entry->root_objectid), __entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags,
"|", BTRFS_GROUP_FLAGS),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment