Commit 3615db41 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "We've had a busy two weeks of bug fixing.  The biggest patches in here
  are some long standing early-enospc problems (Josef) and a very old
  race where compression and mmap combine forces to lose writes (me).
  I'm fairly sure the mmap bug goes all the way back to the introduction
  of the compression code, which is proof that fsx doesn't trigger every
  possible mmap corner after all.

  I'm sure you'll notice one of these is from this morning, it's a small
  and isolated use-after-free fix in our scrub error reporting.  I
  double checked it here."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: don't drop path when printing out tree errors in scrub
  Btrfs: fix wrong return value of btrfs_lookup_csum()
  Btrfs: fix wrong reservation of csums
  Btrfs: fix double free in the btrfs_qgroup_account_ref()
  Btrfs: limit the global reserve to 512mb
  Btrfs: hold the ordered operations mutex when waiting on ordered extents
  Btrfs: fix space accounting for unlink and rename
  Btrfs: fix space leak when we fail to reserve metadata space
  Btrfs: fix EIO from btrfs send in is_extent_unchanged for punched holes
  Btrfs: fix race between mmap writes and compression
  Btrfs: fix memory leak in btrfs_create_tree()
  Btrfs: fix locking on ROOT_REPLACE operations in tree mod log
  Btrfs: fix missing qgroup reservation before fallocating
  Btrfs: handle a bogus chunk tree nicely
  Btrfs: update to use fs_state bit
parents ed176886 d8fe29e9
......@@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (tree_mod_dont_log(fs_info, NULL))
return 0;
__tree_mod_log_free_eb(fs_info, old_root);
ret = tree_mod_alloc(fs_info, flags, &tm);
if (ret < 0)
goto out;
......@@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
static noinline void
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
unsigned long src_offset, int nr_items, int log_removal)
{
int ret;
int i;
......@@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
}
for (i = 0; i < nr_items; i++) {
if (log_removal) {
ret = tree_mod_log_insert_key_locked(fs_info, src,
i + src_offset,
MOD_LOG_KEY_REMOVE);
BUG_ON(ret < 0);
}
ret = tree_mod_log_insert_key_locked(fs_info, dst,
i + dst_offset,
MOD_LOG_KEY_ADD);
......@@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, buf, 1, 1);
BUG_ON(ret); /* -ENOMEM */
}
tree_mod_log_free_eb(root->fs_info, buf);
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
......@@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
tree_mod_log_free_eb(root->fs_info, buf);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
......@@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
tree_mod_log_free_eb(root->fs_info, root->node);
tree_mod_log_set_root_pointer(root, child);
rcu_assign_pointer(root->node, child);
......@@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
push_items = min(src_nritems - 8, push_items);
tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
push_items);
push_items, 1);
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(dst_nritems),
btrfs_node_key_ptr_offset(0),
......@@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
sizeof(struct btrfs_key_ptr));
tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
src_nritems - push_items, push_items);
src_nritems - push_items, push_items, 1);
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(src_nritems - push_items),
......@@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
int mid;
int ret;
u32 c_nritems;
int tree_mod_log_removal = 1;
c = path->nodes[level];
WARN_ON(btrfs_header_generation(c) != trans->transid);
if (c == root->node) {
/* trying to split the root, lets make a new one */
ret = insert_new_root(trans, root, path, level + 1);
/*
* removal of root nodes has been logged by
* tree_mod_log_set_root_pointer due to locking
*/
tree_mod_log_removal = 0;
if (ret)
return ret;
} else {
......@@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid,
tree_mod_log_removal);
copy_extent_buffer(split, c,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(mid),
......
......@@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
leaf = NULL;
goto fail;
}
......@@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(leaf);
return root;
fail:
if (ret)
return ERR_PTR(ret);
if (leaf) {
btrfs_tree_unlock(leaf);
free_extent_buffer(leaf);
}
kfree(root);
return root;
return ERR_PTR(ret);
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
......@@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
btrfs_free_log(NULL, root);
btrfs_free_log_root_tree(NULL, fs_info);
}
......
......@@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root,
cache->bytes_super += stripe_len;
ret = add_excluded_extent(root, cache->key.objectid,
stripe_len);
BUG_ON(ret); /* -ENOMEM */
if (ret)
return ret;
}
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
......@@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root,
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
cache->key.objectid, bytenr,
0, &logical, &nr, &stripe_len);
BUG_ON(ret); /* -ENOMEM */
if (ret)
return ret;
while (nr--) {
cache->bytes_super += stripe_len;
ret = add_excluded_extent(root, logical[nr],
stripe_len);
BUG_ON(ret); /* -ENOMEM */
if (ret) {
kfree(logical);
return ret;
}
}
kfree(logical);
......@@ -4438,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock);
block_rsv->size = num_bytes;
block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
sinfo->bytes_reserved + sinfo->bytes_readonly +
......@@ -4793,14 +4798,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
* If the inodes csum_bytes is the same as the original
* csum_bytes then we know we haven't raced with any free()ers
* so we can just reduce our inodes csum bytes and carry on.
* Otherwise we have to do the normal free thing to account for
* the case that the free side didn't free up its reserve
* because of this outstanding reservation.
*/
if (BTRFS_I(inode)->csum_bytes == csum_bytes)
if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
calc_csum_metadata_size(inode, num_bytes, 0);
} else {
u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
u64 bytes;
/*
* This is tricky, but first we need to figure out how much we
* free'd from any free-ers that occurred during this
* reservation, so we reset ->csum_bytes to the csum_bytes
* before we dropped our lock, and then call the free for the
* number of bytes that were freed while we were trying our
* reservation.
*/
bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
BTRFS_I(inode)->csum_bytes = csum_bytes;
to_free = calc_csum_metadata_size(inode, bytes, 0);
/*
* Now we need to see how much we would have freed had we not
* been making this reservation and our ->csum_bytes were not
* artificially inflated.
*/
BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
bytes = csum_bytes - orig_csum_bytes;
bytes = calc_csum_metadata_size(inode, bytes, 0);
/*
* Now reset ->csum_bytes to what it should be. If bytes is
* more than to_free then we would have free'd more space had we
* not had an artificially high ->csum_bytes, so we need to free
* the remainder. If bytes is the same or less then we don't
* need to do anything, the other free-ers did the correct
* thing.
*/
BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
if (bytes > to_free)
to_free = bytes - to_free;
else
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
to_free = 0;
}
spin_unlock(&BTRFS_I(inode)->lock);
if (dropped)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
......@@ -7947,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* info has super bytes accounted for, otherwise we'll think
* we have more space than we actually do.
*/
exclude_super_stripes(root, cache);
ret = exclude_super_stripes(root, cache);
if (ret) {
/*
* We may have excluded something, so call this just in
* case.
*/
free_excluded_extents(root, cache);
kfree(cache->free_space_ctl);
kfree(cache);
goto error;
}
/*
* check for two cases, either we are full, and therefore
......@@ -8089,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
exclude_super_stripes(root, cache);
ret = exclude_super_stripes(root, cache);
if (ret) {
/*
* We may have excluded something, so call this just in
* case.
*/
free_excluded_extents(root, cache);
kfree(cache->free_space_ctl);
kfree(cache);
return ret;
}
add_new_free_space(cache, root->fs_info, chunk_offset,
chunk_offset + size);
......
......@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
GFP_NOFS);
}
/*
 * Walk every page cache index covering the byte range [start, end] of
 * @inode (the page holding @end is included) and call
 * clear_page_dirty_for_io() on each page found in inode->i_mapping.
 *
 * Each page is looked up with find_get_page() and handed back with
 * page_cache_release() once it has been processed.  A missing page is
 * treated as a fatal inconsistency (BUG_ON).  The result of
 * clear_page_dirty_for_io() is not inspected.
 *
 * Always returns 0.
 */
int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long first_idx = start >> PAGE_CACHE_SHIFT;
	unsigned long last_idx = end >> PAGE_CACHE_SHIFT;
	unsigned long idx;

	for (idx = first_idx; idx <= last_idx; idx++) {
		struct page *pg = find_get_page(inode->i_mapping, idx);

		BUG_ON(!pg); /* Pages should be in the extent_io_tree */
		clear_page_dirty_for_io(pg);
		page_cache_release(pg);
	}

	return 0;
}
/*
 * Walk every page cache index covering the byte range [start, end] of
 * @inode (the page holding @end is included) and mark each page dirty
 * again: account_page_redirty() is called first, followed by
 * __set_page_dirty_nobuffers().
 *
 * Each page is looked up with find_get_page() and handed back with
 * page_cache_release() once it has been processed.  A missing page is
 * treated as a fatal inconsistency (BUG_ON).
 *
 * Always returns 0.
 */
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long first_idx = start >> PAGE_CACHE_SHIFT;
	unsigned long last_idx = end >> PAGE_CACHE_SHIFT;
	unsigned long idx;

	for (idx = first_idx; idx <= last_idx; idx++) {
		struct page *pg = find_get_page(inode->i_mapping, idx);

		BUG_ON(!pg); /* Pages should be in the extent_io_tree */
		/* Keep the accounting call ahead of the actual re-dirtying. */
		account_page_redirty(pg);
		__set_page_dirty_nobuffers(pg);
		page_cache_release(pg);
	}

	return 0;
}
/*
* helper function to set both pages and extents in the tree writeback
*/
......
......@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long *map_len);
int extent_range_uptodate(struct extent_io_tree *tree,
u64 start, u64 end);
int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode,
struct extent_io_tree *tree,
u64 start, u64 end, struct page *locked_page,
......
......@@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
csums_in_item /= csum_size;
if (csum_offset >= csums_in_item) {
if (csum_offset == csums_in_item) {
ret = -EFBIG;
goto fail;
} else if (csum_offset > csums_in_item) {
goto fail;
}
}
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
......@@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
return -ENOMEM;
sector_sum = sums->sums;
trans->adding_csums = 1;
again:
next_offset = (u64)-1;
found_next = 0;
......@@ -899,7 +900,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto again;
}
out:
trans->adding_csums = 0;
btrfs_free_path(path);
return ret;
......
......@@ -2142,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode,
{
struct inode *inode = file_inode(file);
struct extent_state *cached_state = NULL;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 cur_offset;
u64 last_byte;
u64 alloc_start;
......@@ -2169,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode,
ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
if (ret)
return ret;
if (root->fs_info->quota_enabled) {
ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
if (ret)
goto out_reserve_fail;
}
/*
* wait for ordered IO before we have any locks. We'll loop again
......@@ -2272,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode,
&cached_state, GFP_NOFS);
out:
mutex_unlock(&inode->i_mutex);
if (root->fs_info->quota_enabled)
btrfs_qgroup_free(root, alloc_end - alloc_start);
out_reserve_fail:
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
return ret;
......
......@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
int i;
int will_compress;
int compress_type = root->fs_info->compress_type;
int redirty = 0;
/* if this is a small write inside eof, kick off a defrag */
if ((end - start + 1) < 16 * 1024 &&
......@@ -415,6 +416,17 @@ static noinline int compress_file_range(struct inode *inode,
if (BTRFS_I(inode)->force_compress)
compress_type = BTRFS_I(inode)->force_compress;
/*
* we need to call clear_page_dirty_for_io on each
* page in the range. Otherwise applications with the file
* mmap'd can wander in and change the page contents while
* we are compressing them.
*
* If the compression fails for any reason, we set the pages
* dirty again later on.
*/
extent_range_clear_dirty_for_io(inode, start, end);
redirty = 1;
ret = btrfs_compress_pages(compress_type,
inode->i_mapping, start,
total_compressed, pages,
......@@ -554,6 +566,8 @@ static noinline int compress_file_range(struct inode *inode,
__set_page_dirty_nobuffers(locked_page);
/* unlocked later on in the async handlers */
}
if (redirty)
extent_range_redirty_for_io(inode, start, end);
add_async_extent(async_cow, start, end - start + 1,
0, NULL, 0, BTRFS_COMPRESS_NONE);
*num_added += 1;
......@@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sum;
list_for_each_entry(sum, list, list) {
trans->adding_csums = 1;
btrfs_csum_file_blocks(trans,
BTRFS_I(inode)->root->fs_info->csum_root, sum);
trans->adding_csums = 0;
}
return 0;
}
......@@ -3679,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
* 1 for the dir item
* 1 for the dir index
* 1 for the inode ref
* 1 for the inode ref in the tree log
* 2 for the dir entries in the log
* 1 for the inode
*/
trans = btrfs_start_transaction(root, 8);
trans = btrfs_start_transaction(root, 5);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
......@@ -8127,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
* should cover the worst case number of items we'll modify.
*/
trans = btrfs_start_transaction(root, 20);
trans = btrfs_start_transaction(root, 11);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_notrans;
......
......@@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
while (!list_empty(&splice)) {
......@@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
cond_resched();
}
mutex_unlock(&root->fs_info->ordered_operations_mutex);
}
/*
......
......@@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
sgn > 0 ? node->seq - 1 : node->seq, &roots);
if (ret < 0)
goto out;
return ret;
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
......@@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
ret = 0;
unlock:
spin_unlock(&fs_info->qgroup_lock);
out:
ulist_free(roots);
ulist_free(tmp);
......
......@@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
eb = path->nodes[0];
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
item_size = btrfs_item_size_nr(eb, path->slots[0]);
btrfs_release_path(path);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
......@@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
ret < 0 ? -1 : ref_level,
ret < 0 ? -1 : ref_root);
} while (ret != 1);
btrfs_release_path(path);
} else {
btrfs_release_path(path);
swarn.path = path;
swarn.dev = dev;
iterate_extent_inodes(fs_info, found_key.objectid,
......
......@@ -3945,13 +3945,11 @@ static int is_extent_unchanged(struct send_ctx *sctx,
found_key.type != key.type) {
key.offset += right_len;
break;
} else {
}
if (found_key.offset != key.offset + right_len) {
/* Should really not happen */
ret = -EIO;
ret = 0;
goto out;
}
}
key = found_key;
}
......
......@@ -4935,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
em = lookup_extent_mapping(em_tree, chunk_start, 1);
read_unlock(&em_tree->lock);
BUG_ON(!em || em->start != chunk_start);
if (!em) {
printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
chunk_start);
return -EIO;
}
if (em->start != chunk_start) {
printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
em->start, chunk_start);
free_extent_map(em);
return -EIO;
}
map = (struct map_lookup *)em->bdev;
length = em->len;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment