Commit e3a0dd98 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs update from Chris Mason:
 "These are the usual mixture of bugs, cleanups and performance fixes.
  Miao has some really nice tuning of our crc code as well as our
  transaction commits.

  Josef is peeling off more and more problems related to early enospc,
  and has a number of important bug fixes in here too"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (81 commits)
  Btrfs: wait ordered range before doing direct io
  Btrfs: only do the tree_mod_log_free_eb if this is our last ref
  Btrfs: hold the tree mod lock in __tree_mod_log_rewind
  Btrfs: make backref walking code handle skinny metadata
  Btrfs: fix crash regarding to ulist_add_merge
  Btrfs: fix several potential problems in copy_nocow_pages_for_inode
  Btrfs: cleanup the code of copy_nocow_pages_for_inode()
  Btrfs: fix oops when recovering the file data by scrub function
  Btrfs: make the chunk allocator completely tree lockless
  Btrfs: cleanup orphaned root orphan item
  Btrfs: fix wrong mirror number tuning
  Btrfs: cleanup redundant code in btrfs_submit_direct()
  Btrfs: remove btrfs_sector_sum structure
  Btrfs: check if we can nocow if we don't have data space
  Btrfs: stop using try_to_writeback_inodes_sb_nr to flush delalloc
  Btrfs: use a percpu to keep track of possibly pinned bytes
  Btrfs: check for actual acls rather than just xattrs when caching no acl
  Btrfs: move btrfs_truncate_page to btrfs_cont_expand instead of btrfs_truncate
  Btrfs: optimize reada_for_balance
  Btrfs: optimize read_block_for_search
  ...
parents da89bd21 0e267c44
......@@ -255,13 +255,11 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
* to a logical address
*/
static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
int search_commit_root,
u64 time_seq,
struct __prelim_ref *ref,
struct ulist *parents,
const u64 *extent_item_pos)
struct btrfs_path *path, u64 time_seq,
struct __prelim_ref *ref,
struct ulist *parents,
const u64 *extent_item_pos)
{
struct btrfs_path *path;
struct btrfs_root *root;
struct btrfs_key root_key;
struct extent_buffer *eb;
......@@ -269,11 +267,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
int root_level;
int level = ref->level;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->search_commit_root = !!search_commit_root;
root_key.objectid = ref->root_id;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
......@@ -314,7 +307,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
time_seq, ref->wanted_disk_byte,
extent_item_pos);
out:
btrfs_free_path(path);
path->lowest_level = 0;
btrfs_release_path(path);
return ret;
}
......@@ -322,7 +316,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
* resolve all indirect backrefs from the list
*/
static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
int search_commit_root, u64 time_seq,
struct btrfs_path *path, u64 time_seq,
struct list_head *head,
const u64 *extent_item_pos)
{
......@@ -349,9 +343,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
continue;
if (ref->count == 0)
continue;
err = __resolve_indirect_ref(fs_info, search_commit_root,
time_seq, ref, parents,
extent_item_pos);
err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
parents, extent_item_pos);
if (err == -ENOMEM)
goto out;
if (err)
......@@ -604,6 +597,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
int slot;
struct extent_buffer *leaf;
struct btrfs_key key;
struct btrfs_key found_key;
unsigned long ptr;
unsigned long end;
struct btrfs_extent_item *ei;
......@@ -621,17 +615,21 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
ptr = (unsigned long)(ei + 1);
end = (unsigned long)ei + item_size;
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)ptr;
*info_level = btrfs_tree_block_level(leaf, info);
ptr += sizeof(struct btrfs_tree_block_info);
BUG_ON(ptr > end);
} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
*info_level = found_key.offset;
} else {
BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
}
......@@ -795,7 +793,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head;
int info_level = 0;
int ret;
int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT);
struct list_head prefs_delayed;
struct list_head prefs;
struct __prelim_ref *ref;
......@@ -804,13 +801,17 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
INIT_LIST_HEAD(&prefs_delayed);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = (u64)-1;
if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
key.type = BTRFS_METADATA_ITEM_KEY;
else
key.type = BTRFS_EXTENT_ITEM_KEY;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->search_commit_root = !!search_commit_root;
if (!trans)
path->search_commit_root = 1;
/*
* grab both a lock on the path and a lock on the delayed ref head.
......@@ -825,7 +826,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
goto out;
BUG_ON(ret == 0);
if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) {
if (trans) {
/*
* look if there are updates for this ref queued and lock the
* head
......@@ -869,7 +870,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid == bytenr &&
key.type == BTRFS_EXTENT_ITEM_KEY) {
(key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY)) {
ret = __add_inline_refs(fs_info, path, bytenr,
&info_level, &prefs);
if (ret)
......@@ -890,8 +892,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
__merge_refs(&prefs, 1);
ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
&prefs, extent_item_pos);
ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
extent_item_pos);
if (ret)
goto out;
......@@ -1283,12 +1285,16 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
{
int ret;
u64 flags;
u64 size = 0;
u32 item_size;
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
struct btrfs_key key;
key.type = BTRFS_EXTENT_ITEM_KEY;
if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
key.type = BTRFS_METADATA_ITEM_KEY;
else
key.type = BTRFS_EXTENT_ITEM_KEY;
key.objectid = logical;
key.offset = (u64)-1;
......@@ -1301,9 +1307,15 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
return ret;
btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
if (found_key->type != BTRFS_EXTENT_ITEM_KEY ||
if (found_key->type == BTRFS_METADATA_ITEM_KEY)
size = fs_info->extent_root->leafsize;
else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
size = found_key->offset;
if ((found_key->type != BTRFS_EXTENT_ITEM_KEY &&
found_key->type != BTRFS_METADATA_ITEM_KEY) ||
found_key->objectid > logical ||
found_key->objectid + found_key->offset <= logical) {
found_key->objectid + size <= logical) {
pr_debug("logical %llu is not within any extent\n",
(unsigned long long)logical);
return -ENOENT;
......@@ -1459,7 +1471,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
iterate_extent_inodes_t *iterate, void *ctx)
{
int ret;
struct btrfs_trans_handle *trans;
struct btrfs_trans_handle *trans = NULL;
struct ulist *refs = NULL;
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
......@@ -1471,9 +1483,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
pr_debug("resolving all inodes for extent %llu\n",
extent_item_objectid);
if (search_commit_root) {
trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT;
} else {
if (!search_commit_root) {
trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
......
......@@ -23,8 +23,6 @@
#include "ulist.h"
#include "extent_io.h"
#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
struct inode_fs_paths {
struct btrfs_path *btrfs_path;
struct btrfs_root *fs_root;
......
......@@ -1089,7 +1089,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
tree_mod_log_free_eb(root->fs_info, buf);
if (last_ref)
tree_mod_log_free_eb(root->fs_info, buf);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
......@@ -1161,8 +1162,8 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
* time_seq).
*/
static void
__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
struct tree_mod_elem *first_tm)
__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
u64 time_seq, struct tree_mod_elem *first_tm)
{
u32 n;
struct rb_node *next;
......@@ -1172,6 +1173,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
unsigned long p_size = sizeof(struct btrfs_key_ptr);
n = btrfs_header_nritems(eb);
tree_mod_log_read_lock(fs_info);
while (tm && tm->seq >= time_seq) {
/*
* all the operations are recorded with the operator used for
......@@ -1226,6 +1228,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
if (tm->index != first_tm->index)
break;
}
tree_mod_log_read_unlock(fs_info);
btrfs_set_header_nritems(eb, n);
}
......@@ -1274,7 +1277,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
extent_buffer_get(eb_rewin);
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(eb_rewin, time_seq, tm);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
......@@ -1350,7 +1353,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_generation(eb, old_generation);
}
if (tm)
__tree_mod_log_rewind(eb, time_seq, tm);
__tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
else
WARN_ON(btrfs_header_level(eb) != 0);
WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
......@@ -2178,12 +2181,8 @@ static void reada_for_search(struct btrfs_root *root,
}
}
/*
* returns -EAGAIN if it had to drop the path, or zero if everything was in
* cache
*/
static noinline int reada_for_balance(struct btrfs_root *root,
struct btrfs_path *path, int level)
static noinline void reada_for_balance(struct btrfs_root *root,
struct btrfs_path *path, int level)
{
int slot;
int nritems;
......@@ -2192,12 +2191,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
u64 gen;
u64 block1 = 0;
u64 block2 = 0;
int ret = 0;
int blocksize;
parent = path->nodes[level + 1];
if (!parent)
return 0;
return;
nritems = btrfs_header_nritems(parent);
slot = path->slots[level + 1];
......@@ -2224,28 +2222,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block2 = 0;
free_extent_buffer(eb);
}
if (block1 || block2) {
ret = -EAGAIN;
/* release the whole path */
btrfs_release_path(path);
/* read the blocks */
if (block1)
readahead_tree_block(root, block1, blocksize, 0);
if (block2)
readahead_tree_block(root, block2, blocksize, 0);
if (block1) {
eb = read_tree_block(root, block1, blocksize, 0);
free_extent_buffer(eb);
}
if (block2) {
eb = read_tree_block(root, block2, blocksize, 0);
free_extent_buffer(eb);
}
}
return ret;
if (block1)
readahead_tree_block(root, block1, blocksize, 0);
if (block2)
readahead_tree_block(root, block2, blocksize, 0);
}
......@@ -2359,35 +2340,28 @@ read_block_for_search(struct btrfs_trans_handle *trans,
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp) {
/* first we do an atomic uptodate check */
if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
/*
* we found an up to date block without
* sleeping, return
* right away
*/
*eb_ret = tmp;
return 0;
}
/* the pages were up to date, but we failed
* the generation number check. Do a full
* read for the generation number that is correct.
* We must do this without dropping locks so
* we can trust our generation number
*/
free_extent_buffer(tmp);
btrfs_set_path_blocking(p);
if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
*eb_ret = tmp;
return 0;
}
/* now we're allowed to do a blocking uptodate check */
tmp = read_tree_block(root, blocknr, blocksize, gen);
if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
*eb_ret = tmp;
return 0;
}
free_extent_buffer(tmp);
btrfs_release_path(p);
return -EIO;
/* the pages were up to date, but we failed
* the generation number check. Do a full
* read for the generation number that is correct.
* We must do this without dropping locks so
* we can trust our generation number
*/
btrfs_set_path_blocking(p);
/* now we're allowed to do a blocking uptodate check */
ret = btrfs_read_buffer(tmp, gen);
if (!ret) {
*eb_ret = tmp;
return 0;
}
free_extent_buffer(tmp);
btrfs_release_path(p);
return -EIO;
}
/*
......@@ -2448,11 +2422,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
goto again;
}
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
reada_for_balance(root, p, level);
sret = split_node(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL, 0);
......@@ -2472,11 +2443,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
goto again;
}
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
reada_for_balance(root, p, level);
sret = balance_level(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL, 0);
......@@ -3143,7 +3111,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
*/
static noinline int insert_new_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int level, int log_removal)
struct btrfs_path *path, int level)
{
u64 lower_gen;
struct extent_buffer *lower;
......@@ -3194,7 +3162,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
old = root->node;
tree_mod_log_set_root_pointer(root, c, log_removal);
tree_mod_log_set_root_pointer(root, c, 0);
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
......@@ -3278,14 +3246,14 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
/*
* trying to split the root, lets make a new one
*
* tree mod log: We pass 0 as log_removal parameter to
* tree mod log: We don't log_removal old root in
* insert_new_root, because that root buffer will be kept as a
* normal node. We are going to log removal of half of the
* elements below with tree_mod_log_eb_copy. We're holding a
* tree lock on the buffer, which is why we cannot race with
* other tree_mod_log users.
*/
ret = insert_new_root(trans, root, path, level + 1, 0);
ret = insert_new_root(trans, root, path, level + 1);
if (ret)
return ret;
} else {
......@@ -3986,7 +3954,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
if (data_size) {
if (data_size && path->nodes[1]) {
wret = push_leaf_right(trans, root, path, data_size,
data_size, 0, 0);
if (wret < 0)
......@@ -4005,7 +3973,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
}
if (!path->nodes[1]) {
ret = insert_new_root(trans, root, path, 1, 1);
ret = insert_new_root(trans, root, path, 1);
if (ret)
return ret;
}
......
......@@ -961,8 +961,8 @@ struct btrfs_dev_replace_item {
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
#define BTRFS_BLOCK_GROUP_RAID5 (1 << 7)
#define BTRFS_BLOCK_GROUP_RAID6 (1 << 8)
#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
enum btrfs_raid_types {
......@@ -1101,6 +1101,18 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
/*
* bytes_pinned is kept in line with what is actually pinned, as in
* we've called update_block_group and dropped the bytes_used counter
* and increased the bytes_pinned counter. However this means that
* bytes_pinned does not reflect the bytes that will be pinned once the
* delayed refs are flushed, so this counter is inc'ed everytime we call
* btrfs_free_extent so it is a realtime count of what will be freed
* once the transaction is committed. It will be zero'ed everytime the
* transaction commits.
*/
struct percpu_counter total_bytes_pinned;
/*
* we bump reservation progress every time we decrement
* bytes_reserved. This way people waiting for reservations
......@@ -1437,25 +1449,22 @@ struct btrfs_fs_info {
atomic_t open_ioctl_trans;
/*
* this is used by the balancing code to wait for all the pending
* ordered extents
* this is used to protect the following list -- ordered_roots.
*/
spinlock_t ordered_extent_lock;
spinlock_t ordered_root_lock;
/*
* all of the data=ordered extents pending writeback
* all fs/file tree roots in which there are data=ordered extents
* pending writeback are added into this list.
*
* these can span multiple transactions and basically include
* every dirty data page that isn't from nodatacow
*/
struct list_head ordered_extents;
struct list_head ordered_roots;
spinlock_t delalloc_lock;
/*
* all of the inodes that have delalloc bytes. It is possible for
* this list to be empty even when there is still dirty data=ordered
* extents waiting to finish IO.
*/
struct list_head delalloc_inodes;
spinlock_t delalloc_root_lock;
/* all fs/file tree roots that have delalloc inodes. */
struct list_head delalloc_roots;
/*
* there is a pool of worker threads for checksumming during writes
......@@ -1498,8 +1507,6 @@ struct btrfs_fs_info {
int do_barriers;
int closing;
int log_root_recovering;
int enospc_unlink;
int trans_no_join;
u64 total_pinned;
......@@ -1594,6 +1601,12 @@ struct btrfs_fs_info {
struct rb_root qgroup_tree;
spinlock_t qgroup_lock;
/*
* used to avoid frequently calling ulist_alloc()/ulist_free()
* when doing qgroup accounting, it must be protected by qgroup_lock.
*/
struct ulist *qgroup_ulist;
/* protect user change for quota operations */
struct mutex qgroup_ioctl_lock;
......@@ -1607,6 +1620,8 @@ struct btrfs_fs_info {
struct mutex qgroup_rescan_lock; /* protects the progress item */
struct btrfs_key qgroup_rescan_progress;
struct btrfs_workers qgroup_rescan_workers;
struct completion qgroup_rescan_completion;
struct btrfs_work qgroup_rescan_work;
/* filesystem state */
unsigned long fs_state;
......@@ -1739,6 +1754,31 @@ struct btrfs_root {
int force_cow;
spinlock_t root_item_lock;
atomic_t refs;
spinlock_t delalloc_lock;
/*
* all of the inodes that have delalloc bytes. It is possible for
* this list to be empty even when there is still dirty data=ordered
* extents waiting to finish IO.
*/
struct list_head delalloc_inodes;
struct list_head delalloc_root;
u64 nr_delalloc_inodes;
/*
* this is used by the balancing code to wait for all the pending
* ordered extents
*/
spinlock_t ordered_extent_lock;
/*
* all of the data=ordered extents pending writeback
* these can span multiple transactions and basically include
* every dirty data page that isn't from nodatacow
*/
struct list_head ordered_extents;
struct list_head ordered_root;
u64 nr_ordered_extents;
};
struct btrfs_ioctl_defrag_range_args {
......@@ -3028,6 +3068,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
num_items;
}
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
......@@ -3039,6 +3081,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct btrfs_root *root,
struct extent_buffer *eb);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 offset, u64 bytenr);
......@@ -3155,6 +3199,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor);
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
......@@ -3311,6 +3358,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
smp_mb();
return fs_info->closing;
}
/*
* If we remount the fs to be R/O or umount the fs, the cleaner needn't do
* anything except sleeping. This function is used to check the status of
* the fs.
*/
static inline int btrfs_need_cleaner_sleep(struct btrfs_root *root)
{
return (root->fs_info->sb->s_flags & MS_RDONLY ||
btrfs_fs_closing(root->fs_info));
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
{
kfree(fs_info->balance_ctl);
......@@ -3357,9 +3416,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
struct btrfs_root_item *item);
void btrfs_read_root_item(struct extent_buffer *eb, int slot,
struct btrfs_root_item *item);
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
btrfs_root_item *item, struct btrfs_key *key);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key);
int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
void btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node);
......@@ -3493,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
size_t pg_offset, u64 start, u64 len,
int create);
noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
......@@ -3530,6 +3593,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
......@@ -3814,6 +3879,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
......
......@@ -535,20 +535,6 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item(
return next;
}
static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
u64 root_id)
{
struct btrfs_key root_key;
if (root->objectid == root_id)
return root;
root_key.objectid = root_id;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
}
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_delayed_item *item)
......
......@@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
btrfs_wait_ordered_extents(root, 0);
btrfs_wait_all_ordered_extents(root->fs_info, 0);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
......@@ -470,12 +470,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
ret = btrfs_start_delalloc_inodes(root, 0);
ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
btrfs_wait_ordered_extents(root, 0);
btrfs_wait_all_ordered_extents(root->fs_info, 0);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
......
This diff is collapsed.
......@@ -63,14 +63,40 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
struct btrfs_key *location);
struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
struct btrfs_key *location);
int btrfs_init_fs_root(struct btrfs_root *root);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_root *root);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_root *root);
/*
* This function is used to grab the root, and avoid it is freed when we
* access it. But it doesn't ensure that the tree is not dropped.
*
* If you want to ensure the whole tree is safe, you should use
* fs_info->subvol_srcu
*/
static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
{
if (atomic_inc_not_zero(&root->refs))
return root;
return NULL;
}
static inline void btrfs_put_fs_root(struct btrfs_root *root)
{
if (atomic_dec_and_test(&root->refs))
kfree(root);
}
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
......
......@@ -82,11 +82,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
goto fail;
}
if (btrfs_root_refs(&root->root_item) == 0) {
err = -ENOENT;
goto fail;
}
key.objectid = objectid;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
......
This diff is collapsed.
......@@ -77,10 +77,29 @@ void btrfs_leak_debug_check(void)
kmem_cache_free(extent_buffer_cache, eb);
}
}
#define btrfs_debug_check_extent_io_range(inode, start, end) \
__btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
struct inode *inode, u64 start, u64 end)
{
u64 isize = i_size_read(inode);
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
printk_ratelimited(KERN_DEBUG
"btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
caller,
(unsigned long long)btrfs_ino(inode),
(unsigned long long)isize,
(unsigned long long)start,
(unsigned long long)end);
}
}
#else
#define btrfs_leak_debug_add(new, head) do {} while (0)
#define btrfs_leak_debug_del(entry) do {} while (0)
#define btrfs_leak_debug_check() do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
#endif
#define BUFFER_LRU_MAX 64
......@@ -522,6 +541,11 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int err;
int clear = 0;
btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
if (bits & EXTENT_DELALLOC)
bits |= EXTENT_NORESERVE;
if (delete)
bits |= ~EXTENT_CTLBITS;
bits |= EXTENT_FIRST_DELALLOC;
......@@ -677,6 +701,8 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct rb_node *node;
btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
spin_lock(&tree->lock);
again:
while (1) {
......@@ -769,6 +795,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
......@@ -989,6 +1017,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
......@@ -2450,11 +2480,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct extent_state *cached = NULL;
struct extent_state *state;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct inode *inode = page->mapping->host;
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
"mirror=%lu\n", (u64)bio->bi_sector, err,
io_bio->mirror_num);
tree = &BTRFS_I(page->mapping->host)->io_tree;
tree = &BTRFS_I(inode)->io_tree;
/* We always issue full-page reads, but if some block
* in a page fails to read, blk_update_request() will
......@@ -2528,6 +2559,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
if (uptodate) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset;
/* Zero out the end if this page straddles i_size */
offset = i_size & (PAGE_CACHE_SIZE-1);
if (page->index == end_index && offset)
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
......
......@@ -19,6 +19,7 @@
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_NEED_WAIT (1 << 13)
#define EXTENT_DAMAGED (1 << 14)
#define EXTENT_NORESERVE (1 << 15)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
......
......@@ -34,8 +34,7 @@
#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
sizeof(struct btrfs_ordered_sum)) / \
sizeof(struct btrfs_sector_sum) * \
(r)->sectorsize - (r)->sectorsize)
sizeof(u32) * (r)->sectorsize)
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
......@@ -297,7 +296,6 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
struct btrfs_csum_item *item;
LIST_HEAD(tmplist);
unsigned long offset;
......@@ -368,34 +366,28 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct btrfs_csum_item);
while (start < csum_end) {
size = min_t(size_t, csum_end - start,
MAX_ORDERED_SUM_BYTES(root));
MAX_ORDERED_SUM_BYTES(root));
sums = kzalloc(btrfs_ordered_sum_size(root, size),
GFP_NOFS);
GFP_NOFS);
if (!sums) {
ret = -ENOMEM;
goto fail;
}
sector_sum = sums->sums;
sums->bytenr = start;
sums->len = size;
sums->len = (int)size;
offset = (start - key.offset) >>
root->fs_info->sb->s_blocksize_bits;
offset *= csum_size;
size >>= root->fs_info->sb->s_blocksize_bits;
while (size > 0) {
read_extent_buffer(path->nodes[0],
&sector_sum->sum,
((unsigned long)item) +
offset, csum_size);
sector_sum->bytenr = start;
size -= root->sectorsize;
start += root->sectorsize;
offset += csum_size;
sector_sum++;
}
read_extent_buffer(path->nodes[0],
sums->sums,
((unsigned long)item) + offset,
csum_size * size);
start += root->sectorsize * size;
list_add_tail(&sums->list, &tmplist);
}
path->slots[0]++;
......@@ -417,23 +409,20 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig)
{
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
struct btrfs_ordered_extent *ordered;
char *data;
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
int index;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
u64 offset;
u64 disk_bytenr;
WARN_ON(bio->bi_vcnt <= 0);
sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
if (!sums)
return -ENOMEM;
sector_sum = sums->sums;
disk_bytenr = (u64)bio->bi_sector << 9;
sums->len = bio->bi_size;
INIT_LIST_HEAD(&sums->list);
......@@ -444,7 +433,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
sums->bytenr = ordered->start;
sums->bytenr = (u64)bio->bi_sector << 9;
index = 0;
while (bio_index < bio->bi_vcnt) {
if (!contig)
......@@ -463,28 +453,27 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
GFP_NOFS);
BUG_ON(!sums); /* -ENOMEM */
sector_sum = sums->sums;
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
sums->bytenr = ordered->start;
sums->bytenr = ((u64)bio->bi_sector << 9) +
total_bytes;
index = 0;
}
data = kmap_atomic(bvec->bv_page);
sector_sum->sum = ~(u32)0;
sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset,
sector_sum->sum,
bvec->bv_len);
sums->sums[index] = ~(u32)0;
sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset,
sums->sums[index],
bvec->bv_len);
kunmap_atomic(data);
btrfs_csum_final(sector_sum->sum,
(char *)&sector_sum->sum);
sector_sum->bytenr = disk_bytenr;
btrfs_csum_final(sums->sums[index],
(char *)(sums->sums + index));
sector_sum++;
bio_index++;
index++;
total_bytes += bvec->bv_len;
this_sum_bytes += bvec->bv_len;
disk_bytenr += bvec->bv_len;
offset += bvec->bv_len;
bvec++;
}
......@@ -672,62 +661,46 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
return ret;
}
static u64 btrfs_sector_sum_left(struct btrfs_ordered_sum *sums,
struct btrfs_sector_sum *sector_sum,
u64 total_bytes, u64 sectorsize)
{
u64 tmp = sectorsize;
u64 next_sector = sector_sum->bytenr;
struct btrfs_sector_sum *next = sector_sum + 1;
while ((tmp + total_bytes) < sums->len) {
if (next_sector + sectorsize != next->bytenr)
break;
tmp += sectorsize;
next_sector = next->bytenr;
next++;
}
return tmp;
}
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums)
{
u64 bytenr;
int ret;
struct btrfs_key file_key;
struct btrfs_key found_key;
u64 next_offset;
u64 total_bytes = 0;
int found_next;
struct btrfs_path *path;
struct btrfs_csum_item *item;
struct btrfs_csum_item *item_end;
struct extent_buffer *leaf = NULL;
u64 next_offset;
u64 total_bytes = 0;
u64 csum_offset;
struct btrfs_sector_sum *sector_sum;
u64 bytenr;
u32 nritems;
u32 ins_size;
int index = 0;
int found_next;
int ret;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
sector_sum = sums->sums;
again:
next_offset = (u64)-1;
found_next = 0;
bytenr = sums->bytenr + total_bytes;
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
file_key.offset = sector_sum->bytenr;
bytenr = sector_sum->bytenr;
file_key.offset = bytenr;
btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
if (!IS_ERR(item)) {
leaf = path->nodes[0];
ret = 0;
leaf = path->nodes[0];
item_end = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
goto found;
}
ret = PTR_ERR(item);
......@@ -807,8 +780,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
free_space = btrfs_leaf_free_space(root, leaf) -
sizeof(struct btrfs_item) - csum_size;
tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes,
root->sectorsize);
tmp = sums->len - total_bytes;
tmp >>= root->fs_info->sb->s_blocksize_bits;
WARN_ON(tmp < 1);
......@@ -822,6 +794,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
diff *= csum_size;
btrfs_extend_item(root, path, diff);
ret = 0;
goto csum;
}
......@@ -831,8 +804,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
if (found_next) {
u64 tmp;
tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes,
root->sectorsize);
tmp = sums->len - total_bytes;
tmp >>= root->fs_info->sb->s_blocksize_bits;
tmp = min(tmp, (next_offset - file_key.offset) >>
root->fs_info->sb->s_blocksize_bits);
......@@ -853,31 +825,25 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
WARN_ON(1);
goto fail_unlock;
}
csum:
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
ret = 0;
item_end = (struct btrfs_csum_item *)((unsigned char *)item +
btrfs_item_size_nr(leaf, path->slots[0]));
item = (struct btrfs_csum_item *)((unsigned char *)item +
csum_offset * csum_size);
found:
item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
next_sector:
write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
total_bytes += root->sectorsize;
sector_sum++;
if (total_bytes < sums->len) {
item = (struct btrfs_csum_item *)((char *)item +
csum_size);
if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
sector_sum->bytenr) {
bytenr = sector_sum->bytenr;
goto next_sector;
}
}
ins_size = (u32)(sums->len - total_bytes) >>
root->fs_info->sb->s_blocksize_bits;
ins_size *= csum_size;
ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
ins_size);
write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
ins_size);
ins_size /= csum_size;
total_bytes += ins_size * root->sectorsize;
index += ins_size;
btrfs_mark_buffer_dirty(path->nodes[0]);
if (total_bytes < sums->len) {
......
......@@ -309,10 +309,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
ret = PTR_ERR(inode_root);
goto cleanup;
}
if (btrfs_root_refs(&inode_root->root_item) == 0) {
ret = -ENOENT;
goto cleanup;
}
key.objectid = defrag->ino;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
......@@ -1317,6 +1313,56 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
}
static noinline int check_can_nocow(struct inode *inode, loff_t pos,
size_t *write_bytes)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
u64 num_bytes;
int ret;
lockstart = round_down(pos, root->sectorsize);
lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1;
while (1) {
lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
ordered = btrfs_lookup_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (!ordered) {
break;
}
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
return PTR_ERR(trans);
}
num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
NULL);
btrfs_end_transaction(trans, root);
if (ret <= 0) {
ret = 0;
} else {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
NULL, GFP_NOFS);
*write_bytes = min_t(size_t, *write_bytes, num_bytes);
}
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
return ret;
}
static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct iov_iter *i,
loff_t pos)
......@@ -1324,10 +1370,12 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
u64 release_bytes = 0;
unsigned long first_index;
size_t num_written = 0;
int nrptrs;
int ret = 0;
bool only_release_metadata = false;
bool force_page_uptodate = false;
nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
......@@ -1348,6 +1396,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
offset);
size_t num_pages = (write_bytes + offset +
PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
size_t reserve_bytes;
size_t dirty_pages;
size_t copied;
......@@ -1362,11 +1411,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
break;
}
ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
ret = btrfs_check_data_free_space(inode, reserve_bytes);
if (ret == -ENOSPC &&
(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC))) {
ret = check_can_nocow(inode, pos, &write_bytes);
if (ret > 0) {
only_release_metadata = true;
/*
* our prealloc extent may be smaller than
* write_bytes, so scale down.
*/
num_pages = (write_bytes + offset +
PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
ret = 0;
} else {
ret = -ENOSPC;
}
}
if (ret)
break;
ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
if (ret) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode,
reserve_bytes);
break;
}
release_bytes = reserve_bytes;
/*
* This is going to setup the pages array with the number of
* pages we want, so we don't really need to worry about the
......@@ -1375,11 +1454,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
ret = prepare_pages(root, file, pages, num_pages,
pos, first_index, write_bytes,
force_page_uptodate);
if (ret) {
btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
if (ret)
break;
}
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, i);
......@@ -1409,30 +1485,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy.
*/
if (num_pages > dirty_pages) {
release_bytes = (num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
if (only_release_metadata)
btrfs_delalloc_release_metadata(inode,
release_bytes);
else
btrfs_delalloc_release_space(inode,
release_bytes);
}
release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
if (copied > 0) {
ret = btrfs_dirty_pages(root, inode, pages,
dirty_pages, pos, copied,
NULL);
if (ret) {
btrfs_delalloc_release_space(inode,
dirty_pages << PAGE_CACHE_SHIFT);
btrfs_drop_pages(pages, num_pages);
break;
}
}
release_bytes = 0;
btrfs_drop_pages(pages, num_pages);
if (only_release_metadata && copied > 0) {
u64 lockstart = round_down(pos, root->sectorsize);
u64 lockend = lockstart +
(dirty_pages << PAGE_CACHE_SHIFT) - 1;
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
NULL, GFP_NOFS);
only_release_metadata = false;
}
cond_resched();
balance_dirty_pages_ratelimited(inode->i_mapping);
......@@ -1445,6 +1537,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
kfree(pages);
if (release_bytes) {
if (only_release_metadata)
btrfs_delalloc_release_metadata(inode, release_bytes);
else
btrfs_delalloc_release_space(inode, release_bytes);
}
return num_written ? num_written : ret;
}
......@@ -2175,12 +2274,6 @@ static long btrfs_fallocate(struct file *file, int mode,
goto out_reserve_fail;
}
/*
* wait for ordered IO before we have any locks. We'll loop again
* below with the locks held.
*/
btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
mutex_lock(&inode->i_mutex);
ret = inode_newsize_ok(inode, alloc_end);
if (ret)
......@@ -2191,8 +2284,23 @@ static long btrfs_fallocate(struct file *file, int mode,
alloc_start);
if (ret)
goto out;
} else {
/*
* If we are fallocating from the end of the file onward we
* need to zero out the end of the page if i_size lands in the
* middle of a page.
*/
ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
if (ret)
goto out;
}
/*
* wait for ordered IO before we have any locks. We'll loop again
* below with the locks held.
*/
btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
locked_end = alloc_end - 1;
while (1) {
struct btrfs_ordered_extent *ordered;
......
This diff is collapsed.
......@@ -113,8 +113,6 @@ int btrfs_return_cluster_to_free_space(
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_free_space_cache(void);
#endif
#endif
This diff is collapsed.
......@@ -555,6 +555,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!root->ref_cows)
return -EINVAL;
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, 0);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
return -ENOMEM;
......@@ -2354,14 +2360,6 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
if (ret)
return ret;
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
mnt_drop_write_file(file);
return -EINVAL;
}
mutex_lock(&root->fs_info->volume_mutex);
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
......@@ -2369,12 +2367,20 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);
kfree(vol_args);
out:
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out;
}
mutex_lock(&root->fs_info->volume_mutex);
ret = btrfs_rm_device(root, vol_args->name);
mutex_unlock(&root->fs_info->volume_mutex);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
out:
kfree(vol_args);
mnt_drop_write_file(file);
return ret;
}
......@@ -2480,6 +2486,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
int ret;
u64 len = olen;
u64 bs = root->fs_info->sb->s_blocksize;
int same_inode = 0;
/*
* TODO:
......@@ -2516,7 +2523,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
ret = -EINVAL;
if (src == inode)
goto out_fput;
same_inode = 1;
/* the src must be open for reading */
if (!(src_file.file->f_mode & FMODE_READ))
......@@ -2547,12 +2554,16 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
}
path->reada = 2;
if (inode < src) {
mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
if (!same_inode) {
if (inode < src) {
mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
}
} else {
mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
mutex_lock(&src->i_mutex);
}
/* determine range to clone */
......@@ -2570,6 +2581,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
!IS_ALIGNED(destoff, bs))
goto out_unlock;
/* verify if ranges are overlapped within the same file */
if (same_inode) {
if (destoff + len > off && destoff < off + len)
goto out_unlock;
}
if (destoff > inode->i_size) {
ret = btrfs_cont_expand(inode, inode->i_size, destoff);
if (ret)
......@@ -2846,7 +2863,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
out_unlock:
mutex_unlock(&src->i_mutex);
mutex_unlock(&inode->i_mutex);
if (!same_inode)
mutex_unlock(&inode->i_mutex);
vfree(buf);
btrfs_free_path(path);
out_fput:
......@@ -2951,11 +2969,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
goto out;
}
if (btrfs_root_refs(&new_root->root_item) == 0) {
ret = -ENOENT;
goto out;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
......@@ -3719,9 +3732,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
break;
}
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
if (err && !ret)
ret = err;
......@@ -3937,6 +3947,16 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
return ret;
}
static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
return btrfs_qgroup_wait_for_completion(root->fs_info);
}
static long btrfs_ioctl_set_received_subvol(struct file *file,
void __user *arg)
{
......@@ -4179,6 +4199,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_quota_rescan(file, argp);
case BTRFS_IOC_QUOTA_RESCAN_STATUS:
return btrfs_ioctl_quota_rescan_status(file, argp);
case BTRFS_IOC_QUOTA_RESCAN_WAIT:
return btrfs_ioctl_quota_rescan_wait(file, argp);
case BTRFS_IOC_DEV_REPLACE:
return btrfs_ioctl_dev_replace(root, argp);
case BTRFS_IOC_GET_FSLABEL:
......
......@@ -31,8 +31,8 @@
struct workspace {
void *mem;
void *buf; /* where compressed data goes */
void *cbuf; /* where decompressed data goes */
void *buf; /* where decompressed data goes */
void *cbuf; /* where compressed data goes */
struct list_head list;
};
......
......@@ -24,6 +24,7 @@
#include "transaction.h"
#include "btrfs_inode.h"
#include "extent_io.h"
#include "disk-io.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
......@@ -184,6 +185,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
int type, int dio, int compress_type)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
......@@ -227,10 +229,18 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
ordered_data_tree_panic(inode, -EEXIST, file_offset);
spin_unlock_irq(&tree->lock);
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
spin_lock(&root->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
&BTRFS_I(inode)->root->fs_info->ordered_extents);
spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
&root->ordered_extents);
root->nr_ordered_extents++;
if (root->nr_ordered_extents == 1) {
spin_lock(&root->fs_info->ordered_root_lock);
BUG_ON(!list_empty(&root->ordered_root));
list_add_tail(&root->ordered_root,
&root->fs_info->ordered_roots);
spin_unlock(&root->fs_info->ordered_root_lock);
}
spin_unlock(&root->ordered_extent_lock);
return 0;
}
......@@ -516,8 +526,9 @@ void btrfs_remove_ordered_extent(struct inode *inode,
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_unlock_irq(&tree->lock);
spin_lock(&root->fs_info->ordered_extent_lock);
spin_lock(&root->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
root->nr_ordered_extents--;
trace_btrfs_ordered_extent_remove(inode, entry);
......@@ -530,7 +541,14 @@ void btrfs_remove_ordered_extent(struct inode *inode,
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
list_del_init(&BTRFS_I(inode)->ordered_operations);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
if (!root->nr_ordered_extents) {
spin_lock(&root->fs_info->ordered_root_lock);
BUG_ON(list_empty(&root->ordered_root));
list_del_init(&root->ordered_root);
spin_unlock(&root->fs_info->ordered_root_lock);
}
spin_unlock(&root->ordered_extent_lock);
wake_up(&entry->wait);
}
......@@ -550,7 +568,6 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
{
struct list_head splice, works;
struct list_head *cur;
struct btrfs_ordered_extent *ordered, *next;
struct inode *inode;
......@@ -558,35 +575,34 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
INIT_LIST_HEAD(&works);
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
spin_lock(&root->ordered_extent_lock);
list_splice_init(&root->ordered_extents, &splice);
while (!list_empty(&splice)) {
cur = splice.next;
ordered = list_entry(cur, struct btrfs_ordered_extent,
root_extent_list);
list_del_init(&ordered->root_extent_list);
atomic_inc(&ordered->refs);
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
root_extent_list);
list_move_tail(&ordered->root_extent_list,
&root->ordered_extents);
/*
* the inode may be getting freed (in sys_unlink path).
*/
inode = igrab(ordered->inode);
if (!inode) {
cond_resched_lock(&root->ordered_extent_lock);
continue;
}
spin_unlock(&root->fs_info->ordered_extent_lock);
atomic_inc(&ordered->refs);
spin_unlock(&root->ordered_extent_lock);
if (inode) {
ordered->flush_work.func = btrfs_run_ordered_extent_work;
list_add_tail(&ordered->work_list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&ordered->flush_work);
} else {
btrfs_put_ordered_extent(ordered);
}
ordered->flush_work.func = btrfs_run_ordered_extent_work;
list_add_tail(&ordered->work_list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&ordered->flush_work);
cond_resched();
spin_lock(&root->fs_info->ordered_extent_lock);
spin_lock(&root->ordered_extent_lock);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
spin_unlock(&root->ordered_extent_lock);
list_for_each_entry_safe(ordered, next, &works, work_list) {
list_del_init(&ordered->work_list);
......@@ -604,6 +620,33 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
mutex_unlock(&root->fs_info->ordered_operations_mutex);
}
void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
int delay_iput)
{
struct btrfs_root *root;
struct list_head splice;
INIT_LIST_HEAD(&splice);
spin_lock(&fs_info->ordered_root_lock);
list_splice_init(&fs_info->ordered_roots, &splice);
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
ordered_root);
root = btrfs_grab_fs_root(root);
BUG_ON(!root);
list_move_tail(&root->ordered_root,
&fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
btrfs_wait_ordered_extents(root, delay_iput);
btrfs_put_fs_root(root);
spin_lock(&fs_info->ordered_root_lock);
}
spin_unlock(&fs_info->ordered_root_lock);
}
/*
* this is used during transaction commit to write all the inodes
* added to the ordered operation list. These files must be fully on
......@@ -629,7 +672,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
INIT_LIST_HEAD(&works);
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
spin_lock(&root->fs_info->ordered_root_lock);
list_splice_init(&cur_trans->ordered_operations, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
......@@ -648,17 +691,17 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
if (!wait)
list_add_tail(&BTRFS_I(inode)->ordered_operations,
&cur_trans->ordered_operations);
spin_unlock(&root->fs_info->ordered_extent_lock);
spin_unlock(&root->fs_info->ordered_root_lock);
work = btrfs_alloc_delalloc_work(inode, wait, 1);
if (!work) {
spin_lock(&root->fs_info->ordered_extent_lock);
spin_lock(&root->fs_info->ordered_root_lock);
if (list_empty(&BTRFS_I(inode)->ordered_operations))
list_add_tail(&btrfs_inode->ordered_operations,
&splice);
list_splice_tail(&splice,
&cur_trans->ordered_operations);
spin_unlock(&root->fs_info->ordered_extent_lock);
spin_unlock(&root->fs_info->ordered_root_lock);
ret = -ENOMEM;
goto out;
}
......@@ -667,9 +710,9 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
&work->work);
cond_resched();
spin_lock(&root->fs_info->ordered_extent_lock);
spin_lock(&root->fs_info->ordered_root_lock);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
spin_unlock(&root->fs_info->ordered_root_lock);
out:
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
......@@ -989,7 +1032,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len)
{
struct btrfs_ordered_sum *ordered_sum;
struct btrfs_sector_sum *sector_sums;
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
unsigned long num_sectors;
......@@ -1007,18 +1049,16 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
i = (disk_bytenr - ordered_sum->bytenr) >>
inode->i_sb->s_blocksize_bits;
sector_sums = ordered_sum->sums + i;
num_sectors = ordered_sum->len >>
inode->i_sb->s_blocksize_bits;
for (; i < num_sectors; i++) {
if (sector_sums[i].bytenr == disk_bytenr) {
sum[index] = sector_sums[i].sum;
index++;
if (index == len)
goto out;
disk_bytenr += sectorsize;
}
}
num_sectors = min_t(int, len - index, num_sectors - i);
memcpy(sum + index, ordered_sum->sums + i,
num_sectors);
index += (int)num_sectors;
if (index == len)
goto out;
disk_bytenr += num_sectors * sectorsize;
}
}
out:
......@@ -1055,12 +1095,12 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
if (last_mod < root->fs_info->last_trans_committed)
return;
spin_lock(&root->fs_info->ordered_extent_lock);
spin_lock(&root->fs_info->ordered_root_lock);
if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
list_add_tail(&BTRFS_I(inode)->ordered_operations,
&cur_trans->ordered_operations);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
spin_unlock(&root->fs_info->ordered_root_lock);
}
int __init ordered_data_init(void)
......
......@@ -26,18 +26,6 @@ struct btrfs_ordered_inode_tree {
struct rb_node *last;
};
/*
* these are used to collect checksums done just before bios submission.
* They are attached via a list into the ordered extent, and
* checksum items are inserted into the tree after all the blocks in
* the ordered extent are on disk
*/
struct btrfs_sector_sum {
/* bytenr on disk */
u64 bytenr;
u32 sum;
};
struct btrfs_ordered_sum {
/* bytenr is the start of this extent on disk */
u64 bytenr;
......@@ -45,10 +33,10 @@ struct btrfs_ordered_sum {
/*
* this is the length in bytes covered by the sums array below.
*/
unsigned long len;
int len;
struct list_head list;
/* last field is a variable length array of btrfs_sector_sums */
struct btrfs_sector_sum sums[];
/* last field is a variable length array of csums */
u32 sums[];
};
/*
......@@ -149,11 +137,8 @@ struct btrfs_ordered_extent {
static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
unsigned long bytes)
{
unsigned long num_sectors = (bytes + root->sectorsize - 1) /
root->sectorsize;
num_sectors++;
return sizeof(struct btrfs_ordered_sum) +
num_sectors * sizeof(struct btrfs_sector_sum);
int num_sectors = (int)DIV_ROUND_UP(bytes, root->sectorsize);
return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(u32);
}
static inline void
......@@ -204,6 +189,8 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
int delay_iput);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
......
This diff is collapsed.
......@@ -1305,6 +1305,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
struct extent_buffer *eb;
struct btrfs_root_item *root_item;
struct btrfs_key root_key;
u64 last_snap = 0;
int ret;
root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
......@@ -1320,6 +1321,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
BTRFS_TREE_RELOC_OBJECTID);
BUG_ON(ret);
last_snap = btrfs_root_last_snapshot(&root->root_item);
btrfs_set_root_last_snapshot(&root->root_item,
trans->transid - 1);
} else {
......@@ -1345,6 +1347,12 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
memset(&root_item->drop_progress, 0,
sizeof(struct btrfs_disk_key));
root_item->drop_level = 0;
/*
* abuse rtransid, it is safe because it is impossible to
* receive data into a relocation tree.
*/
btrfs_set_root_rtransid(root_item, last_snap);
btrfs_set_root_otransid(root_item, trans->transid);
}
btrfs_tree_unlock(eb);
......@@ -1355,8 +1363,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
BUG_ON(ret);
kfree(root_item);
reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
&root_key);
reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key);
BUG_ON(IS_ERR(reloc_root));
reloc_root->last_trans = trans->transid;
return reloc_root;
......@@ -2273,8 +2280,12 @@ void free_reloc_roots(struct list_head *list)
static noinline_for_stack
int merge_reloc_roots(struct reloc_control *rc)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root;
struct btrfs_root *reloc_root;
u64 last_snap;
u64 otransid;
u64 objectid;
LIST_HEAD(reloc_roots);
int found = 0;
int ret = 0;
......@@ -2308,12 +2319,44 @@ int merge_reloc_roots(struct reloc_control *rc)
} else {
list_del_init(&reloc_root->root_list);
}
/*
* we keep the old last snapshod transid in rtranid when we
* created the relocation tree.
*/
last_snap = btrfs_root_rtransid(&reloc_root->root_item);
otransid = btrfs_root_otransid(&reloc_root->root_item);
objectid = reloc_root->root_key.offset;
ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
if (ret < 0) {
if (list_empty(&reloc_root->root_list))
list_add_tail(&reloc_root->root_list,
&reloc_roots);
goto out;
} else if (!ret) {
/*
* recover the last snapshot tranid to avoid
* the space balance break NOCOW.
*/
root = read_fs_root(rc->extent_root->fs_info,
objectid);
if (IS_ERR(root))
continue;
if (btrfs_root_refs(&root->root_item) == 0)
continue;
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
/* Check if the fs/file tree was snapshoted or not. */
if (btrfs_root_last_snapshot(&root->root_item) ==
otransid - 1)
btrfs_set_root_last_snapshot(&root->root_item,
last_snap);
btrfs_end_transaction(trans, root);
}
}
......@@ -3266,6 +3309,8 @@ static int __add_tree_block(struct reloc_control *rc,
struct btrfs_path *path;
struct btrfs_key key;
int ret;
bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info,
SKINNY_METADATA);
if (tree_block_processed(bytenr, blocksize, rc))
return 0;
......@@ -3276,10 +3321,15 @@ static int __add_tree_block(struct reloc_control *rc,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
again:
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = blocksize;
if (skinny) {
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = (u64)-1;
} else {
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = blocksize;
}
path->search_commit_root = 1;
path->skip_locking = 1;
......@@ -3287,11 +3337,23 @@ static int __add_tree_block(struct reloc_control *rc,
if (ret < 0)
goto out;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (ret > 0) {
if (key.objectid == bytenr &&
key.type == BTRFS_METADATA_ITEM_KEY)
ret = 0;
if (ret > 0 && skinny) {
if (path->slots[0]) {
path->slots[0]--;
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]);
if (key.objectid == bytenr &&
(key.type == BTRFS_METADATA_ITEM_KEY ||
(key.type == BTRFS_EXTENT_ITEM_KEY &&
key.offset == blocksize)))
ret = 0;
}
if (ret) {
skinny = false;
btrfs_release_path(path);
goto again;
}
}
BUG_ON(ret);
......@@ -4160,12 +4222,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
(unsigned long long)rc->block_group->key.objectid,
(unsigned long long)rc->block_group->flags);
ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
if (ret < 0) {
err = ret;
goto out;
}
btrfs_wait_ordered_extents(fs_info->tree_root, 0);
btrfs_wait_all_ordered_extents(fs_info, 0);
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
......@@ -4277,7 +4339,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
key.type != BTRFS_ROOT_ITEM_KEY)
break;
reloc_root = btrfs_read_fs_root_no_radix(root, &key);
reloc_root = btrfs_read_fs_root(root, &key);
if (IS_ERR(reloc_root)) {
err = PTR_ERR(reloc_root);
goto out;
......@@ -4396,10 +4458,8 @@ int btrfs_recover_relocation(struct btrfs_root *root)
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
struct btrfs_ordered_extent *ordered;
struct btrfs_root *root = BTRFS_I(inode)->root;
size_t offset;
int ret;
u64 disk_bytenr;
LIST_HEAD(list);
......@@ -4413,19 +4473,13 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
if (ret)
goto out;
disk_bytenr = ordered->start;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
sector_sum = sums->sums;
sums->bytenr = ordered->start;
offset = 0;
while (offset < sums->len) {
sector_sum->bytenr += ordered->start - disk_bytenr;
sector_sum++;
offset += root->sectorsize;
}
sums->bytenr = disk_bytenr;
disk_bytenr += sums->len;
btrfs_add_ordered_sum(inode, ordered, sums);
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -205,6 +205,10 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 new_alloced = ulist->nodes_alloced + 128;
struct ulist_node *new_nodes;
void *old = NULL;
int i;
for (i = 0; i < ulist->nnodes; i++)
rb_erase(&ulist->nodes[i].rb_node, &ulist->root);
/*
* if nodes_alloced == ULIST_SIZE no memory has been allocated
......@@ -224,6 +228,17 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
ulist->nodes = new_nodes;
ulist->nodes_alloced = new_alloced;
/*
* krealloc actually uses memcpy, which does not copy rb_node
* pointers, so we have to do it ourselves. Otherwise we may
* be bitten by crashes.
*/
for (i = 0; i < ulist->nnodes; i++) {
ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]);
if (ret < 0)
return ret;
}
}
ulist->nodes[ulist->nnodes].val = val;
ulist->nodes[ulist->nnodes].aux = aux;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment