Commit fcebe456 authored by Josef Bacik's avatar Josef Bacik Committed by Chris Mason

Btrfs: rework qgroup accounting

Currently qgroups account for space by intercepting delayed ref updates to fs
trees.  It does this by adding sequence numbers to delayed ref updates so that
it can figure out how the tree looked before the update so we can adjust the
counters properly.  The problem with this is that it does not allow delayed refs
to be merged, so if you say are defragging an extent with 5k snapshots pointing
to it we will thrash the delayed ref lock because we need to go back and
manually merge these things together.  Instead we want to process quota changes
when we know they are going to happen, like when we first allocate an extent, we
free a reference for an extent, we add new references etc.  This patch
accomplishes this by only adding qgroup operations for real ref changes.  We
only modify the sequence number when we need to lookup roots for bytenrs, this
reduces the amount of churn on the sequence number and allows us to merge
delayed refs as we add them most of the time.  This patch encompasses a bunch of
architectural changes

1) qgroup ref operations: instead of tracking qgroup operations through the
delayed refs we simply add new ref operations whenever we notice that we need to
when we've modified the refs themselves.

2) tree mod seq:  we no longer have this separation of major/minor counters.
this makes the sequence number stuff much more sane and we can remove some
locking that was needed to protect the counter.

3) delayed ref seq: we now read the tree mod seq number and use that as our
sequence.  This means each new delayed ref doesn't have it's own unique sequence
number, rather whenever we go to lookup backrefs we inc the sequence number so
we can make sure to keep any new operations from screwing up our world view at
that given point.  This allows us to merge delayed refs during runtime.

With all of these changes the delayed ref stuff is a little saner and the qgroup
accounting stuff no longer goes negative in some cases like it was before.
Thanks,
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent 5dca6eea
...@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, ...@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans, int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots); u64 time_seq, struct ulist **roots);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off, u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent, struct extent_buffer *eb_in, u64 parent,
......
...@@ -356,43 +356,13 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) ...@@ -356,43 +356,13 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
} }
/* /*
* Increment the upper half of tree_mod_seq, set lower half zero. * Pull a new tree mod seq number for our operation.
*
* Must be called with fs_info->tree_mod_seq_lock held.
*/
static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
{
u64 seq = atomic64_read(&fs_info->tree_mod_seq);
seq &= 0xffffffff00000000ull;
seq += 1ull << 32;
atomic64_set(&fs_info->tree_mod_seq, seq);
return seq;
}
/*
* Increment the lower half of tree_mod_seq.
*
* Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
* are generated should not technically require a spin lock here. (Rationale:
* incrementing the minor while incrementing the major seq number is between its
* atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
* just returns a unique sequence number as usual.) We have decided to leave
* that requirement in here and rethink it once we notice it really imposes a
* problem on some workload.
*/ */
static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info) static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{ {
return atomic64_inc_return(&fs_info->tree_mod_seq); return atomic64_inc_return(&fs_info->tree_mod_seq);
} }
/*
* return the last minor in the previous major tree_mod_seq number
*/
u64 btrfs_tree_mod_seq_prev(u64 seq)
{
return (seq & 0xffffffff00000000ull) - 1ull;
}
/* /*
* This adds a new blocker to the tree mod log's blocker list if the @elem * This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects * passed does not already have a sequence number set. So when a caller expects
...@@ -404,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq) ...@@ -404,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem) struct seq_list *elem)
{ {
u64 seq;
tree_mod_log_write_lock(fs_info); tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock); spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) { if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq_major(fs_info); elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
} }
seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock); spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info); tree_mod_log_write_unlock(fs_info);
return seq; return elem->seq;
} }
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
...@@ -489,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) ...@@ -489,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
BUG_ON(!tm); BUG_ON(!tm);
spin_lock(&fs_info->tree_mod_seq_lock); tm->seq = btrfs_inc_tree_mod_seq(fs_info);
tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tm_root = &fs_info->tree_mod_log; tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node; new = &tm_root->rb_node;
......
...@@ -1648,7 +1648,10 @@ struct btrfs_fs_info { ...@@ -1648,7 +1648,10 @@ struct btrfs_fs_info {
/* holds configuration and tracking. Protected by qgroup_lock */ /* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree; struct rb_root qgroup_tree;
struct rb_root qgroup_op_tree;
spinlock_t qgroup_lock; spinlock_t qgroup_lock;
spinlock_t qgroup_op_lock;
atomic_t qgroup_op_seq;
/* /*
* used to avoid frequently calling ulist_alloc()/ulist_free() * used to avoid frequently calling ulist_alloc()/ulist_free()
...@@ -3300,9 +3303,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, ...@@ -3300,9 +3303,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte, u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data); struct btrfs_key *ins, int is_data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow); struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow); struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags, u64 bytenr, u64 num_bytes, u64 flags,
...@@ -3310,7 +3313,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, ...@@ -3310,7 +3313,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
int btrfs_free_extent(struct btrfs_trans_handle *trans, int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow); u64 owner, u64 offset, int no_quota);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
...@@ -3322,7 +3325,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ...@@ -3322,7 +3325,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset, int for_cow); u64 root_objectid, u64 owner, u64 offset, int no_quota);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root); struct btrfs_root *root);
...@@ -3410,7 +3413,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); ...@@ -3410,7 +3413,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info); struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags); int __get_raid_index(u64 flags);
int btrfs_start_nocow_write(struct btrfs_root *root); int btrfs_start_nocow_write(struct btrfs_root *root);
void btrfs_end_nocow_write(struct btrfs_root *root); void btrfs_end_nocow_write(struct btrfs_root *root);
/* ctree.c */ /* ctree.c */
...@@ -3586,7 +3588,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, ...@@ -3586,7 +3588,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem); struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem); struct seq_list *elem);
u64 btrfs_tree_mod_seq_prev(u64 seq);
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */ /* root-item.c */
...@@ -4094,52 +4095,6 @@ void btrfs_reada_detach(void *handle); ...@@ -4094,52 +4095,6 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err); u64 start, int err);
/* qgroup.c */
struct qgroup_update {
struct list_head list;
struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_extent_op *extent_op;
};
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
static inline int is_fstree(u64 rootid) static inline int is_fstree(u64 rootid)
{ {
if (rootid == BTRFS_FS_TREE_OBJECTID || if (rootid == BTRFS_FS_TREE_OBJECTID ||
......
...@@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, ...@@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
return -1; return -1;
if (ref1->type > ref2->type) if (ref1->type > ref2->type)
return 1; return 1;
if (ref1->no_quota > ref2->no_quota)
return 1;
if (ref1->no_quota < ref2->no_quota)
return -1;
/* merging of sequenced refs is not allowed */ /* merging of sequenced refs is not allowed */
if (compare_seq) { if (compare_seq) {
if (ref1->seq < ref2->seq) if (ref1->seq < ref2->seq)
...@@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ...@@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr, struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, int level, u64 num_bytes, u64 parent, u64 ref_root, int level,
int action, int for_cow) int action, int no_quota)
{ {
struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref; struct btrfs_delayed_tree_ref *full_ref;
...@@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ...@@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
if (action == BTRFS_ADD_DELAYED_EXTENT) if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF; action = BTRFS_ADD_DELAYED_REF;
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
delayed_refs = &trans->transaction->delayed_refs; delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */ /* first set the basic ref node struct up */
...@@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ...@@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->action = action; ref->action = action;
ref->is_head = 0; ref->is_head = 0;
ref->in_tree = 1; ref->in_tree = 1;
ref->no_quota = no_quota;
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq; ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref); full_ref = btrfs_delayed_node_to_tree_ref(ref);
...@@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, ...@@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr, struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
u64 offset, int action, int for_cow) u64 offset, int action, int no_quota)
{ {
struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref; struct btrfs_delayed_data_ref *full_ref;
...@@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, ...@@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs; delayed_refs = &trans->transaction->delayed_refs;
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
/* first set the basic ref node struct up */ /* first set the basic ref node struct up */
atomic_set(&ref->refs, 1); atomic_set(&ref->refs, 1);
ref->bytenr = bytenr; ref->bytenr = bytenr;
...@@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, ...@@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->action = action; ref->action = action;
ref->is_head = 0; ref->is_head = 0;
ref->in_tree = 1; ref->in_tree = 1;
ref->no_quota = no_quota;
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq; ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref); full_ref = btrfs_delayed_node_to_data_ref(ref);
...@@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ...@@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent, u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action, u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op, struct btrfs_delayed_extent_op *extent_op,
int for_cow) int no_quota)
{ {
struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_root *delayed_refs;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
BUG_ON(extent_op && extent_op->is_data); BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref) if (!ref)
...@@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ...@@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action, num_bytes, parent, ref_root, level, action,
for_cow); no_quota);
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0; return 0;
} }
...@@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, ...@@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 parent, u64 ref_root, u64 parent, u64 ref_root,
u64 owner, u64 offset, int action, u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op, struct btrfs_delayed_extent_op *extent_op,
int for_cow) int no_quota)
{ {
struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_root *delayed_refs;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
BUG_ON(extent_op && !extent_op->is_data); BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref) if (!ref)
...@@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, ...@@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset, num_bytes, parent, ref_root, owner, offset,
action, for_cow); action, no_quota);
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0; return 0;
} }
......
...@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node { ...@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {
unsigned int action:8; unsigned int action:8;
unsigned int type:8; unsigned int type:8;
unsigned int no_quota:1;
/* is this node still in the rbtree? */ /* is this node still in the rbtree? */
unsigned int is_head:1; unsigned int is_head:1;
unsigned int in_tree:1; unsigned int in_tree:1;
...@@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ...@@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent, u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action, u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op, struct btrfs_delayed_extent_op *extent_op,
int for_cow); int no_quota);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans, struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root, u64 parent, u64 ref_root,
u64 owner, u64 offset, int action, u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op, struct btrfs_delayed_extent_op *extent_op,
int for_cow); int no_quota);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans, struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 bytenr, u64 num_bytes,
...@@ -230,25 +231,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, ...@@ -230,25 +231,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_root *delayed_refs,
u64 seq); u64 seq);
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
if (for_cow)
return 0;
if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;
if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/* /*
* a node might live in a head or a regular ref, this lets you * a node might live in a head or a regular ref, this lets you
* test for the proper type to use. * test for the proper type to use.
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include "dev-replace.h" #include "dev-replace.h"
#include "raid56.h" #include "raid56.h"
#include "sysfs.h" #include "sysfs.h"
#include "qgroup.h"
#ifdef CONFIG_X86 #ifdef CONFIG_X86
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
...@@ -2219,6 +2220,7 @@ int open_ctree(struct super_block *sb, ...@@ -2219,6 +2220,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->free_chunk_lock); spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->buffer_lock);
rwlock_init(&fs_info->tree_mod_log_lock); rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->reloc_mutex);
...@@ -2244,6 +2246,7 @@ int open_ctree(struct super_block *sb, ...@@ -2244,6 +2246,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic64_set(&fs_info->tree_mod_seq, 0); atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb; fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024; fs_info->max_inline = 8192 * 1024;
...@@ -2353,6 +2356,7 @@ int open_ctree(struct super_block *sb, ...@@ -2353,6 +2356,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_lock); spin_lock_init(&fs_info->qgroup_lock);
mutex_init(&fs_info->qgroup_ioctl_lock); mutex_init(&fs_info->qgroup_ioctl_lock);
fs_info->qgroup_tree = RB_ROOT; fs_info->qgroup_tree = RB_ROOT;
fs_info->qgroup_op_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups); INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1; fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0; fs_info->quota_enabled = 0;
......
This diff is collapsed.
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "tree-log.h" #include "tree-log.h"
#include "locking.h" #include "locking.h"
#include "volumes.h" #include "volumes.h"
#include "qgroup.h"
static struct kmem_cache *btrfs_inode_defrag_cachep; static struct kmem_cache *btrfs_inode_defrag_cachep;
/* /*
...@@ -849,7 +850,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, ...@@ -849,7 +850,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
disk_bytenr, num_bytes, 0, disk_bytenr, num_bytes, 0,
root->root_key.objectid, root->root_key.objectid,
new_key.objectid, new_key.objectid,
start - extent_offset, 0); start - extent_offset, 1);
BUG_ON(ret); /* -ENOMEM */ BUG_ON(ret); /* -ENOMEM */
} }
key.offset = start; key.offset = start;
...@@ -1206,7 +1207,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ...@@ -1206,7 +1207,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid, root->root_key.objectid,
ino, orig_offset, 0); ino, orig_offset, 1);
BUG_ON(ret); /* -ENOMEM */ BUG_ON(ret); /* -ENOMEM */
if (split == start) { if (split == start) {
......
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include "dev-replace.h" #include "dev-replace.h"
#include "props.h" #include "props.h"
#include "sysfs.h" #include "sysfs.h"
#include "qgroup.h"
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
...@@ -2941,6 +2942,41 @@ static long btrfs_ioctl_file_extent_same(struct file *file, ...@@ -2941,6 +2942,41 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
return ret; return ret;
} }
/* Helper to check and see if this root currently has a ref on the given disk
* bytenr. If it does then we need to update the quota for this root. This
* doesn't do anything if quotas aren't enabled.
*/
static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 disko)
{
struct seq_list tree_mod_seq_elem = {};
struct ulist *roots;
struct ulist_iterator uiter;
struct ulist_node *root_node = NULL;
int ret;
if (!root->fs_info->quota_enabled)
return 1;
btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
ret = btrfs_find_all_roots(trans, root->fs_info, disko,
tree_mod_seq_elem.seq, &roots);
if (ret < 0)
goto out;
ret = 0;
ULIST_ITER_INIT(&uiter);
while ((root_node = ulist_next(roots, &uiter))) {
if (root_node->val == root->objectid) {
ret = 1;
break;
}
}
ulist_free(roots);
out:
btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
return ret;
}
/** /**
* btrfs_clone() - clone a range from inode file to another * btrfs_clone() - clone a range from inode file to another
* *
...@@ -2964,7 +3000,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode, ...@@ -2964,7 +3000,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
u32 nritems; u32 nritems;
int slot; int slot;
int ret; int ret;
int no_quota;
u64 len = olen_aligned; u64 len = olen_aligned;
u64 last_disko = 0;
ret = -ENOMEM; ret = -ENOMEM;
buf = vmalloc(btrfs_level_size(root, 0)); buf = vmalloc(btrfs_level_size(root, 0));
...@@ -2996,6 +3034,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, ...@@ -2996,6 +3034,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
nritems = btrfs_header_nritems(path->nodes[0]); nritems = btrfs_header_nritems(path->nodes[0]);
process_slot: process_slot:
no_quota = 1;
if (path->slots[0] >= nritems) { if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(BTRFS_I(src)->root, path); ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
if (ret < 0) if (ret < 0)
...@@ -3128,6 +3167,28 @@ static int btrfs_clone(struct inode *src, struct inode *inode, ...@@ -3128,6 +3167,28 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
datao); datao);
btrfs_set_file_extent_num_bytes(leaf, extent, btrfs_set_file_extent_num_bytes(leaf, extent,
datal); datal);
/*
* We need to look up the roots that point at
* this bytenr and see if the new root does. If
* it does not we need to make sure we update
* quotas appropriately.
*/
if (disko && root != BTRFS_I(src)->root &&
disko != last_disko) {
no_quota = check_ref(trans, root,
disko);
if (no_quota < 0) {
btrfs_abort_transaction(trans,
root,
ret);
btrfs_end_transaction(trans,
root);
ret = no_quota;
goto out;
}
}
if (disko) { if (disko) {
inode_add_bytes(inode, datal); inode_add_bytes(inode, datal);
ret = btrfs_inc_extent_ref(trans, root, ret = btrfs_inc_extent_ref(trans, root,
...@@ -3135,7 +3196,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, ...@@ -3135,7 +3196,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
root->root_key.objectid, root->root_key.objectid,
btrfs_ino(inode), btrfs_ino(inode),
new_key.offset - datao, new_key.offset - datao,
0); no_quota);
if (ret) { if (ret) {
btrfs_abort_transaction(trans, btrfs_abort_transaction(trans,
root, root,
......
This diff is collapsed.
/*
* Copyright (C) 2014 Facebook. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_QGROUP__
#define __BTRFS_QGROUP__
/*
* A description of the operations, all of these operations only happen when we
* are adding the 1st reference for that subvolume in the case of adding space
* or on the last reference delete in the case of subtraction. The only
* exception is the last one, which is added for confusion.
*
* BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
* one pointing at the bytes we are adding. This is called on the first
* allocation.
*
* BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
* shared between subvols. This is called on the creation of a ref that already
* has refs from a different subvolume, so basically reflink.
*
* BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
* one referencing the range.
*
* BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
* refs with other subvolumes.
*/
enum btrfs_qgroup_operation_type {
BTRFS_QGROUP_OPER_ADD_EXCL,
BTRFS_QGROUP_OPER_ADD_SHARED,
BTRFS_QGROUP_OPER_SUB_EXCL,
BTRFS_QGROUP_OPER_SUB_SHARED,
};
struct btrfs_qgroup_operation {
u64 ref_root;
u64 bytenr;
u64 num_bytes;
u64 seq;
enum btrfs_qgroup_operation_type type;
struct seq_list elem;
struct rb_node n;
struct list_head list;
};
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 ref_root,
u64 bytenr, u64 num_bytes,
enum btrfs_qgroup_operation_type type,
int mod_seq);
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
u64 rfer, u64 excl);
#endif
#endif /* __BTRFS_QGROUP__ */
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "inode-map.h" #include "inode-map.h"
#include "volumes.h" #include "volumes.h"
#include "dev-replace.h" #include "dev-replace.h"
#include "qgroup.h"
#define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_TRANS_TAG 0
...@@ -703,23 +704,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, ...@@ -703,23 +704,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
return 0; return 0;
} }
/*
* do the qgroup accounting as early as possible
*/
err = btrfs_delayed_refs_qgroup_accounting(trans, info);
btrfs_trans_release_metadata(trans, root); btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL; trans->block_rsv = NULL;
if (trans->qgroup_reserved) {
/*
* the same root has to be passed here between start_transaction
* and end_transaction. Subvolume quota depends on this.
*/
btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
if (!list_empty(&trans->new_bgs)) if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root); btrfs_create_pending_block_groups(trans, root);
...@@ -730,6 +717,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, ...@@ -730,6 +717,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_run_delayed_refs(trans, root, cur); btrfs_run_delayed_refs(trans, root, cur);
} }
if (trans->qgroup_reserved) {
/*
* the same root has to be passed here between start_transaction
* and end_transaction. Subvolume quota depends on this.
*/
btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
btrfs_trans_release_metadata(trans, root); btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL; trans->block_rsv = NULL;
...@@ -1169,12 +1165,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ...@@ -1169,12 +1165,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto no_free_objectid; goto no_free_objectid;
} }
pending->error = btrfs_qgroup_inherit(trans, fs_info,
root->root_key.objectid,
objectid, pending->inherit);
if (pending->error)
goto no_free_objectid;
key.objectid = objectid; key.objectid = objectid;
key.offset = (u64)-1; key.offset = (u64)-1;
key.type = BTRFS_ROOT_ITEM_KEY; key.type = BTRFS_ROOT_ITEM_KEY;
...@@ -1271,6 +1261,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ...@@ -1271,6 +1261,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail; goto fail;
} }
/*
* We need to flush delayed refs in order to make sure all of our quota
* operations have been done before we call btrfs_qgroup_inherit.
*/
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto fail;
}
pending->error = btrfs_qgroup_inherit(trans, fs_info,
root->root_key.objectid,
objectid, pending->inherit);
if (pending->error)
goto no_free_objectid;
/* see comments in should_cow_block() */ /* see comments in should_cow_block() */
set_bit(BTRFS_ROOT_FORCE_COW, &root->state); set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_wmb(); smp_wmb();
...@@ -1599,12 +1605,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, ...@@ -1599,12 +1605,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
* them now so that they hinder processing of more delayed refs * them now so that they hinder processing of more delayed refs
* as little as possible. * as little as possible.
*/ */
if (ret) {
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
return ret;
}
ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
if (ret) if (ret)
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment