Commit adff377b authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (24 commits)
  Btrfs: fix free space cache leak
  Btrfs: avoid taking the chunk_mutex in do_chunk_alloc
  Btrfs end_bio_extent_readpage should look for locked bits
  Btrfs: don't force chunk allocation in find_free_extent
  Btrfs: Check validity before setting an acl
  Btrfs: Fix incorrect inode nlink in btrfs_link()
  Btrfs: Check if btrfs_next_leaf() returns error in btrfs_real_readdir()
  Btrfs: Check if btrfs_next_leaf() returns error in btrfs_listxattr()
  Btrfs: make uncache_state unconditional
  btrfs: using cached extent_state in set/unlock combinations
  Btrfs: avoid taking the trans_mutex in btrfs_end_transaction
  Btrfs: fix subvolume mount by name problem when default mount subvolume is set
  fix user annotation in ioctl.c
  Btrfs: check for duplicate iov_base's when doing dio reads
  btrfs: properly handle overlapping areas in memmove_extent_buffer
  Btrfs: fix memory leaks in btrfs_new_inode()
  Btrfs: check for duplicate iov_base's when doing dio reads
  Btrfs: reuse the extent_map we found when calling btrfs_get_extent
  Btrfs: do not use async submit for small DIO io's
  Btrfs: don't split dio bios if we don't have to
  ...
parents d8bdc59f f65647c2
...@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, ...@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
if (value) { if (value) {
acl = posix_acl_from_xattr(value, size); acl = posix_acl_from_xattr(value, size);
if (acl == NULL) { if (acl) {
value = NULL; ret = posix_acl_valid(acl);
size = 0; if (ret)
goto out;
} else if (IS_ERR(acl)) { } else if (IS_ERR(acl)) {
return PTR_ERR(acl); return PTR_ERR(acl);
} }
} }
ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type); ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
out:
posix_acl_release(acl); posix_acl_release(acl);
return ret; return ret;
......
...@@ -740,8 +740,10 @@ struct btrfs_space_info { ...@@ -740,8 +740,10 @@ struct btrfs_space_info {
*/ */
unsigned long reservation_progress; unsigned long reservation_progress;
int full; /* indicates that we cannot allocate any more int full:1; /* indicates that we cannot allocate any more
chunks for this space */ chunks for this space */
int chunk_alloc:1; /* set if we are allocating a chunk */
int force_alloc; /* set if we need to force a chunk alloc for int force_alloc; /* set if we need to force a chunk alloc for
this space */ this space */
...@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, ...@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct inode *inode, u64 start, u64 end); struct inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file); int btrfs_release_file(struct inode *inode, struct file *file);
void btrfs_drop_pages(struct page **pages, size_t num_pages);
int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
struct page **pages, size_t num_pages,
loff_t pos, size_t write_bytes,
struct extent_state **cached);
/* tree-defrag.c */ /* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
......
...@@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) ...@@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
btrfs_destroy_pinned_extent(root, btrfs_destroy_pinned_extent(root,
root->fs_info->pinned_extents); root->fs_info->pinned_extents);
t->use_count = 0; atomic_set(&t->use_count, 0);
list_del_init(&t->list); list_del_init(&t->list);
memset(t, 0, sizeof(*t)); memset(t, 0, sizeof(*t));
kmem_cache_free(btrfs_transaction_cachep, t); kmem_cache_free(btrfs_transaction_cachep, t);
......
...@@ -33,6 +33,25 @@ ...@@ -33,6 +33,25 @@
#include "locking.h" #include "locking.h"
#include "free-space-cache.h" #include "free-space-cache.h"
/* control flags for do_chunk_alloc's force field
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
* if we really need one.
*
* CHUNK_ALLOC_FORCE means it must try to allocate one
*
* CHUNK_ALLOC_LIMITED means to only try and allocate one
* if we have very few chunks already allocated. This is
* used as part of the clustering code to help make sure
* we have a good pool of storage to cluster in, without
* filling the FS with empty chunks
*
*/
/* values accepted by do_chunk_alloc()'s force argument */
enum {
/* only allocate a chunk if we really need one */
CHUNK_ALLOC_NO_FORCE = 0,
/* must try to allocate a chunk */
CHUNK_ALLOC_FORCE = 1,
/* only try to allocate when very few chunks are already allocated */
CHUNK_ALLOC_LIMITED = 2,
};
static int update_block_group(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc); u64 bytenr, u64 num_bytes, int alloc);
...@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, ...@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_readonly = 0; found->bytes_readonly = 0;
found->bytes_may_use = 0; found->bytes_may_use = 0;
found->full = 0; found->full = 0;
found->force_alloc = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
*space_info = found; *space_info = found;
list_add_rcu(&found->list, &info->space_info); list_add_rcu(&found->list, &info->space_info);
atomic_set(&found->caching_threads, 0); atomic_set(&found->caching_threads, 0);
...@@ -3150,7 +3170,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) ...@@ -3150,7 +3170,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
if (!data_sinfo->full && alloc_chunk) { if (!data_sinfo->full && alloc_chunk) {
u64 alloc_target; u64 alloc_target;
data_sinfo->force_alloc = 1; data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
spin_unlock(&data_sinfo->lock); spin_unlock(&data_sinfo->lock);
alloc: alloc:
alloc_target = btrfs_get_alloc_profile(root, 1); alloc_target = btrfs_get_alloc_profile(root, 1);
...@@ -3160,7 +3180,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) ...@@ -3160,7 +3180,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
ret = do_chunk_alloc(trans, root->fs_info->extent_root, ret = do_chunk_alloc(trans, root->fs_info->extent_root,
bytes + 2 * 1024 * 1024, bytes + 2 * 1024 * 1024,
alloc_target, 0); alloc_target,
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
if (ret < 0) { if (ret < 0) {
if (ret != -ENOSPC) if (ret != -ENOSPC)
...@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) ...@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(found, head, list) { list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_METADATA) if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
found->force_alloc = 1; found->force_alloc = CHUNK_ALLOC_FORCE;
} }
rcu_read_unlock(); rcu_read_unlock();
} }
static int should_alloc_chunk(struct btrfs_root *root, static int should_alloc_chunk(struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 alloc_bytes) struct btrfs_space_info *sinfo, u64 alloc_bytes,
int force)
{ {
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
u64 thresh; u64 thresh;
if (sinfo->bytes_used + sinfo->bytes_reserved + if (force == CHUNK_ALLOC_FORCE)
alloc_bytes + 256 * 1024 * 1024 < num_bytes) return 1;
/*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
*/
if (force == CHUNK_ALLOC_LIMITED) {
thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
thresh = max_t(u64, 64 * 1024 * 1024,
div_factor_fine(thresh, 1));
if (num_bytes - num_allocated < thresh)
return 1;
}
/*
* we have two similar checks here, one based on percentage
* and one based on a hard number of 256MB. The idea
* is that if we have a good amount of free
* room, don't allocate a chunk. A good amount is
* less than 80% utilized of the chunks we have allocated,
* or more than 256MB free
*/
if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
return 0; return 0;
if (sinfo->bytes_used + sinfo->bytes_reserved + if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
alloc_bytes < div_factor(num_bytes, 8))
return 0; return 0;
thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
/* 256MB or 5% of the FS */
thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
return 0; return 0;
return 1; return 1;
} }
...@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, ...@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
{ {
struct btrfs_space_info *space_info; struct btrfs_space_info *space_info;
struct btrfs_fs_info *fs_info = extent_root->fs_info; struct btrfs_fs_info *fs_info = extent_root->fs_info;
int wait_for_alloc = 0;
int ret = 0; int ret = 0;
mutex_lock(&fs_info->chunk_mutex);
flags = btrfs_reduce_alloc_profile(extent_root, flags); flags = btrfs_reduce_alloc_profile(extent_root, flags);
space_info = __find_space_info(extent_root->fs_info, flags); space_info = __find_space_info(extent_root->fs_info, flags);
...@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, ...@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
} }
BUG_ON(!space_info); BUG_ON(!space_info);
again:
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
if (space_info->force_alloc) if (space_info->force_alloc)
force = 1; force = space_info->force_alloc;
if (space_info->full) { if (space_info->full) {
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
goto out; return 0;
} }
if (!force && !should_alloc_chunk(extent_root, space_info, if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
alloc_bytes)) {
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
goto out; return 0;
} else if (space_info->chunk_alloc) {
wait_for_alloc = 1;
} else {
space_info->chunk_alloc = 1;
} }
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
mutex_lock(&fs_info->chunk_mutex);
/*
* The chunk_mutex is held throughout the entirety of a chunk
* allocation, so once we've acquired the chunk_mutex we know that the
* other guy is done and we need to recheck and see if we should
* allocate.
*/
if (wait_for_alloc) {
mutex_unlock(&fs_info->chunk_mutex);
wait_for_alloc = 0;
goto again;
}
/* /*
* If we have mixed data/metadata chunks we want to make sure we keep * If we have mixed data/metadata chunks we want to make sure we keep
* allocating mixed chunks instead of individual chunks. * allocating mixed chunks instead of individual chunks.
...@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, ...@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info->full = 1; space_info->full = 1;
else else
ret = 1; ret = 1;
space_info->force_alloc = 0;
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex); mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret; return ret;
} }
...@@ -5303,11 +5368,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ...@@ -5303,11 +5368,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (allowed_chunk_alloc) { if (allowed_chunk_alloc) {
ret = do_chunk_alloc(trans, root, num_bytes + ret = do_chunk_alloc(trans, root, num_bytes +
2 * 1024 * 1024, data, 1); 2 * 1024 * 1024, data,
CHUNK_ALLOC_LIMITED);
allowed_chunk_alloc = 0; allowed_chunk_alloc = 0;
done_chunk_alloc = 1; done_chunk_alloc = 1;
} else if (!done_chunk_alloc) { } else if (!done_chunk_alloc &&
space_info->force_alloc = 1; space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
space_info->force_alloc = CHUNK_ALLOC_LIMITED;
} }
if (loop < LOOP_NO_EMPTY_SIZE) { if (loop < LOOP_NO_EMPTY_SIZE) {
...@@ -5393,7 +5460,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, ...@@ -5393,7 +5460,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
*/ */
if (empty_size || root->ref_cows) if (empty_size || root->ref_cows)
ret = do_chunk_alloc(trans, root->fs_info->extent_root, ret = do_chunk_alloc(trans, root->fs_info->extent_root,
num_bytes + 2 * 1024 * 1024, data, 0); num_bytes + 2 * 1024 * 1024, data,
CHUNK_ALLOC_NO_FORCE);
WARN_ON(num_bytes < root->sectorsize); WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size, ret = find_free_extent(trans, root, num_bytes, empty_size,
...@@ -5405,7 +5473,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, ...@@ -5405,7 +5473,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = num_bytes & ~(root->sectorsize - 1);
num_bytes = max(num_bytes, min_alloc_size); num_bytes = max(num_bytes, min_alloc_size);
do_chunk_alloc(trans, root->fs_info->extent_root, do_chunk_alloc(trans, root->fs_info->extent_root,
num_bytes, data, 1); num_bytes, data, CHUNK_ALLOC_FORCE);
goto again; goto again;
} }
if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
...@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, ...@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
alloc_flags = update_block_group_flags(root, cache->flags); alloc_flags = update_block_group_flags(root, cache->flags);
if (alloc_flags != cache->flags) if (alloc_flags != cache->flags)
do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
CHUNK_ALLOC_FORCE);
ret = set_block_group_ro(cache); ret = set_block_group_ro(cache);
if (!ret) if (!ret)
goto out; goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags); alloc_flags = get_alloc_profile(root, cache->space_info->flags);
ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
CHUNK_ALLOC_FORCE);
if (ret < 0) if (ret < 0)
goto out; goto out;
ret = set_block_group_ro(cache); ret = set_block_group_ro(cache);
...@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, ...@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 type) struct btrfs_root *root, u64 type)
{ {
u64 alloc_flags = get_alloc_profile(root, type); u64 alloc_flags = get_alloc_profile(root, type);
return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
CHUNK_ALLOC_FORCE);
} }
/* /*
......
...@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state, ...@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
} }
} }
/*
 * Release the extent_state held in *cached_ptr, if there is one.
 *
 * The caller's pointer is cleared first and the state is then handed to
 * free_extent_state().  A NULL cached_ptr, or a cached_ptr that points at
 * NULL, is a no-op.
 *
 * NOTE(review): presumably this drops the reference taken by cache_state();
 * confirm against free_extent_state().
 */
static void uncache_state(struct extent_state **cached_ptr)
{
	struct extent_state *held;

	if (!cached_ptr || !*cached_ptr)
		return;

	held = *cached_ptr;
	*cached_ptr = NULL;
	free_extent_state(held);
}
/* /*
* set some bits on a range in the tree. This may require allocations or * set some bits on a range in the tree. This may require allocations or
* sleeping, so the gfp mask is used to indicate what is allowed. * sleeping, so the gfp mask is used to indicate what is allowed.
...@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
} }
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask) struct extent_state **cached_state, gfp_t mask)
{ {
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
NULL, mask); NULL, cached_state, mask);
} }
static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
...@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
mask); mask);
} }
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
gfp_t mask)
{ {
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
mask); mask);
...@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) ...@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
do { do {
struct page *page = bvec->bv_page; struct page *page = bvec->bv_page;
struct extent_state *cached = NULL;
struct extent_state *state;
tree = &BTRFS_I(page->mapping->host)->io_tree; tree = &BTRFS_I(page->mapping->host)->io_tree;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + start = ((u64)page->index << PAGE_CACHE_SHIFT) +
...@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) ...@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (++bvec <= bvec_end) if (++bvec <= bvec_end)
prefetchw(&bvec->bv_page->flags); prefetchw(&bvec->bv_page->flags);
spin_lock(&tree->lock);
state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
if (state && state->start == start) {
/*
* take a reference on the state, unlock will drop
* the ref
*/
cache_state(state, &cached);
}
spin_unlock(&tree->lock);
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end, ret = tree->ops->readpage_end_io_hook(page, start, end,
NULL); state);
if (ret) if (ret)
uptodate = 0; uptodate = 0;
} }
...@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) ...@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
test_bit(BIO_UPTODATE, &bio->bi_flags); test_bit(BIO_UPTODATE, &bio->bi_flags);
if (err) if (err)
uptodate = 0; uptodate = 0;
uncache_state(&cached);
continue; continue;
} }
} }
if (uptodate) { if (uptodate) {
set_extent_uptodate(tree, start, end, set_extent_uptodate(tree, start, end, &cached,
GFP_ATOMIC); GFP_ATOMIC);
} }
unlock_extent(tree, start, end, GFP_ATOMIC); unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
if (whole_page) { if (whole_page) {
if (uptodate) { if (uptodate) {
...@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) ...@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
do { do {
struct page *page = bvec->bv_page; struct page *page = bvec->bv_page;
struct extent_state *cached = NULL;
tree = &BTRFS_I(page->mapping->host)->io_tree; tree = &BTRFS_I(page->mapping->host)->io_tree;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + start = ((u64)page->index << PAGE_CACHE_SHIFT) +
...@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) ...@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
prefetchw(&bvec->bv_page->flags); prefetchw(&bvec->bv_page->flags);
if (uptodate) { if (uptodate) {
set_extent_uptodate(tree, start, end, GFP_ATOMIC); set_extent_uptodate(tree, start, end, &cached,
GFP_ATOMIC);
} else { } else {
ClearPageUptodate(page); ClearPageUptodate(page);
SetPageError(page); SetPageError(page);
} }
unlock_extent(tree, start, end, GFP_ATOMIC); unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
} while (bvec >= bio->bi_io_vec); } while (bvec >= bio->bi_io_vec);
...@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, ...@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
while (cur <= end) { while (cur <= end) {
if (cur >= last_byte) { if (cur >= last_byte) {
char *userpage; char *userpage;
struct extent_state *cached = NULL;
iosize = PAGE_CACHE_SIZE - page_offset; iosize = PAGE_CACHE_SIZE - page_offset;
userpage = kmap_atomic(page, KM_USER0); userpage = kmap_atomic(page, KM_USER0);
memset(userpage + page_offset, 0, iosize); memset(userpage + page_offset, 0, iosize);
flush_dcache_page(page); flush_dcache_page(page);
kunmap_atomic(userpage, KM_USER0); kunmap_atomic(userpage, KM_USER0);
set_extent_uptodate(tree, cur, cur + iosize - 1, set_extent_uptodate(tree, cur, cur + iosize - 1,
GFP_NOFS); &cached, GFP_NOFS);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent_cached(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
break; break;
} }
em = get_extent(inode, page, page_offset, cur, em = get_extent(inode, page, page_offset, cur,
...@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, ...@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
/* we've found a hole, just zero and go on */ /* we've found a hole, just zero and go on */
if (block_start == EXTENT_MAP_HOLE) { if (block_start == EXTENT_MAP_HOLE) {
char *userpage; char *userpage;
struct extent_state *cached = NULL;
userpage = kmap_atomic(page, KM_USER0); userpage = kmap_atomic(page, KM_USER0);
memset(userpage + page_offset, 0, iosize); memset(userpage + page_offset, 0, iosize);
flush_dcache_page(page); flush_dcache_page(page);
kunmap_atomic(userpage, KM_USER0); kunmap_atomic(userpage, KM_USER0);
set_extent_uptodate(tree, cur, cur + iosize - 1, set_extent_uptodate(tree, cur, cur + iosize - 1,
GFP_NOFS); &cached, GFP_NOFS);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent_cached(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
cur = cur + iosize; cur = cur + iosize;
page_offset += iosize; page_offset += iosize;
continue; continue;
...@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree, ...@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
iocount++; iocount++;
block_start = block_start + iosize; block_start = block_start + iosize;
} else { } else {
set_extent_uptodate(tree, block_start, cur_end, struct extent_state *cached = NULL;
set_extent_uptodate(tree, block_start, cur_end, &cached,
GFP_NOFS); GFP_NOFS);
unlock_extent(tree, block_start, cur_end, GFP_NOFS); unlock_extent_cached(tree, block_start, cur_end,
&cached, GFP_NOFS);
block_start = cur_end + 1; block_start = cur_end + 1;
} }
page_offset = block_start & (PAGE_CACHE_SIZE - 1); page_offset = block_start & (PAGE_CACHE_SIZE - 1);
...@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, ...@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
num_pages = num_extent_pages(eb->start, eb->len); num_pages = num_extent_pages(eb->start, eb->len);
set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
GFP_NOFS); NULL, GFP_NOFS);
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i); page = extent_buffer_page(eb, i);
if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
...@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page, ...@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
kunmap_atomic(dst_kaddr, KM_USER0); kunmap_atomic(dst_kaddr, KM_USER0);
} }
/*
 * Return true when two ranges of len bytes, one starting at offset src and
 * the other at offset dst, share at least one byte.  That is the case
 * exactly when the distance between the two start offsets is smaller than
 * len; a zero len never overlaps.
 */
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	if (src > dst)
		return src - dst < len;

	return dst - src < len;
}
static void copy_pages(struct page *dst_page, struct page *src_page, static void copy_pages(struct page *dst_page, struct page *src_page,
unsigned long dst_off, unsigned long src_off, unsigned long dst_off, unsigned long src_off,
unsigned long len) unsigned long len)
...@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page, ...@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
char *src_kaddr; char *src_kaddr;
if (dst_page != src_page) if (dst_page != src_page) {
src_kaddr = kmap_atomic(src_page, KM_USER1); src_kaddr = kmap_atomic(src_page, KM_USER1);
else } else {
src_kaddr = dst_kaddr; src_kaddr = dst_kaddr;
BUG_ON(areas_overlap(src_off, dst_off, len));
}
memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
kunmap_atomic(dst_kaddr, KM_USER0); kunmap_atomic(dst_kaddr, KM_USER0);
...@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, ...@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
"len %lu len %lu\n", dst_offset, len, dst->len); "len %lu len %lu\n", dst_offset, len, dst->len);
BUG_ON(1); BUG_ON(1);
} }
if (dst_offset < src_offset) { if (!areas_overlap(src_offset, dst_offset, len)) {
memcpy_extent_buffer(dst, dst_offset, src_offset, len); memcpy_extent_buffer(dst, dst_offset, src_offset, len);
return; return;
} }
......
...@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive_bits, u64 *failed_start, int bits, int exclusive_bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask); struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask); struct extent_state **cached_state, gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask); gfp_t mask);
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
......
...@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, ...@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* /*
* unlocks pages after btrfs_file_write is done with them * unlocks pages after btrfs_file_write is done with them
*/ */
static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) void btrfs_drop_pages(struct page **pages, size_t num_pages)
{ {
size_t i; size_t i;
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
...@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) ...@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
* this also makes the decision about creating an inline extent vs * this also makes the decision about creating an inline extent vs
* doing real data extents, marking pages dirty and delalloc as required. * doing real data extents, marking pages dirty and delalloc as required.
*/ */
static noinline int dirty_and_release_pages(struct btrfs_root *root, int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
struct file *file, struct page **pages, size_t num_pages,
struct page **pages, loff_t pos, size_t write_bytes,
size_t num_pages, struct extent_state **cached)
loff_t pos,
size_t write_bytes)
{ {
int err = 0; int err = 0;
int i; int i;
struct inode *inode = fdentry(file)->d_inode;
u64 num_bytes; u64 num_bytes;
u64 start_pos; u64 start_pos;
u64 end_of_last_block; u64 end_of_last_block;
...@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root, ...@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
end_of_last_block = start_pos + num_bytes - 1; end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
NULL); cached);
if (err) if (err)
return err; return err;
...@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
} }
if (copied > 0) { if (copied > 0) {
ret = dirty_and_release_pages(root, file, pages, ret = btrfs_dirty_pages(root, inode, pages,
dirty_pages, pos, dirty_pages, pos, copied,
copied); NULL);
if (ret) { if (ret) {
btrfs_delalloc_release_space(inode, btrfs_delalloc_release_space(inode,
dirty_pages << PAGE_CACHE_SHIFT); dirty_pages << PAGE_CACHE_SHIFT);
......
...@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
struct inode *inode; struct inode *inode;
struct rb_node *node; struct rb_node *node;
struct list_head *pos, *n; struct list_head *pos, *n;
struct page **pages;
struct page *page; struct page *page;
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
struct btrfs_free_cluster *cluster = NULL; struct btrfs_free_cluster *cluster = NULL;
...@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
u64 start, end, len; u64 start, end, len;
u64 bytes = 0; u64 bytes = 0;
u32 *crc, *checksums; u32 *crc, *checksums;
pgoff_t index = 0, last_index = 0;
unsigned long first_page_offset; unsigned long first_page_offset;
int num_checksums; int index = 0, num_pages = 0;
int entries = 0; int entries = 0;
int bitmaps = 0; int bitmaps = 0;
int ret = 0; int ret = 0;
bool next_page = false; bool next_page = false;
bool out_of_space = false;
root = root->fs_info->tree_root; root = root->fs_info->tree_root;
...@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
return 0; return 0;
} }
last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
filemap_write_and_wait(inode->i_mapping); filemap_write_and_wait(inode->i_mapping);
btrfs_wait_ordered_range(inode, inode->i_size & btrfs_wait_ordered_range(inode, inode->i_size &
~(root->sectorsize - 1), (u64)-1); ~(root->sectorsize - 1), (u64)-1);
/* We need a checksum per page. */ /* We need a checksum per page. */
num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
if (!crc) { if (!crc) {
iput(inode); iput(inode);
return 0; return 0;
} }
pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
if (!pages) {
kfree(crc);
iput(inode);
return 0;
}
/* Since the first page has all of our checksums and our generation we /* Since the first page has all of our checksums and our generation we
* need to calculate the offset into the page that we can start writing * need to calculate the offset into the page that we can start writing
* our entries. * our entries.
*/ */
first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
/* Get the cluster for this block_group if it exists */ /* Get the cluster for this block_group if it exists */
if (!list_empty(&block_group->cluster_list)) if (!list_empty(&block_group->cluster_list))
...@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
* after find_get_page at this point. Just putting this here so people * after find_get_page at this point. Just putting this here so people
* know and don't freak out. * know and don't freak out.
*/ */
while (index <= last_index) { while (index < num_pages) {
page = grab_cache_page(inode->i_mapping, index); page = grab_cache_page(inode->i_mapping, index);
if (!page) { if (!page) {
pgoff_t i = 0; int i;
while (i < index) { for (i = 0; i < num_pages; i++) {
page = find_get_page(inode->i_mapping, i); unlock_page(pages[i]);
unlock_page(page); page_cache_release(pages[i]);
page_cache_release(page);
page_cache_release(page);
i++;
} }
goto out_free; goto out_free;
} }
pages[index] = page;
index++; index++;
} }
...@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
offset = start_offset; offset = start_offset;
} }
page = find_get_page(inode->i_mapping, index); if (index >= num_pages) {
out_of_space = true;
break;
}
page = pages[index];
addr = kmap(page); addr = kmap(page);
entry = addr + start_offset; entry = addr + start_offset;
...@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
bytes += PAGE_CACHE_SIZE; bytes += PAGE_CACHE_SIZE;
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
/*
* We need to release our reference we got for grab_cache_page,
* except for the first page which will hold our checksums, we
* do that below.
*/
if (index != 0) {
unlock_page(page);
page_cache_release(page);
}
page_cache_release(page);
index++; index++;
} while (node || next_page); } while (node || next_page);
...@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_free_space *entry = struct btrfs_free_space *entry =
list_entry(pos, struct btrfs_free_space, list); list_entry(pos, struct btrfs_free_space, list);
page = find_get_page(inode->i_mapping, index); if (index >= num_pages) {
out_of_space = true;
break;
}
page = pages[index];
addr = kmap(page); addr = kmap(page);
memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
...@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
crc++; crc++;
bytes += PAGE_CACHE_SIZE; bytes += PAGE_CACHE_SIZE;
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
page_cache_release(page);
list_del_init(&entry->list); list_del_init(&entry->list);
index++; index++;
} }
if (out_of_space) {
btrfs_drop_pages(pages, num_pages);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state,
GFP_NOFS);
ret = 0;
goto out_free;
}
/* Zero out the rest of the pages just to make sure */ /* Zero out the rest of the pages just to make sure */
while (index <= last_index) { while (index < num_pages) {
void *addr; void *addr;
page = find_get_page(inode->i_mapping, index); page = pages[index];
addr = kmap(page); addr = kmap(page);
memset(addr, 0, PAGE_CACHE_SIZE); memset(addr, 0, PAGE_CACHE_SIZE);
kunmap(page); kunmap(page);
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
page_cache_release(page);
bytes += PAGE_CACHE_SIZE; bytes += PAGE_CACHE_SIZE;
index++; index++;
} }
btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
/* Write the checksums and trans id to the first page */ /* Write the checksums and trans id to the first page */
{ {
void *addr; void *addr;
u64 *gen; u64 *gen;
page = find_get_page(inode->i_mapping, 0); page = pages[0];
addr = kmap(page); addr = kmap(page);
memcpy(addr, checksums, sizeof(u32) * num_checksums); memcpy(addr, checksums, sizeof(u32) * num_pages);
gen = addr + (sizeof(u32) * num_checksums); gen = addr + (sizeof(u32) * num_pages);
*gen = trans->transid; *gen = trans->transid;
kunmap(page); kunmap(page);
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
page_cache_release(page);
} }
BTRFS_I(inode)->generation = trans->transid;
ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
bytes, &cached_state);
btrfs_drop_pages(pages, num_pages);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS); i_size_read(inode) - 1, &cached_state, GFP_NOFS);
if (ret) {
ret = 0;
goto out_free;
}
BTRFS_I(inode)->generation = trans->transid;
filemap_write_and_wait(inode->i_mapping); filemap_write_and_wait(inode->i_mapping);
key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.objectid = BTRFS_FREE_SPACE_OBJECTID;
...@@ -853,6 +845,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, ...@@ -853,6 +845,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
BTRFS_I(inode)->generation = 0; BTRFS_I(inode)->generation = 0;
} }
kfree(checksums); kfree(checksums);
kfree(pages);
btrfs_update_inode(trans, root, inode); btrfs_update_inode(trans, root, inode);
iput(inode); iput(inode);
return ret; return ret;
......
This diff is collapsed.
...@@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) ...@@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info space;
struct btrfs_ioctl_space_info *dest; struct btrfs_ioctl_space_info *dest;
struct btrfs_ioctl_space_info *dest_orig; struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info *user_dest; struct btrfs_ioctl_space_info __user *user_dest;
struct btrfs_space_info *info; struct btrfs_space_info *info;
u64 types[] = {BTRFS_BLOCK_GROUP_DATA, u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_SYSTEM,
......
...@@ -159,7 +159,7 @@ enum { ...@@ -159,7 +159,7 @@ enum {
Opt_compress_type, Opt_compress_force, Opt_compress_force_type, Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
Opt_enospc_debug, Opt_err, Opt_enospc_debug, Opt_subvolrootid, Opt_err,
}; };
static match_table_t tokens = { static match_table_t tokens = {
...@@ -189,6 +189,7 @@ static match_table_t tokens = { ...@@ -189,6 +189,7 @@ static match_table_t tokens = {
{Opt_clear_cache, "clear_cache"}, {Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_enospc_debug, "enospc_debug"}, {Opt_enospc_debug, "enospc_debug"},
{Opt_subvolrootid, "subvolrootid=%d"},
{Opt_err, NULL}, {Opt_err, NULL},
}; };
...@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) ...@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
break; break;
case Opt_subvol: case Opt_subvol:
case Opt_subvolid: case Opt_subvolid:
case Opt_subvolrootid:
case Opt_device: case Opt_device:
/* /*
* These are parsed by btrfs_parse_early_options * These are parsed by btrfs_parse_early_options
...@@ -388,7 +390,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) ...@@ -388,7 +390,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
*/ */
static int btrfs_parse_early_options(const char *options, fmode_t flags, static int btrfs_parse_early_options(const char *options, fmode_t flags,
void *holder, char **subvol_name, u64 *subvol_objectid, void *holder, char **subvol_name, u64 *subvol_objectid,
struct btrfs_fs_devices **fs_devices) u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
{ {
substring_t args[MAX_OPT_ARGS]; substring_t args[MAX_OPT_ARGS];
char *opts, *orig, *p; char *opts, *orig, *p;
...@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, ...@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
*subvol_objectid = intarg; *subvol_objectid = intarg;
} }
break; break;
case Opt_subvolrootid:
intarg = 0;
error = match_int(&args[0], &intarg);
if (!error) {
/* we want the original fs_tree */
if (!intarg)
*subvol_rootid =
BTRFS_FS_TREE_OBJECTID;
else
*subvol_rootid = intarg;
}
break;
case Opt_device: case Opt_device:
error = btrfs_scan_one_device(match_strdup(&args[0]), error = btrfs_scan_one_device(match_strdup(&args[0]),
flags, holder, fs_devices); flags, holder, fs_devices);
...@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, ...@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
fmode_t mode = FMODE_READ; fmode_t mode = FMODE_READ;
char *subvol_name = NULL; char *subvol_name = NULL;
u64 subvol_objectid = 0; u64 subvol_objectid = 0;
u64 subvol_rootid = 0;
int error = 0; int error = 0;
if (!(flags & MS_RDONLY)) if (!(flags & MS_RDONLY))
...@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, ...@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
error = btrfs_parse_early_options(data, mode, fs_type, error = btrfs_parse_early_options(data, mode, fs_type,
&subvol_name, &subvol_objectid, &subvol_name, &subvol_objectid,
&fs_devices); &subvol_rootid, &fs_devices);
if (error) if (error)
return ERR_PTR(error); return ERR_PTR(error);
...@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, ...@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
s->s_flags |= MS_ACTIVE; s->s_flags |= MS_ACTIVE;
} }
root = get_default_root(s, subvol_objectid); /* if they gave us a subvolume name bind mount into that */
if (strcmp(subvol_name, ".")) {
struct dentry *new_root;
root = get_default_root(s, subvol_rootid);
if (IS_ERR(root)) { if (IS_ERR(root)) {
error = PTR_ERR(root); error = PTR_ERR(root);
deactivate_locked_super(s); deactivate_locked_super(s);
goto error_free_subvol_name; goto error_free_subvol_name;
} }
/* if they gave us a subvolume name bind mount into that */
if (strcmp(subvol_name, ".")) {
struct dentry *new_root;
mutex_lock(&root->d_inode->i_mutex); mutex_lock(&root->d_inode->i_mutex);
new_root = lookup_one_len(subvol_name, root, new_root = lookup_one_len(subvol_name, root,
strlen(subvol_name)); strlen(subvol_name));
...@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, ...@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
} }
dput(root); dput(root);
root = new_root; root = new_root;
} else {
root = get_default_root(s, subvol_objectid);
if (IS_ERR(root)) {
error = PTR_ERR(root);
deactivate_locked_super(s);
goto error_free_subvol_name;
}
} }
kfree(subvol_name); kfree(subvol_name);
......
...@@ -32,10 +32,8 @@ ...@@ -32,10 +32,8 @@
static noinline void put_transaction(struct btrfs_transaction *transaction) static noinline void put_transaction(struct btrfs_transaction *transaction)
{ {
WARN_ON(transaction->use_count == 0); WARN_ON(atomic_read(&transaction->use_count) == 0);
transaction->use_count--; if (atomic_dec_and_test(&transaction->use_count)) {
if (transaction->use_count == 0) {
list_del_init(&transaction->list);
memset(transaction, 0, sizeof(*transaction)); memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction); kmem_cache_free(btrfs_transaction_cachep, transaction);
} }
...@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root) ...@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
if (!cur_trans) if (!cur_trans)
return -ENOMEM; return -ENOMEM;
root->fs_info->generation++; root->fs_info->generation++;
cur_trans->num_writers = 1; atomic_set(&cur_trans->num_writers, 1);
cur_trans->num_joined = 0; cur_trans->num_joined = 0;
cur_trans->transid = root->fs_info->generation; cur_trans->transid = root->fs_info->generation;
init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->writer_wait);
init_waitqueue_head(&cur_trans->commit_wait); init_waitqueue_head(&cur_trans->commit_wait);
cur_trans->in_commit = 0; cur_trans->in_commit = 0;
cur_trans->blocked = 0; cur_trans->blocked = 0;
cur_trans->use_count = 1; atomic_set(&cur_trans->use_count, 1);
cur_trans->commit_done = 0; cur_trans->commit_done = 0;
cur_trans->start_time = get_seconds(); cur_trans->start_time = get_seconds();
...@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root) ...@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
root->fs_info->running_transaction = cur_trans; root->fs_info->running_transaction = cur_trans;
spin_unlock(&root->fs_info->new_trans_lock); spin_unlock(&root->fs_info->new_trans_lock);
} else { } else {
cur_trans->num_writers++; atomic_inc(&cur_trans->num_writers);
cur_trans->num_joined++; cur_trans->num_joined++;
} }
...@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root) ...@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
cur_trans = root->fs_info->running_transaction; cur_trans = root->fs_info->running_transaction;
if (cur_trans && cur_trans->blocked) { if (cur_trans && cur_trans->blocked) {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
cur_trans->use_count++; atomic_inc(&cur_trans->use_count);
while (1) { while (1) {
prepare_to_wait(&root->fs_info->transaction_wait, &wait, prepare_to_wait(&root->fs_info->transaction_wait, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
...@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ...@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{ {
struct btrfs_trans_handle *h; struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans; struct btrfs_transaction *cur_trans;
int retries = 0;
int ret; int ret;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
...@@ -204,7 +203,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ...@@ -204,7 +203,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
} }
cur_trans = root->fs_info->running_transaction; cur_trans = root->fs_info->running_transaction;
cur_trans->use_count++; atomic_inc(&cur_trans->use_count);
if (type != TRANS_JOIN_NOLOCK) if (type != TRANS_JOIN_NOLOCK)
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
...@@ -224,10 +223,18 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ...@@ -224,10 +223,18 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
if (num_items > 0) { if (num_items > 0) {
ret = btrfs_trans_reserve_metadata(h, root, num_items); ret = btrfs_trans_reserve_metadata(h, root, num_items);
if (ret == -EAGAIN) { if (ret == -EAGAIN && !retries) {
retries++;
btrfs_commit_transaction(h, root); btrfs_commit_transaction(h, root);
goto again; goto again;
} else if (ret == -EAGAIN) {
/*
* We have already retried and got EAGAIN, so really we
* don't have space, so set ret to -ENOSPC.
*/
ret = -ENOSPC;
} }
if (ret < 0) { if (ret < 0) {
btrfs_end_transaction(h, root); btrfs_end_transaction(h, root);
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) ...@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
goto out_unlock; /* nothing committing|committed */ goto out_unlock; /* nothing committing|committed */
} }
cur_trans->use_count++; atomic_inc(&cur_trans->use_count);
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
wait_for_commit(root, cur_trans); wait_for_commit(root, cur_trans);
...@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, ...@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
wake_up_process(info->transaction_kthread); wake_up_process(info->transaction_kthread);
} }
if (lock)
mutex_lock(&info->trans_mutex);
WARN_ON(cur_trans != info->running_transaction); WARN_ON(cur_trans != info->running_transaction);
WARN_ON(cur_trans->num_writers < 1); WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
cur_trans->num_writers--; atomic_dec(&cur_trans->num_writers);
smp_mb(); smp_mb();
if (waitqueue_active(&cur_trans->writer_wait)) if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait); wake_up(&cur_trans->writer_wait);
put_transaction(cur_trans); put_transaction(cur_trans);
if (lock)
mutex_unlock(&info->trans_mutex);
if (current->journal_info == trans) if (current->journal_info == trans)
current->journal_info = NULL; current->journal_info = NULL;
...@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, ...@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
/* take transaction reference */ /* take transaction reference */
mutex_lock(&root->fs_info->trans_mutex); mutex_lock(&root->fs_info->trans_mutex);
cur_trans = trans->transaction; cur_trans = trans->transaction;
cur_trans->use_count++; atomic_inc(&cur_trans->use_count);
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
...@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->trans_mutex); mutex_lock(&root->fs_info->trans_mutex);
if (cur_trans->in_commit) { if (cur_trans->in_commit) {
cur_trans->use_count++; atomic_inc(&cur_trans->use_count);
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
...@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
prev_trans = list_entry(cur_trans->list.prev, prev_trans = list_entry(cur_trans->list.prev,
struct btrfs_transaction, list); struct btrfs_transaction, list);
if (!prev_trans->commit_done) { if (!prev_trans->commit_done) {
prev_trans->use_count++; atomic_inc(&prev_trans->use_count);
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
wait_for_commit(root, prev_trans); wait_for_commit(root, prev_trans);
...@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
smp_mb(); smp_mb();
if (cur_trans->num_writers > 1) if (atomic_read(&cur_trans->num_writers) > 1)
schedule_timeout(MAX_SCHEDULE_TIMEOUT); schedule_timeout(MAX_SCHEDULE_TIMEOUT);
else if (should_grow) else if (should_grow)
schedule_timeout(1); schedule_timeout(1);
mutex_lock(&root->fs_info->trans_mutex); mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait); finish_wait(&cur_trans->writer_wait, &wait);
} while (cur_trans->num_writers > 1 || } while (atomic_read(&cur_trans->num_writers) > 1 ||
(should_grow && cur_trans->num_joined != joined)); (should_grow && cur_trans->num_joined != joined));
ret = create_pending_snapshots(trans, root->fs_info); ret = create_pending_snapshots(trans, root->fs_info);
...@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wake_up(&cur_trans->commit_wait); wake_up(&cur_trans->commit_wait);
list_del_init(&cur_trans->list);
put_transaction(cur_trans); put_transaction(cur_trans);
put_transaction(cur_trans); put_transaction(cur_trans);
......
...@@ -27,11 +27,11 @@ struct btrfs_transaction { ...@@ -27,11 +27,11 @@ struct btrfs_transaction {
* total writers in this transaction, it must be zero before the * total writers in this transaction, it must be zero before the
* transaction can end * transaction can end
*/ */
unsigned long num_writers; atomic_t num_writers;
unsigned long num_joined; unsigned long num_joined;
int in_commit; int in_commit;
int use_count; atomic_t use_count;
int commit_done; int commit_done;
int blocked; int blocked;
struct list_head list; struct list_head list;
......
...@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ...@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct btrfs_path *path; struct btrfs_path *path;
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct btrfs_dir_item *di; struct btrfs_dir_item *di;
int ret = 0, slot, advance; int ret = 0, slot;
size_t total_size = 0, size_left = size; size_t total_size = 0, size_left = size;
unsigned long name_ptr; unsigned long name_ptr;
size_t name_len; size_t name_len;
u32 nritems;
/* /*
* ok we want all objects associated with this id. * ok we want all objects associated with this id.
...@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ...@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) if (ret < 0)
goto err; goto err;
advance = 0;
while (1) { while (1) {
leaf = path->nodes[0]; leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
slot = path->slots[0]; slot = path->slots[0];
/* this is where we start walking through the path */ /* this is where we start walking through the path */
if (advance || slot >= nritems) { if (slot >= btrfs_header_nritems(leaf)) {
/* /*
* if we've reached the last slot in this leaf we need * if we've reached the last slot in this leaf we need
* to go to the next leaf and reset everything * to go to the next leaf and reset everything
*/ */
if (slot >= nritems-1) {
ret = btrfs_next_leaf(root, path); ret = btrfs_next_leaf(root, path);
if (ret) if (ret < 0)
goto err;
else if (ret > 0)
break; break;
leaf = path->nodes[0]; continue;
nritems = btrfs_header_nritems(leaf);
slot = path->slots[0];
} else {
/*
* just walking through the slots on this leaf
*/
slot++;
path->slots[0]++;
}
} }
advance = 1;
btrfs_item_key_to_cpu(leaf, &found_key, slot); btrfs_item_key_to_cpu(leaf, &found_key, slot);
...@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ...@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
/* we are just looking for how big our buffer needs to be */ /* we are just looking for how big our buffer needs to be */
if (!size) if (!size)
continue; goto next;
if (!buffer || (name_len + 1) > size_left) { if (!buffer || (name_len + 1) > size_left) {
ret = -ERANGE; ret = -ERANGE;
...@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ...@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
size_left -= name_len + 1; size_left -= name_len + 1;
buffer += name_len + 1; buffer += name_len + 1;
next:
path->slots[0]++;
} }
ret = total_size; ret = total_size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment