Commit 22712200 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: make sure reserve_metadata_bytes doesn't leak out strange errors
  Btrfs: use the commit_root for reading free_space_inode crcs
  Btrfs: reduce extent_state lock contention for metadata
  Btrfs: remove lockdep magic from btrfs_next_leaf
  Btrfs: make a lockdep class for each root
  Btrfs: switch the btrfs tree locks to reader/writer
  Btrfs: fix deadlock when throttling transactions
  Btrfs: stop using highmem for extent_buffers
  Btrfs: fix BUG_ON() caused by ENOSPC when relocating space
  Btrfs: tag pages for writeback in sync
  Btrfs: fix enospc problems with delalloc
  Btrfs: don't flush delalloc arbitrarily
  Btrfs: use find_or_create_page instead of grab_cache_page
  Btrfs: use a worker thread to do caching
  Btrfs: fix how we merge extent states and deal with cached states
  Btrfs: use the normal checksumming infrastructure for free space cache
  Btrfs: serialize flushers in reserve_metadata_bytes
  Btrfs: do transaction space reservation before joining the transaction
  Btrfs: try to only do one btrfs_search_slot in do_setxattr
parents 597a67e0 ff95acb6
......@@ -34,6 +34,9 @@ struct btrfs_inode {
*/
struct btrfs_key location;
/* Lock for counters */
spinlock_t lock;
/* the extent_tree has caches of all the extent mappings to disk */
struct extent_map_tree extent_tree;
......@@ -134,8 +137,8 @@ struct btrfs_inode {
* items we think we'll end up using, and reserved_extents is the number
* of extent items we've reserved metadata for.
*/
atomic_t outstanding_extents;
atomic_t reserved_extents;
unsigned outstanding_extents;
unsigned reserved_extents;
/*
* ordered_data_close is set by truncate when a file that used
......@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
BTRFS_I(inode)->disk_i_size = size;
}
/*
 * Returns true when @root is the filesystem's tree_root, or when the
 * objectid in the inode's location key is BTRFS_FREE_INO_OBJECTID;
 * returns false otherwise.
 */
static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
					     struct inode *inode)
{
	return root == root->fs_info->tree_root ||
	       BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID;
}
#endif
This diff is collapsed.
......@@ -755,6 +755,8 @@ struct btrfs_space_info {
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
unsigned int flush:1; /* set if we are trying to make space */
unsigned int force_alloc; /* set if we need to force a chunk
alloc for this space */
......@@ -764,7 +766,7 @@ struct btrfs_space_info {
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
atomic_t caching_threads;
wait_queue_head_t wait;
};
struct btrfs_block_rsv {
......@@ -824,6 +826,7 @@ struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
wait_queue_head_t wait;
struct btrfs_work work;
struct btrfs_block_group_cache *block_group;
u64 progress;
atomic_t count;
......@@ -1032,6 +1035,8 @@ struct btrfs_fs_info {
struct btrfs_workers endio_write_workers;
struct btrfs_workers endio_freespace_worker;
struct btrfs_workers submit_workers;
struct btrfs_workers caching_workers;
/*
* fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens
......@@ -2128,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
/* extent-tree.c */
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
int num_items)
unsigned num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3 * num_items;
......@@ -2222,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
......@@ -2330,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p);
void btrfs_clear_path_blocking(struct btrfs_path *p,
struct extent_buffer *held);
struct extent_buffer *held, int held_rw);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
......
......@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
}
/* reset all the locked nodes in the path to spinning locks. */
btrfs_clear_path_blocking(path, NULL);
btrfs_clear_path_blocking(path, NULL, 0);
/* insert the keys of the items */
ret = setup_items_for_insert(trans, root, path, keys, data_size,
......
......@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
data_size = sizeof(*dir_item) + name_len + data_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
name, name_len);
/*
* FIXME: at some point we should handle xattr's that are larger than
* what we can fit in our leaf. We set location to NULL b/c we arent
* pointing at anything else, that will change if we store the xattr
* data in a separate inode.
*/
BUG_ON(IS_ERR(dir_item));
if (IS_ERR(dir_item))
return PTR_ERR(dir_item);
memset(&location, 0, sizeof(location));
leaf = path->nodes[0];
......
......@@ -100,38 +100,83 @@ struct async_submit_bio {
struct btrfs_work work;
};
/* These are used to set the lockdep class on the extent buffer locks.
* The class is set by the readpage_end_io_hook after the buffer has
* passed csum validation but before the pages are unlocked.
/*
* Lockdep class keys for extent_buffer->lock's in this root. For a given
* eb, the lockdep key is determined by the btrfs_root it belongs to and
* the level the eb occupies in the tree.
*
* Different roots are used for different purposes and may nest inside each
* other and they require separate keysets. As lockdep keys should be
* static, assign keysets according to the purpose of the root as indicated
* by btrfs_root->objectid. This ensures that all special purpose roots
* have separate keysets.
*
* The lockdep class is also set by btrfs_init_new_buffer on freshly
* allocated blocks.
* Lock-nesting across peer nodes is always done with the immediate parent
* node locked thus preventing deadlock. As lockdep doesn't know this, use
* subclass to avoid triggering lockdep warning in such cases.
*
* The class is based on the level in the tree block, which allows lockdep
* to know that lower nodes nest inside the locks of higher nodes.
* The key is set by the readpage_end_io_hook after the buffer has passed
* csum validation but before the pages are unlocked. It is also set by
* btrfs_init_new_buffer on freshly allocated blocks.
*
* We also add a check to make sure the highest level of the tree is
* the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
* code needs update as well.
* We also add a check to make sure the highest level of the tree is the
* same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
* needs update as well.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
# error
# endif
static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
/* leaf */
"btrfs-extent-00",
"btrfs-extent-01",
"btrfs-extent-02",
"btrfs-extent-03",
"btrfs-extent-04",
"btrfs-extent-05",
"btrfs-extent-06",
"btrfs-extent-07",
/* highest possible level */
"btrfs-extent-08",
/*
 * Table of lockdep keysets, one entry per root objectid listed below plus
 * a final catch-all entry with id 0.
 */
static struct btrfs_lockdep_keyset {
u64 id; /* root objectid */
const char *name_stem; /* lock name stem */
/* one name per tree level; filled in by btrfs_init_lockdep() as "btrfs-<stem>-NN" */
char names[BTRFS_MAX_LEVEL + 1][20];
/* one lockdep class key per tree level, indexed by level */
struct lock_class_key keys[BTRFS_MAX_LEVEL + 1];
} btrfs_lockdep_keysets[] = {
{ .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" },
{ .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" },
{ .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" },
{ .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
{ .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
{ .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
{ .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
/*
 * Must stay last: the lookup in btrfs_set_buffer_lockdep_class() stops at
 * the first entry whose id is 0 and uses it as the default keyset.
 */
{ .id = 0, .name_stem = "tree" },
};
/*
 * Build the lockdep class names for every keyset in btrfs_lockdep_keysets.
 * Each per-level name is formatted as "btrfs-<name_stem>-<two digit level>".
 */
void __init btrfs_init_lockdep(void)
{
	struct btrfs_lockdep_keyset *ks;
	int set, level;

	for (set = 0; set < ARRAY_SIZE(btrfs_lockdep_keysets); set++) {
		ks = &btrfs_lockdep_keysets[set];

		/* one name per tree level, bounded by the size of the slot */
		for (level = 0; level < ARRAY_SIZE(ks->names); level++) {
			snprintf(ks->names[level], sizeof(ks->names[level]),
				 "btrfs-%s-%02d", ks->name_stem, level);
		}
	}
}
/*
 * Set the lockdep class and name of eb->lock from the keyset that matches
 * @objectid, using the key and name stored for @level.
 *
 * An objectid with no matching entry gets the trailing id 0 keyset.
 * A @level outside the keys array triggers BUG_ON.
 */
void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
				    int level)
{
	struct btrfs_lockdep_keyset *ks = btrfs_lockdep_keysets;

	BUG_ON(level >= ARRAY_SIZE(ks->keys));

	/* advance until the objectid matches or the id 0 default is reached */
	while (ks->id && ks->id != objectid)
		ks++;

	lockdep_set_class_and_name(&eb->lock, &ks->keys[level],
				   ks->names[level]);
}
#endif
/*
......@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
char *map_token = NULL;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
......@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
len = buf->len - offset;
while (len > 0) {
err = map_private_extent_buffer(buf, offset, 32,
&map_token, &kaddr,
&map_start, &map_len, KM_USER0);
&kaddr, &map_start, &map_len);
if (err)
return 1;
cur_len = min(len, map_len - (offset - map_start));
......@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
crc, cur_len);
len -= cur_len;
offset += cur_len;
unmap_extent_buffer(buf, map_token, KM_USER0);
}
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
......@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
{
lockdep_set_class_and_name(&eb->lock,
&btrfs_eb_class[level],
btrfs_eb_name[level]);
}
#endif
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
......@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
found_level = btrfs_header_level(eb);
btrfs_set_buffer_lockdep_class(eb, found_level);
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
ret = csum_tree_block(root, eb, 1);
if (ret) {
......@@ -1598,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
goto fail_bdi;
}
fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
......@@ -1802,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->thread_pool_size),
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->caching_workers, "cache",
2, &fs_info->generic_worker);
/* a higher idle thresh on the submit workers makes it much more
* likely that bios will be sent down in a sane order to the
* devices
......@@ -1855,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
btrfs_start_workers(&fs_info->delayed_workers, 1);
btrfs_start_workers(&fs_info->caching_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
......@@ -2112,6 +2150,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
fail_alloc:
kfree(fs_info->delayed_root);
fail_iput:
......@@ -2577,6 +2616,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
......
......@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
void btrfs_init_lockdep(void);
void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level);
#else
static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
int level)
static inline void btrfs_init_lockdep(void)
{ }
static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level)
{
}
#endif
......
This diff is collapsed.
This diff is collapsed.
......@@ -120,8 +120,6 @@ struct extent_state {
struct extent_buffer {
u64 start;
unsigned long len;
char *map_token;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
struct page *first_page;
......@@ -130,14 +128,26 @@ struct extent_buffer {
struct rcu_head rcu_head;
atomic_t refs;
/* the spinlock is used to protect most operations */
spinlock_t lock;
/* count of read lock holders on the extent buffer */
atomic_t write_locks;
atomic_t read_locks;
atomic_t blocking_writers;
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;
/* protects write locks */
rwlock_t lock;
/*
* when we keep the lock held while blocking, waiters go onto
* the wq
/* readers use lock_wq while they wait for the write
* lock holders to unlock
*/
wait_queue_head_t lock_wq;
wait_queue_head_t write_lock_wq;
/* writers use read_lock_wq while they wait for readers
* to unlock
*/
wait_queue_head_t read_lock_wq;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
......@@ -279,15 +289,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
int extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb,
struct extent_state *cached_state);
int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **token, char **map,
unsigned long *map_start,
unsigned long *map_len, int km);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **token, char **map,
unsigned long min_len, char **map,
unsigned long *map_start,
unsigned long *map_len, int km);
void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
unsigned long *map_len);
int extent_range_uptodate(struct extent_io_tree *tree,
u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode,
......
......@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
WARN_ON(bio->bi_vcnt <= 0);
/*
* the free space stuff is only read when it hasn't been
* updated in the current transaction. So, we can safely
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
if (btrfs_is_free_space_inode(root, inode))
path->search_commit_root = 1;
disk_bytenr = (u64)bio->bi_sector << 9;
if (dio)
offset = logical_offset;
......@@ -664,10 +673,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_sector_sum *sector_sum;
u32 nritems;
u32 ins_size;
char *eb_map;
char *eb_token;
unsigned long map_len;
unsigned long map_start;
u16 csum_size =
btrfs_super_csum_size(&root->fs_info->super_copy);
......@@ -814,30 +819,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
eb_token = NULL;
next_sector:
if (!eb_token ||
(unsigned long)item + csum_size >= map_start + map_len) {
int err;
if (eb_token)
unmap_extent_buffer(leaf, eb_token, KM_USER1);
eb_token = NULL;
err = map_private_extent_buffer(leaf, (unsigned long)item,
csum_size,
&eb_token, &eb_map,
&map_start, &map_len, KM_USER1);
if (err)
eb_token = NULL;
}
if (eb_token) {
memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
&sector_sum->sum, csum_size);
} else {
write_extent_buffer(leaf, &sector_sum->sum,
(unsigned long)item, csum_size);
}
write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
total_bytes += root->sectorsize;
sector_sum++;
......@@ -850,10 +834,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto next_sector;
}
}
if (eb_token) {
unmap_extent_buffer(leaf, eb_token, KM_USER1);
eb_token = NULL;
}
btrfs_mark_buffer_dirty(path->nodes[0]);
if (total_bytes < sums->len) {
btrfs_release_path(path);
......
......@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
pages[i] = find_or_create_page(inode->i_mapping, index + i,
GFP_NOFS);
if (!pages[i]) {
faili = i - 1;
err = -ENOMEM;
......@@ -1238,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy.
*/
if (num_pages > dirty_pages) {
if (copied > 0)
atomic_inc(
&BTRFS_I(inode)->outstanding_extents);
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
......
This diff is collapsed.
......@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
return alloc_hint;
}
static inline bool is_free_space_inode(struct btrfs_root *root,
struct inode *inode)
{
if (root == root->fs_info->tree_root ||
BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
/*
* when extent_io.c finds a delayed allocation range in the file,
* the call backs end up in this code. The basic idea is to
......@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
BUG_ON(is_free_space_inode(root, inode));
BUG_ON(btrfs_is_free_space_inode(root, inode));
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
......@@ -1072,7 +1063,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
path = btrfs_alloc_path();
BUG_ON(!path);
nolock = is_free_space_inode(root, inode);
nolock = btrfs_is_free_space_inode(root, inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
......@@ -1298,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode,
if (!(orig->state & EXTENT_DELALLOC))
return 0;
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
return 0;
}
......@@ -1316,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
if (!(other->state & EXTENT_DELALLOC))
return 0;
atomic_dec(&BTRFS_I(inode)->outstanding_extents);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
return 0;
}
......@@ -1337,12 +1332,15 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
bool do_list = !is_free_space_inode(root, inode);
bool do_list = !btrfs_is_free_space_inode(root, inode);
if (*bits & EXTENT_FIRST_DELALLOC)
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
else
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
} else {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
......@@ -1370,12 +1368,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
bool do_list = !is_free_space_inode(root, inode);
bool do_list = !btrfs_is_free_space_inode(root, inode);
if (*bits & EXTENT_FIRST_DELALLOC)
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
else if (!(*bits & EXTENT_DO_ACCOUNTING))
atomic_dec(&BTRFS_I(inode)->outstanding_extents);
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
}
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
......@@ -1477,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
if (is_free_space_inode(root, inode))
if (btrfs_is_free_space_inode(root, inode))
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
else
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
......@@ -1726,7 +1727,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0;
BUG_ON(!ordered_extent);
nolock = is_free_space_inode(root, inode);
nolock = btrfs_is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
......@@ -2531,13 +2532,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
if (!leaf->map_token)
map_private_extent_buffer(leaf, (unsigned long)inode_item,
sizeof(struct btrfs_inode_item),
&leaf->map_token, &leaf->kaddr,
&leaf->map_start, &leaf->map_len,
KM_USER1);
inode->i_mode = btrfs_inode_mode(leaf, inode_item);
inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
inode->i_uid = btrfs_inode_uid(leaf, inode_item);
......@@ -2575,11 +2569,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
if (!maybe_acls)
cache_no_acl(inode);
if (leaf->map_token) {
unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
leaf->map_token = NULL;
}
btrfs_free_path(path);
switch (inode->i_mode & S_IFMT) {
......@@ -2624,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item,
struct inode *inode)
{
if (!leaf->map_token)
map_private_extent_buffer(leaf, (unsigned long)item,
sizeof(struct btrfs_inode_item),
&leaf->map_token, &leaf->kaddr,
&leaf->map_start, &leaf->map_len,
KM_USER1);
btrfs_set_inode_uid(leaf, item, inode->i_uid);
btrfs_set_inode_gid(leaf, item, inode->i_gid);
btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
......@@ -2659,11 +2641,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
btrfs_set_inode_block_group(leaf, item, 0);
if (leaf->map_token) {
unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
leaf->map_token = NULL;
}
}
/*
......@@ -2684,7 +2661,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated
* without delay
*/
if (!is_free_space_inode(root, inode)
if (!btrfs_is_free_space_inode(root, inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
......@@ -3398,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
ret = -ENOMEM;
again:
page = grab_cache_page(mapping, index);
page = find_or_create_page(mapping, index, GFP_NOFS);
if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
......@@ -3634,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
is_free_space_inode(root, inode)))
btrfs_is_free_space_inode(root, inode)))
goto no_delete;
if (is_bad_inode(inode)) {
......@@ -4271,7 +4248,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (BTRFS_I(inode)->dummy_inode)
return 0;
if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
......@@ -6728,8 +6705,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
atomic_set(&ei->outstanding_extents, 0);
atomic_set(&ei->reserved_extents, 0);
spin_lock_init(&ei->lock);
ei->outstanding_extents = 0;
ei->reserved_extents = 0;
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
......@@ -6767,8 +6745,8 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
WARN_ON(BTRFS_I(inode)->outstanding_extents);
WARN_ON(BTRFS_I(inode)->reserved_extents);
/*
* This can happen where we create an inode, but somebody else also
......@@ -6823,7 +6801,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 &&
!is_free_space_inode(root, inode))
!btrfs_is_free_space_inode(root, inode))
return 1;
else
return generic_drop_inode(inode);
......
......@@ -859,8 +859,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
/* step one, lock all the pages */
for (i = 0; i < num_pages; i++) {
struct page *page;
page = grab_cache_page(inode->i_mapping,
start_index + i);
page = find_or_create_page(inode->i_mapping,
start_index + i, GFP_NOFS);
if (!page)
break;
......@@ -930,7 +930,9 @@ static int cluster_pages_for_defrag(struct inode *inode,
GFP_NOFS);
if (i_done != num_pages) {
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode,
(num_pages - i_done) << PAGE_CACHE_SHIFT);
}
......
......@@ -24,185 +24,197 @@
#include "extent_io.h"
#include "locking.h"
static inline void spin_nested(struct extent_buffer *eb)
{
spin_lock(&eb->lock);
}
void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
/*
* Setting a lock to blocking will drop the spinlock and set the
* flag that forces other procs who want the lock to wait. After
* this you can safely schedule with the lock held.
* if we currently have a spinning reader or writer lock
* (indicated by the rw flag) this will bump the count
* of blocking holders and drop the spinlock.
*/
void btrfs_set_lock_blocking(struct extent_buffer *eb)
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
spin_unlock(&eb->lock);
if (rw == BTRFS_WRITE_LOCK) {
if (atomic_read(&eb->blocking_writers) == 0) {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
btrfs_assert_tree_locked(eb);
atomic_inc(&eb->blocking_writers);
write_unlock(&eb->lock);
}
} else if (rw == BTRFS_READ_LOCK) {
btrfs_assert_tree_read_locked(eb);
atomic_inc(&eb->blocking_readers);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
read_unlock(&eb->lock);
}
/* exit with the spin lock released and the bit set */
return;
}
/*
* clearing the blocking flag will take the spinlock again.
* After this you can't safely schedule
* if we currently have a blocking lock, take the spinlock
* and drop our blocking count
*/
void btrfs_clear_lock_blocking(struct extent_buffer *eb)
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
spin_nested(eb);
clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
smp_mb__after_clear_bit();
if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
if (atomic_dec_and_test(&eb->blocking_writers))
wake_up(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
if (atomic_dec_and_test(&eb->blocking_readers))
wake_up(&eb->read_lock_wq);
}
/* exit with the spin lock held */
return;
}
/*
* unfortunately, many of the places that currently set a lock to blocking
* don't end up blocking for very long, and often they don't block
* at all. For a dbench 50 run, if we don't spin on the blocking bit
* at all, the context switch rate can jump up to 400,000/sec or more.
*
* So, we're still stuck with this crummy spin on the blocking bit,
* at least until the most common causes of the short blocks
* can be dealt with.
* take a spinning read lock. This will wait for any blocking
* writers
*/
static int btrfs_spin_on_block(struct extent_buffer *eb)
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
int i;
for (i = 0; i < 512; i++) {
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
return 1;
if (need_resched())
break;
cpu_relax();
again:
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
return 0;
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
}
/*
* This is somewhat different from trylock. It will take the
* spinlock but if it finds the lock is set to blocking, it will
* return without the lock held.
*
* returns 1 if it was able to take the lock and zero otherwise
*
* After this call, scheduling is not safe without first calling
* btrfs_set_lock_blocking()
* returns 1 if we get the read lock and 0 if we don't
* this won't wait for blocking writers
*/
int btrfs_try_spin_lock(struct extent_buffer *eb)
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
int i;
if (atomic_read(&eb->blocking_writers))
return 0;
if (btrfs_spin_on_block(eb)) {
spin_nested(eb);
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
return 1;
spin_unlock(&eb->lock);
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
}
/* spin for a bit on the BLOCKING flag */
for (i = 0; i < 2; i++) {
cpu_relax();
if (!btrfs_spin_on_block(eb))
break;
spin_nested(eb);
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
return 1;
spin_unlock(&eb->lock);
}
return 0;
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
return 1;
}
/*
* the autoremove wake function will return 0 if it tried to wake up
* a process that was already awake, which means that process won't
* count as an exclusive wakeup. The waitq code will continue waking
* procs until it finds one that was actually sleeping.
*
* For btrfs, this isn't quite what we want. We want a single proc
* to be notified that the lock is ready for taking. If that proc
* already happen to be awake, great, it will loop around and try for
* the lock.
*
* So, btrfs_wake_function always returns 1, even when the proc that we
* tried to wake up was already awake.
* returns 1 if we get the write lock and 0 if we don't
* this won't wait for blocking writers or readers
*/
static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
int sync, void *key)
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
autoremove_wake_function(wait, mode, sync, key);
if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers))
return 0;
write_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->write_locks);
atomic_inc(&eb->spinning_writers);
return 1;
}
/*
* returns with the extent buffer spinlocked.
*
* This will spin and/or wait as required to take the lock, and then
* return with the spinlock held.
*
* After this call, scheduling is not safe without first calling
* btrfs_set_lock_blocking()
* drop a spinning read lock
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
/* BUG()s if eb->read_locks is zero, i.e. nobody holds a read lock */
btrfs_assert_tree_read_locked(eb);
/* a spinning reader must have been counted when the lock was taken */
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
/* drop both bookkeeping counters before releasing the rwlock itself */
atomic_dec(&eb->spinning_readers);
atomic_dec(&eb->read_locks);
read_unlock(&eb->lock);
}
/*
* drop a blocking read lock
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
/* BUG()s if eb->read_locks is zero, i.e. nobody holds a read lock */
btrfs_assert_tree_read_locked(eb);
/* complain if no blocking reader is accounted for on this buffer */
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
/*
 * Only the last blocking reader wakes read_lock_wq; btrfs_tree_lock()
 * sleeps on that queue until blocking_readers reaches zero.
 */
if (atomic_dec_and_test(&eb->blocking_readers))
wake_up(&eb->read_lock_wq);
/* note: eb->lock is not touched here, only the counters are dropped */
atomic_dec(&eb->read_locks);
}
/*
* take a spinning write lock. This will wait for both
* blocking readers and writers
*/
int btrfs_tree_lock(struct extent_buffer *eb)
{
DEFINE_WAIT(wait);
wait.func = btrfs_wake_function;
if (!btrfs_spin_on_block(eb))
goto sleep;
while(1) {
spin_nested(eb);
/* nobody is blocking, exit with the spinlock held */
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
return 0;
/*
* we have the spinlock, but the real owner is blocking.
* wait for them
*/
spin_unlock(&eb->lock);
/*
* spin for a bit, and if the blocking flag goes away,
* loop around
*/
cpu_relax();
if (btrfs_spin_on_block(eb))
continue;
sleep:
prepare_to_wait_exclusive(&eb->lock_wq, &wait,
TASK_UNINTERRUPTIBLE);
if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
schedule();
finish_wait(&eb->lock_wq, &wait);
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
write_lock(&eb->lock);
if (atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
wait_event(eb->read_lock_wq,
atomic_read(&eb->blocking_readers) == 0);
goto again;
}
if (atomic_read(&eb->blocking_writers)) {
write_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
return 0;
}
/*
* drop a spinning or a blocking write lock.
*/
int btrfs_tree_unlock(struct extent_buffer *eb)
{
/*
* if we were a blocking owner, we don't have the spinlock held
* just clear the bit and look for waiters
*/
if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
smp_mb__after_clear_bit();
else
spin_unlock(&eb->lock);
if (waitqueue_active(&eb->lock_wq))
wake_up(&eb->lock_wq);
int blockers = atomic_read(&eb->blocking_writers);
BUG_ON(blockers > 1);
btrfs_assert_tree_locked(eb);
atomic_dec(&eb->write_locks);
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
smp_wmb();
wake_up(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
write_unlock(&eb->lock);
}
return 0;
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
assert_spin_locked(&eb->lock);
BUG_ON(!atomic_read(&eb->write_locks));
}
/*
 * Sanity check for read-side callers: trips BUG_ON() when the
 * buffer's read_locks counter is zero.
 */
void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
	BUG_ON(atomic_read(&eb->read_locks) == 0);
}
......@@ -19,11 +19,43 @@
#ifndef __BTRFS_LOCKING_
#define __BTRFS_LOCKING_
#define BTRFS_WRITE_LOCK 1
#define BTRFS_READ_LOCK 2
#define BTRFS_WRITE_LOCK_BLOCKING 3
#define BTRFS_READ_LOCK_BLOCKING 4
int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_tree_unlock(struct extent_buffer *eb);
int btrfs_try_spin_lock(struct extent_buffer *eb);
void btrfs_set_lock_blocking(struct extent_buffer *eb);
void btrfs_clear_lock_blocking(struct extent_buffer *eb);
void btrfs_tree_read_lock(struct extent_buffer *eb);
void btrfs_tree_read_unlock(struct extent_buffer *eb);
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_assert_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
/*
 * Release a tree lock on @eb, picking the unlock routine from @rw.
 *
 * @rw must be one of the BTRFS_*_LOCK* constants.  Both write lock
 * flavours are released through btrfs_tree_unlock(); the two read lock
 * flavours each have their own helper.  Any other value hits BUG().
 */
static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
{
	switch (rw) {
	case BTRFS_WRITE_LOCK:
	case BTRFS_WRITE_LOCK_BLOCKING:
		btrfs_tree_unlock(eb);
		break;
	case BTRFS_READ_LOCK_BLOCKING:
		btrfs_tree_read_unlock_blocking(eb);
		break;
	case BTRFS_READ_LOCK:
		btrfs_tree_read_unlock(eb);
		break;
	default:
		BUG();
	}
}
static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
/*
 * Write-lock shorthand: forwards to btrfs_set_lock_blocking_rw()
 * with BTRFS_WRITE_LOCK as the lock type.
 * NOTE(review): presumably this turns a held spinning write lock
 * into a blocking one -- confirm against btrfs_set_lock_blocking_rw().
 */
btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
}
static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
/*
 * Write-lock shorthand: forwards to btrfs_clear_lock_blocking_rw()
 * with BTRFS_WRITE_LOCK_BLOCKING as the lock type.
 * NOTE(review): presumably this turns a blocking write lock back
 * into a spinning one -- confirm against btrfs_clear_lock_blocking_rw().
 */
btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
}
#endif
......@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
page_cache_sync_readahead(inode->i_mapping,
ra, NULL, index,
last_index + 1 - index);
page = grab_cache_page(inode->i_mapping, index);
page = find_or_create_page(inode->i_mapping, index,
GFP_NOFS);
if (!page) {
btrfs_delalloc_release_metadata(inode,
PAGE_CACHE_SIZE);
......
......@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
/* ugly, but we want the fast path here */ \
if (eb->map_token && offset >= eb->map_start && \
offset + sizeof(((type *)0)->member) <= eb->map_start + \
eb->map_len) { \
p = (type *)(eb->kaddr + part_offset - eb->map_start); \
return le##bits##_to_cpu(p->member); \
} \
{ \
int err; \
char *map_token; \
char *kaddr; \
int unmap_on_exit = (eb->map_token == NULL); \
unsigned long map_start; \
unsigned long map_len; \
u##bits res; \
err = map_extent_buffer(eb, offset, \
sizeof(((type *)0)->member), \
&map_token, &kaddr, \
&map_start, &map_len, KM_USER1); \
if (err) { \
__le##bits leres; \
read_eb_member(eb, s, type, member, &leres); \
return le##bits##_to_cpu(leres); \
} \
p = (type *)(kaddr + part_offset - map_start); \
res = le##bits##_to_cpu(p->member); \
if (unmap_on_exit) \
unmap_extent_buffer(eb, map_token, KM_USER1); \
return res; \
} \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
u##bits res; \
err = map_private_extent_buffer(eb, offset, \
sizeof(((type *)0)->member), \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
read_eb_member(eb, s, type, member, &leres); \
return le##bits##_to_cpu(leres); \
} \
p = (type *)(kaddr + part_offset - map_start); \
res = le##bits##_to_cpu(p->member); \
return res; \
} \
void btrfs_set_##name(struct extent_buffer *eb, \
type *s, u##bits val) \
......@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
/* ugly, but we want the fast path here */ \
if (eb->map_token && offset >= eb->map_start && \
offset + sizeof(((type *)0)->member) <= eb->map_start + \
eb->map_len) { \
p = (type *)(eb->kaddr + part_offset - eb->map_start); \
p->member = cpu_to_le##bits(val); \
return; \
} \
{ \
int err; \
char *map_token; \
char *kaddr; \
int unmap_on_exit = (eb->map_token == NULL); \
unsigned long map_start; \
unsigned long map_len; \
err = map_extent_buffer(eb, offset, \
sizeof(((type *)0)->member), \
&map_token, &kaddr, \
&map_start, &map_len, KM_USER1); \
if (err) { \
__le##bits val2; \
val2 = cpu_to_le##bits(val); \
write_eb_member(eb, s, type, member, &val2); \
return; \
} \
p = (type *)(kaddr + part_offset - map_start); \
p->member = cpu_to_le##bits(val); \
if (unmap_on_exit) \
unmap_extent_buffer(eb, map_token, KM_USER1); \
} \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
err = map_private_extent_buffer(eb, offset, \
sizeof(((type *)0)->member), \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
val2 = cpu_to_le##bits(val); \
write_eb_member(eb, s, type, member, &val2); \
return; \
} \
p = (type *)(kaddr + part_offset - map_start); \
p->member = cpu_to_le##bits(val); \
}
#include "ctree.h"
......@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
if (eb->map_token && ptr >= eb->map_start &&
ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
sizeof(*disk_key));
return;
} else if (eb->map_token) {
unmap_extent_buffer(eb, eb->map_token, KM_USER1);
eb->map_token = NULL;
}
read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
struct btrfs_key_ptr, key, disk_key);
}
......@@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
int retries = 0;
u64 num_bytes = 0;
int ret;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
......@@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->block_rsv = NULL;
goto got_it;
}
/*
* Do the reservation before we join the transaction so we can do all
* the appropriate flushing if need be.
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(NULL, root,
&root->fs_info->trans_block_rsv,
num_bytes);
if (ret)
return ERR_PTR(ret);
}
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
......@@ -310,24 +323,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
goto again;
}
if (num_items > 0) {
ret = btrfs_trans_reserve_metadata(h, root, num_items);
if (ret == -EAGAIN && !retries) {
retries++;
btrfs_commit_transaction(h, root);
goto again;
} else if (ret == -EAGAIN) {
/*
* We have already retried and got EAGAIN, so really we
* don't have space, so set ret to -ENOSPC.
*/
ret = -ENOSPC;
}
if (ret < 0) {
btrfs_end_transaction(h, root);
return ERR_PTR(ret);
}
if (num_bytes) {
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
}
got_it:
......@@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
}
if (lock && cur_trans->blocked && !cur_trans->in_commit) {
if (throttle)
if (throttle) {
/*
* We may race with somebody else here so end up having
* to call end_transaction on ourselves again, so inc
* our use_count.
*/
trans->use_count++;
return btrfs_commit_transaction(trans, root);
else
} else {
wake_up_process(info->transaction_kthread);
}
}
WARN_ON(cur_trans != info->running_transaction);
......
......@@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
btrfs_read_buffer(next, ptr_gen);
btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next);
clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
......@@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[*level];
btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next);
clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
......@@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[orig_level];
btrfs_tree_lock(next);
clean_tree_block(trans, log, next);
btrfs_set_lock_blocking(next);
clean_tree_block(trans, log, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
......
......@@ -3595,7 +3595,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
if (!sb)
return -ENOMEM;
btrfs_set_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(sb, 0);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy);
......
......@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
/* first lets see if we already have this xattr */
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
strlen(name), -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
}
/* ok we already have this xattr, lets remove it */
if (di) {
/* if we want create only exit */
if (flags & XATTR_CREATE) {
ret = -EEXIST;
if (flags & XATTR_REPLACE) {
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
name_len, -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
} else if (!di) {
ret = -ENODATA;
goto out;
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
BUG_ON(ret);
if (ret)
goto out;
btrfs_release_path(path);
}
/* if we don't have a value then we are removing the xattr */
if (!value)
again:
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
name, name_len, value, size);
if (ret == -EEXIST) {
if (flags & XATTR_CREATE)
goto out;
} else {
/*
* We can't use the path we already have since we won't have the
* proper locking for a delete, so release the path and
* re-lookup to delete the thing.
*/
btrfs_release_path(path);
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
name, name_len, -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
} else if (!di) {
/* Shouldn't happen but just in case... */
btrfs_release_path(path);
goto again;
}
if (flags & XATTR_REPLACE) {
/* we couldn't find the attr to replace */
ret = -ENODATA;
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret)
goto out;
/*
* We have a value to set, so go back and try to insert it now.
*/
if (value) {
btrfs_release_path(path);
goto again;
}
}
/* ok we have to create a completely new xattr */
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
name, name_len, value, size);
BUG_ON(ret);
out:
btrfs_free_path(path);
return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment