Commit a22180d2 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs update from Chris Mason:
 "A big set of fixes and features.

  In terms of line count, most of the code comes from Stefan, who added
  the ability to replace a single drive in place.  This is different
  from how btrfs normally replaces drives, and is much much much faster.

  Josef is plowing through our synchronous write performance.  This pull
  request does not include the DIO_OWN_WAITING patch that was discussed
  on the list, but it has a number of other improvements to cut down our
  latencies and CPU time during fsync/O_DIRECT writes.

  Miao Xie has a big series of fixes and is spreading out ordered
  operations over more CPUs.  This improves performance and reduces
  contention.

  I've put in fixes for error handling around hash collisions.  These
  are going back to individual stable kernels as I test against them.

  Otherwise we have a lot of fixes and cleanups, thanks everyone!
  raid5/6 is being rebased against the device replacement code.  I'll
  have it posted this Friday along with a nice series of benchmarks."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (115 commits)
  Btrfs: fix a bug of per-file nocow
  Btrfs: fix hash overflow handling
  Btrfs: don't take inode delalloc mutex if we're a free space inode
  Btrfs: fix autodefrag and umount lockup
  Btrfs: fix permissions of empty files not affected by umask
  Btrfs: put raid properties into global table
  Btrfs: fix BUG() in scrub when first superblock reading gives EIO
  Btrfs: do not call file_update_time in aio_write
  Btrfs: only unlock and relock if we have to
  Btrfs: use tokens where we can in the tree log
  Btrfs: optimize leaf_space_used
  Btrfs: don't memset new tokens
  Btrfs: only clear dirty on the buffer if it is marked as dirty
  Btrfs: move checks in set_page_dirty under DEBUG
  Btrfs: log changed inodes based on the extent map tree
  Btrfs: add path->really_keep_locks
  Btrfs: do not mark ems as prealloc if we are writing to them
  Btrfs: keep track of the extents original block length
  Btrfs: inline csums if we're fsyncing
  Btrfs: don't bother copying if we're only logging the inode
  ...
parents 2d4dce00 213490b3
......@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
......@@ -121,6 +121,8 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
ret = posix_acl_equiv_mode(acl, &inode->i_mode);
if (ret < 0)
return ret;
if (ret == 0)
acl = NULL;
}
ret = 0;
break;
......
......@@ -461,6 +461,7 @@ static int __merge_refs(struct list_head *head, int mode)
pos2 = n2, n2 = pos2->next) {
struct __prelim_ref *ref2;
struct __prelim_ref *xchg;
struct extent_inode_elem *eie;
ref2 = list_entry(pos2, struct __prelim_ref, list);
......@@ -472,12 +473,20 @@ static int __merge_refs(struct list_head *head, int mode)
ref1 = ref2;
ref2 = xchg;
}
ref1->count += ref2->count;
} else {
if (ref1->parent != ref2->parent)
continue;
ref1->count += ref2->count;
}
eie = ref1->inode_list;
while (eie && eie->next)
eie = eie->next;
if (eie)
eie->next = ref2->inode_list;
else
ref1->inode_list = ref2->inode_list;
ref1->count += ref2->count;
list_del(&ref2->list);
kfree(ref2);
}
......@@ -890,8 +899,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
list_del(&ref->list);
if (ref->count < 0)
WARN_ON(1);
WARN_ON(ref->count < 0);
if (ref->count && ref->root_id && ref->parent == 0) {
/* no parent == root of tree */
ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
......
......@@ -39,6 +39,7 @@
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
/* in memory btrfs inode */
struct btrfs_inode {
......@@ -90,6 +91,9 @@ struct btrfs_inode {
unsigned long runtime_flags;
/* Keep track of who's O_SYNC/fsyncing currently */
atomic_t sync_writers;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
......
......@@ -137,7 +137,7 @@ struct btrfsic_block {
unsigned int never_written:1; /* block was added because it was
* referenced, not because it was
* written */
unsigned int mirror_num:2; /* large enough to hold
unsigned int mirror_num; /* large enough to hold
* BTRFS_SUPER_MIRROR_MAX */
struct btrfsic_dev_state *dev_state;
u64 dev_bytenr; /* key, physical byte num on disk */
......@@ -723,7 +723,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
}
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -903,7 +903,7 @@ static int btrfsic_process_superblock_dev_mirror(
}
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -1287,7 +1287,7 @@ static int btrfsic_create_link_to_next_block(
*next_blockp = NULL;
if (0 == *num_copiesp) {
*num_copiesp =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -1489,7 +1489,7 @@ static int btrfsic_handle_extent_data(
chunk_len = num_bytes;
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->datablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -1582,9 +1582,21 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
struct btrfs_device *device;
length = len;
ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
ret = btrfs_map_block(state->root->fs_info, READ,
bytenr, &length, &multi, mirror_num);
if (ret) {
block_ctx_out->start = 0;
block_ctx_out->dev_bytenr = 0;
block_ctx_out->len = 0;
block_ctx_out->dev = NULL;
block_ctx_out->datav = NULL;
block_ctx_out->pagev = NULL;
block_ctx_out->mem_to_free = NULL;
return ret;
}
device = multi->stripes[0].dev;
block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
......@@ -1594,8 +1606,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
block_ctx_out->pagev = NULL;
block_ctx_out->mem_to_free = NULL;
if (0 == ret)
kfree(multi);
kfree(multi);
if (NULL == block_ctx_out->dev) {
ret = -ENXIO;
printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
......@@ -2463,7 +2474,7 @@ static int btrfsic_process_written_superblock(
}
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, BTRFS_SUPER_INFO_SIZE);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -2960,7 +2971,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
struct btrfsic_block_data_ctx block_ctx;
int match = 0;
num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
num_copies = btrfs_num_copies(state->root->fs_info,
bytenr, state->metablock_size);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
......
......@@ -687,7 +687,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
BUG_ON(ret); /* -ENOMEM */
if (ret)
bio_endio(comp_bio, ret);
bio_put(comp_bio);
......@@ -712,7 +713,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
BUG_ON(ret); /* -ENOMEM */
if (ret)
bio_endio(comp_bio, ret);
bio_put(comp_bio);
return 0;
......
This diff is collapsed.
This diff is collapsed.
......@@ -651,7 +651,8 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
/*
* Since we're under a transaction reserve_metadata_bytes could
* try to commit the transaction which will make it return
......@@ -686,7 +687,8 @@ static int btrfs_delayed_inode_reserve_metadata(
* reserve something strictly for us. If not be a pain and try
* to steal from the delalloc block rsv.
*/
ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
if (!ret)
goto out;
......@@ -1255,7 +1257,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
unsigned long nr = 0;
int need_requeue = 0;
int ret;
......@@ -1316,11 +1317,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
delayed_node);
mutex_unlock(&delayed_node->mutex);
nr = trans->blocks_used;
trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
__btrfs_btree_balance_dirty(root, nr);
btrfs_btree_balance_dirty_nodelay(root);
free_path:
btrfs_free_path(path);
out:
......
This diff is collapsed.
/*
* Copyright (C) STRATO AG 2012. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#if !defined(__BTRFS_DEV_REPLACE__)
#define __BTRFS_DEV_REPLACE__
struct btrfs_ioctl_dev_replace_args;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
/*
 * Increment the 64-bit atomic counter that @stat_value points to.
 * This is a thin wrapper around atomic64_inc(); it returns nothing.
 */
static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
{
atomic64_inc(stat_value);
}
#endif
......@@ -213,6 +213,65 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
return btrfs_match_dir_item_name(root, path, name, name_len);
}
/*
 * Check whether a directory entry called @name could be added to directory
 * @dir without running into an unstorable name-hash collision.
 *
 * The dir item is looked up by the key (@dir, BTRFS_DIR_ITEM_KEY,
 * btrfs_name_hash(@name)).  The search is done without a transaction handle
 * and with ins_len == 0 and cow == 0.
 *
 * Returns:
 *   0          - no item exists for this hash, or the existing item still
 *                has room for one more entry carrying @name
 *   -EEXIST    - the item already contains an entry with exactly this name
 *   -EOVERFLOW - the hash collides with an existing item and that item
 *                cannot grow enough to hold another entry
 *   -ENOMEM    - the search path could not be allocated
 *   other < 0  - error passed through from btrfs_search_slot()
 */
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
				   const char *name, int name_len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_dir_item *di;
	int data_size;
	struct extent_buffer *leaf;
	int slot;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = dir;
	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
	key.offset = btrfs_name_hash(name, name_len);

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);

	/* return back any errors */
	if (ret < 0)
		goto out;

	/* nothing found, we're safe */
	if (ret > 0) {
		ret = 0;
		goto out;
	}

	/* we found an item, look for our name in the item */
	di = btrfs_match_dir_item_name(root, path, name, name_len);
	if (di) {
		/* our exact name was found */
		ret = -EEXIST;
		goto out;
	}

	/*
	 * see if there is room in the item to insert this name.
	 *
	 * data_size is only the growth of the item payload (one more
	 * btrfs_dir_item plus the name).  The item header is accounted for
	 * exactly once, in the comparison below; counting it in data_size
	 * as well would report -EOVERFLOW while there is still room.
	 */
	data_size = sizeof(*di) + name_len;
	leaf = path->nodes[0];
	slot = path->slots[0];
	if (data_size + btrfs_item_size_nr(leaf, slot) +
	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) {
		ret = -EOVERFLOW;
	} else {
		/* plenty of insertion room */
		ret = 0;
	}
out:
	btrfs_free_path(path);
	return ret;
}
/*
* lookup a directory item based on index. 'dir' is the objectid
* we're searching in, and 'mod' tells us if you plan on deleting the
......
This diff is collapsed.
......@@ -62,8 +62,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
void btrfs_btree_balance_dirty(struct btrfs_root *root);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
......
This diff is collapsed.
......@@ -341,12 +341,10 @@ static int insert_state(struct extent_io_tree *tree,
{
struct rb_node *node;
if (end < start) {
printk(KERN_ERR "btrfs end < start %llu %llu\n",
if (end < start)
WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
(unsigned long long)end,
(unsigned long long)start);
WARN_ON(1);
}
state->start = start;
state->end = end;
......@@ -1919,12 +1917,12 @@ static void repair_io_failure_callback(struct bio *bio, int err)
* the standard behavior is to write all copies in a raid setup. here we only
* want to write the one bad copy. so we do the mapping for ourselves and issue
* submit_bio directly.
* to avoid any synchonization issues, wait for the data after writing, which
* to avoid any synchronization issues, wait for the data after writing, which
* actually prevents the read that triggered the error from finishing.
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num)
{
......@@ -1946,7 +1944,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
bio->bi_size = 0;
map_length = length;
ret = btrfs_map_block(map_tree, WRITE, logical,
ret = btrfs_map_block(fs_info, WRITE, logical,
&map_length, &bbio, mirror_num);
if (ret) {
bio_put(bio);
......@@ -1984,14 +1982,13 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num)
{
struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
u64 start = eb->start;
unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
int ret = 0;
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
start, p, mirror_num);
if (ret)
break;
......@@ -2010,7 +2007,7 @@ static int clean_io_failure(u64 start, struct page *page)
u64 private;
u64 private_failure;
struct io_failure_record *failrec;
struct btrfs_mapping_tree *map_tree;
struct btrfs_fs_info *fs_info;
struct extent_state *state;
int num_copies;
int did_repair = 0;
......@@ -2046,11 +2043,11 @@ static int clean_io_failure(u64 start, struct page *page)
spin_unlock(&BTRFS_I(inode)->io_tree.lock);
if (state && state->start == failrec->start) {
map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
num_copies = btrfs_num_copies(map_tree, failrec->logical,
failrec->len);
fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
ret = repair_io_failure(map_tree, start, failrec->len,
ret = repair_io_failure(fs_info, start, failrec->len,
failrec->logical, page,
failrec->failed_mirror);
did_repair = !ret;
......@@ -2159,9 +2156,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
* clean_io_failure() clean all those errors at once.
*/
}
num_copies = btrfs_num_copies(
&BTRFS_I(inode)->root->fs_info->mapping_tree,
failrec->logical, failrec->len);
num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
failrec->logical, failrec->len);
if (num_copies == 1) {
/*
* we only have a single copy of the data, so don't bother with
......@@ -2466,10 +2462,6 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
/*
* Since writes are async, they will only return -ENOMEM.
* Reads can return the full range of I/O error conditions.
*/
static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
......@@ -4721,10 +4713,9 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
}
if (start + min_len > eb->len) {
printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
"wanted %lu %lu\n", (unsigned long long)eb->start,
eb->len, start, min_len);
WARN_ON(1);
return -EINVAL;
}
......
......@@ -337,9 +337,9 @@ struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
struct btrfs_mapping_tree;
struct btrfs_fs_info;
int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num);
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
......
......@@ -49,7 +49,7 @@ void extent_map_tree_init(struct extent_map_tree *tree)
struct extent_map *alloc_extent_map(void)
{
struct extent_map *em;
em = kmem_cache_alloc(extent_map_cache, GFP_NOFS);
em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
if (!em)
return NULL;
em->in_tree = 0;
......@@ -198,16 +198,15 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(merge, em)) {
em->start = merge->start;
em->orig_start = merge->orig_start;
em->len += merge->len;
em->block_len += merge->block_len;
em->block_start = merge->block_start;
merge->in_tree = 0;
if (merge->generation > em->generation) {
em->mod_start = em->start;
em->mod_len = em->len;
em->generation = merge->generation;
list_move(&em->list, &tree->modified_extents);
}
em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
em->mod_start = merge->mod_start;
em->generation = max(em->generation, merge->generation);
list_move(&em->list, &tree->modified_extents);
list_del_init(&merge->list);
rb_erase(&merge->rb_node, &tree->map);
......@@ -223,11 +222,8 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
em->block_len += merge->len;
rb_erase(&merge->rb_node, &tree->map);
merge->in_tree = 0;
if (merge->generation > em->generation) {
em->mod_len = em->len;
em->generation = merge->generation;
list_move(&em->list, &tree->modified_extents);
}
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
em->generation = max(em->generation, merge->generation);
list_del_init(&merge->list);
free_extent_map(merge);
}
......@@ -265,9 +261,9 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
em->mod_start = em->start;
em->mod_len = em->len;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
prealloc = true;
clear_bit(EXTENT_FLAG_PREALLOC, &em->flags);
clear_bit(EXTENT_FLAG_FILLING, &em->flags);
}
try_merge_map(tree, em);
......
......@@ -14,6 +14,7 @@
#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
struct extent_map {
struct rb_node rb_node;
......@@ -24,6 +25,7 @@ struct extent_map {
u64 mod_start;
u64 mod_len;
u64 orig_start;
u64 orig_block_len;
u64 block_start;
u64 block_len;
u64 generation;
......
......@@ -133,7 +133,6 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
......@@ -151,6 +150,26 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
return ret;
}
/*
 * Return the length recorded in the file extent item that @path currently
 * points at (leaf path->nodes[0], slot path->slots[0]).
 *
 * Inline extents report their inline length; regular and preallocated
 * extents report their num_bytes field.  Any other extent type is treated
 * as a fatal inconsistency and hits BUG().
 */
u64 btrfs_file_extent_length(struct btrfs_path *path)
{
	struct btrfs_file_extent_item *item;
	int type;

	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
			      struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], item);

	if (type == BTRFS_FILE_EXTENT_INLINE)
		return btrfs_file_extent_inline_len(path->nodes[0], item);

	/* only regular and preallocated extents are left as valid types */
	if (type != BTRFS_FILE_EXTENT_REG &&
	    type != BTRFS_FILE_EXTENT_PREALLOC)
		BUG();

	return btrfs_file_extent_num_bytes(path->nodes[0], item);
}
static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
......
This diff is collapsed.
......@@ -307,7 +307,6 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl)
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
WARN_ON(io_ctl->cur);
BUG_ON(io_ctl->index >= io_ctl->num_pages);
io_ctl->page = io_ctl->pages[io_ctl->index++];
io_ctl->cur = kmap(io_ctl->page);
......@@ -1250,18 +1249,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
* if previous extent entry covers the offset,
* we should return it instead of the bitmap entry
*/
n = &entry->offset_index;
while (1) {
n = rb_prev(n);
if (!n)
break;
n = rb_prev(&entry->offset_index);
if (n) {
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap) {
if (prev->offset + prev->bytes > offset)
entry = prev;
break;
}
if (!prev->bitmap &&
prev->offset + prev->bytes > offset)
entry = prev;
}
}
return entry;
......@@ -1287,18 +1281,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
}
if (entry->bitmap) {
n = &entry->offset_index;
while (1) {
n = rb_prev(n);
if (!n)
break;
n = rb_prev(&entry->offset_index);
if (n) {
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap) {
if (prev->offset + prev->bytes > offset)
return prev;
break;
}
if (!prev->bitmap &&
prev->offset + prev->bytes > offset)
return prev;
}
if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
return entry;
......@@ -1364,7 +1353,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
u64 bitmap_bytes;
u64 extent_bytes;
u64 size = block_group->key.offset;
u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
BUG_ON(ctl->total_bitmaps > max_bitmaps);
......@@ -1650,8 +1639,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
* some block groups are so tiny they can't be enveloped by a bitmap, so
* don't even bother to create a bitmap for this
*/
if (BITS_PER_BITMAP * block_group->sectorsize >
block_group->key.offset)
if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset)
return false;
return true;
......@@ -2298,10 +2286,10 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
unsigned long total_found = 0;
int ret;
i = offset_to_bit(entry->offset, block_group->sectorsize,
i = offset_to_bit(entry->offset, ctl->unit,
max_t(u64, offset, entry->offset));
want_bits = bytes_to_bits(bytes, block_group->sectorsize);
min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
want_bits = bytes_to_bits(bytes, ctl->unit);
min_bits = bytes_to_bits(min_bytes, ctl->unit);
again:
found_bits = 0;
......@@ -2325,23 +2313,22 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
total_found += found_bits;
if (cluster->max_size < found_bits * block_group->sectorsize)
cluster->max_size = found_bits * block_group->sectorsize;
if (cluster->max_size < found_bits * ctl->unit)
cluster->max_size = found_bits * ctl->unit;
if (total_found < want_bits || cluster->max_size < cont1_bytes) {
i = next_zero + 1;
goto again;
}
cluster->window_start = start * block_group->sectorsize +
entry->offset;
cluster->window_start = start * ctl->unit + entry->offset;
rb_erase(&entry->offset_index, &ctl->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 1);
BUG_ON(ret); /* -EEXIST; Logic error */
trace_btrfs_setup_cluster(block_group, cluster,
total_found * block_group->sectorsize, 1);
total_found * ctl->unit, 1);
return 0;
}
......
......@@ -434,8 +434,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
* 3 items for pre-allocation
*/
trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv,
trans->bytes_reserved);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
trans->bytes_reserved,
BTRFS_RESERVE_NO_FLUSH);
if (ret)
goto out;
trace_btrfs_space_reservation(root->fs_info, "ino_cache",
......
This diff is collapsed.
This diff is collapsed.
......@@ -30,6 +30,8 @@ struct btrfs_ioctl_vol_args {
char name[BTRFS_PATH_NAME_MAX + 1];
};
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
......@@ -123,7 +125,48 @@ struct btrfs_ioctl_scrub_args {
__u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
};
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0
#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1
/*
 * Input parameters embedded as the "start" member of the union in
 * struct btrfs_ioctl_dev_replace_args.  Every field is an input.
 * NOTE(review): presumably only consulted when cmd is
 * BTRFS_IOCTL_DEV_REPLACE_CMD_START -- confirm against the ioctl handler.
 * NOTE(review): the "+ 1" on the name buffers looks like room for a
 * terminating NUL -- confirm.
 */
struct btrfs_ioctl_dev_replace_start_params {
__u64 srcdevid; /* in, if 0, use srcdev_name instead */
__u64 cont_reading_from_srcdev_mode; /* in, see #define
* above */
__u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
__u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
};
#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0
#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1
#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2
#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3
#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4
/*
 * Output values embedded as the "status" member of the union in
 * struct btrfs_ioctl_dev_replace_args.  Every field is an output.
 * NOTE(review): presumably filled in for
 * BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS -- confirm against the ioctl handler.
 */
struct btrfs_ioctl_dev_replace_status_params {
__u64 replace_state; /* out, see #define above */
__u64 progress_1000; /* out, 0 <= x <= 1000 */
__u64 time_started; /* out, seconds since 1-Jan-1970 */
__u64 time_stopped; /* out, seconds since 1-Jan-1970 */
__u64 num_write_errors; /* out */
__u64 num_uncorrectable_read_errors; /* out */
};
#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0
#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1
#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2
/*
 * Argument structure of the BTRFS_IOC_DEV_REPLACE ioctl.
 * The anonymous union carries either the start parameters (input) or the
 * status values (output).
 * NOTE(review): cmd presumably takes a BTRFS_IOCTL_DEV_REPLACE_CMD_* value
 * and result a BTRFS_IOCTL_DEV_REPLACE_RESULT_* value -- confirm against
 * the ioctl handler.
 */
struct btrfs_ioctl_dev_replace_args {
__u64 cmd; /* in */
__u64 result; /* out */
union {
struct btrfs_ioctl_dev_replace_start_params start;
struct btrfs_ioctl_dev_replace_status_params status;
}; /* in/out */
__u64 spare[64];
};
struct btrfs_ioctl_dev_info_args {
__u64 devid; /* in/out */
__u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
......@@ -453,4 +496,7 @@ struct btrfs_ioctl_send_args {
struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_dev_replace_args)
#endif
/*
* Copyright (C) 2012 Fujitsu. All rights reserved.
* Written by Miao Xie <miaox@cn.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifndef __BTRFS_MATH_H
#define __BTRFS_MATH_H
#include <asm/div64.h>
/*
 * Scale @num by @factor tenths: returns num * factor / 10.
 * A factor of 10 returns @num unchanged without doing any arithmetic.
 */
static inline u64 div_factor(u64 num, int factor)
{
	if (factor != 10) {
		num *= factor;
		/* do_div() divides num in place */
		do_div(num, 10);
	}
	return num;
}
/*
 * Scale @num by @factor hundredths: returns num * factor / 100.
 * A factor of 100 returns @num unchanged without doing any arithmetic.
 */
static inline u64 div_factor_fine(u64 num, int factor)
{
	if (factor != 100) {
		num *= factor;
		/* do_div() divides num in place */
		do_div(num, 100);
	}
	return num;
}
#endif
This diff is collapsed.
......@@ -128,8 +128,11 @@ struct btrfs_ordered_extent {
struct list_head root_extent_list;
struct btrfs_work work;
};
struct completion completion;
struct btrfs_work flush_work;
struct list_head work_list;
};
/*
* calculates the total size you need to allocate for an ordered sum
......@@ -186,7 +189,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
......
......@@ -297,6 +297,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
case BTRFS_DEV_STATS_KEY:
printk(KERN_INFO "\t\tdevice stats\n");
break;
case BTRFS_DEV_REPLACE_KEY:
printk(KERN_INFO "\t\tdev replace\n");
break;
};
}
}
......
......@@ -27,6 +27,7 @@
#include "volumes.h"
#include "disk-io.h"
#include "transaction.h"
#include "dev-replace.h"
#undef DEBUG
......@@ -323,7 +324,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct reada_extent *re = NULL;
struct reada_extent *re_exist = NULL;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev;
struct btrfs_device *prev_dev;
......@@ -332,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
int dev_replace_is_ongoing;
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
......@@ -358,7 +359,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
* map block
*/
length = blocksize;
ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0);
ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
&bbio, 0);
if (ret || !bbio || length < blocksize)
goto error;
......@@ -393,6 +395,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
}
/* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
......@@ -400,13 +403,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(!re_exist);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev;
if (dev == prev_dev) {
......@@ -419,21 +426,36 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
*/
continue;
}
if (!dev->bdev) {
/* cannot read ahead on missing device */
continue;
}
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
* as this device is selected for reading only as
* a last resort, skip it for read ahead.
*/
continue;
}
prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
while (--i >= 0) {
dev = bbio->stripes[i].dev;
BUG_ON(dev == NULL);
/* ignore whether the entry was inserted */
radix_tree_delete(&dev->reada_extents, index);
}
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
}
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
kfree(bbio);
return re;
......@@ -915,7 +937,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
generation = btrfs_header_generation(node);
free_extent_buffer(node);
reada_add_block(rc, start, &max_key, level, generation);
if (reada_add_block(rc, start, &max_key, level, generation)) {
kfree(rc);
return ERR_PTR(-ENOMEM);
}
reada_start_machine(root->fs_info);
......
This diff is collapsed.
......@@ -548,9 +548,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root_item *item = &root->root_item;
struct timespec ct = CURRENT_TIME;
spin_lock(&root->root_times_lock);
spin_lock(&root->root_item_lock);
item->ctransid = cpu_to_le64(trans->transid);
item->ctime.sec = cpu_to_le64(ct.tv_sec);
item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
spin_unlock(&root->root_times_lock);
spin_unlock(&root->root_item_lock);
}
This diff is collapsed.
......@@ -4397,9 +4397,9 @@ static int full_send_tree(struct send_ctx *sctx)
if (!path)
return -ENOMEM;
spin_lock(&send_root->root_times_lock);
spin_lock(&send_root->root_item_lock);
start_ctransid = btrfs_root_ctransid(&send_root->root_item);
spin_unlock(&send_root->root_times_lock);
spin_unlock(&send_root->root_item_lock);
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
......@@ -4422,9 +4422,9 @@ static int full_send_tree(struct send_ctx *sctx)
* Make sure the tree has not changed after re-joining. We detect this
* by comparing start_ctransid and ctransid. They should always match.
*/
spin_lock(&send_root->root_times_lock);
spin_lock(&send_root->root_item_lock);
ctransid = btrfs_root_ctransid(&send_root->root_item);
spin_unlock(&send_root->root_times_lock);
spin_unlock(&send_root->root_item_lock);
if (ctransid != start_ctransid) {
WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
......
This diff is collapsed.
This diff is collapsed.
......@@ -105,7 +105,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_items);
struct btrfs_trans_handle *btrfs_start_transaction_noflush(
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root, int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment