Commit a22180d2 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs update from Chris Mason:
 "A big set of fixes and features.

  In terms of line count, most of the code comes from Stefan, who added
  the ability to replace a single drive in place.  This is different
from how btrfs normally replaces drives, and is much, much faster.

Josef is plowing through our synchronous write performance problems.  This
pull
  request does not include the DIO_OWN_WAITING patch that was discussed
  on the list, but it has a number of other improvements to cut down our
  latencies and CPU time during fsync/O_DIRECT writes.

  Miao Xie has a big series of fixes and is spreading out ordered
  operations over more CPUs.  This improves performance and reduces
  contention.

  I've put in fixes for error handling around hash collisions.  These
  are going back to individual stable kernels as I test against them.

  Otherwise we have a lot of fixes and cleanups, thanks everyone!
  raid5/6 is being rebased against the device replacement code.  I'll
  have it posted this Friday along with a nice series of benchmarks."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (115 commits)
  Btrfs: fix a bug of per-file nocow
  Btrfs: fix hash overflow handling
  Btrfs: don't take inode delalloc mutex if we're a free space inode
  Btrfs: fix autodefrag and umount lockup
  Btrfs: fix permissions of empty files not affected by umask
  Btrfs: put raid properties into global table
  Btrfs: fix BUG() in scrub when first superblock reading gives EIO
  Btrfs: do not call file_update_time in aio_write
  Btrfs: only unlock and relock if we have to
  Btrfs: use tokens where we can in the tree log
  Btrfs: optimize leaf_space_used
  Btrfs: don't memset new tokens
  Btrfs: only clear dirty on the buffer if it is marked as dirty
  Btrfs: move checks in set_page_dirty under DEBUG
  Btrfs: log changed inodes based on the extent map tree
  Btrfs: add path->really_keep_locks
  Btrfs: do not mark ems as prealloc if we are writing to them
  Btrfs: keep track of the extents original block length
  Btrfs: inline csums if we're fsyncing
  Btrfs: don't bother copying if we're only logging the inode
  ...
parents 2d4dce00 213490b3
......@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
......@@ -121,6 +121,8 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
ret = posix_acl_equiv_mode(acl, &inode->i_mode);
if (ret < 0)
return ret;
if (ret == 0)
acl = NULL;
}
ret = 0;
break;
......
......@@ -461,6 +461,7 @@ static int __merge_refs(struct list_head *head, int mode)
pos2 = n2, n2 = pos2->next) {
struct __prelim_ref *ref2;
struct __prelim_ref *xchg;
struct extent_inode_elem *eie;
ref2 = list_entry(pos2, struct __prelim_ref, list);
......@@ -472,12 +473,20 @@ static int __merge_refs(struct list_head *head, int mode)
ref1 = ref2;
ref2 = xchg;
}
ref1->count += ref2->count;
} else {
if (ref1->parent != ref2->parent)
continue;
ref1->count += ref2->count;
}
eie = ref1->inode_list;
while (eie && eie->next)
eie = eie->next;
if (eie)
eie->next = ref2->inode_list;
else
ref1->inode_list = ref2->inode_list;
ref1->count += ref2->count;
list_del(&ref2->list);
kfree(ref2);
}
......@@ -890,8 +899,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
list_del(&ref->list);
if (ref->count < 0)
WARN_ON(1);
WARN_ON(ref->count < 0);
if (ref->count && ref->root_id && ref->parent == 0) {
/* no parent == root of tree */
ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
......
......@@ -39,6 +39,7 @@
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
/* in memory btrfs inode */
struct btrfs_inode {
......@@ -90,6 +91,9 @@ struct btrfs_inode {
unsigned long runtime_flags;
/* Keep track of who's O_SYNC/fsyncing currently */
atomic_t sync_writers;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
......
......@@ -137,7 +137,7 @@ struct btrfsic_block {
unsigned int never_written:1; /* block was added because it was
* referenced, not because it was
* written */
unsigned int mirror_num:2; /* large enough to hold
unsigned int mirror_num; /* large enough to hold
* BTRFS_SUPER_MIRROR_MAX */
struct btrfsic_dev_state *dev_state;
u64 dev_bytenr; /* key, physical byte num on disk */
......@@ -723,7 +723,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
}
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -903,7 +903,7 @@ static int btrfsic_process_superblock_dev_mirror(
}
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -1287,7 +1287,7 @@ static int btrfsic_create_link_to_next_block(
*next_blockp = NULL;
if (0 == *num_copiesp) {
*num_copiesp =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -1489,7 +1489,7 @@ static int btrfsic_handle_extent_data(
chunk_len = num_bytes;
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, state->datablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -1582,9 +1582,21 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
struct btrfs_device *device;
length = len;
ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
ret = btrfs_map_block(state->root->fs_info, READ,
bytenr, &length, &multi, mirror_num);
if (ret) {
block_ctx_out->start = 0;
block_ctx_out->dev_bytenr = 0;
block_ctx_out->len = 0;
block_ctx_out->dev = NULL;
block_ctx_out->datav = NULL;
block_ctx_out->pagev = NULL;
block_ctx_out->mem_to_free = NULL;
return ret;
}
device = multi->stripes[0].dev;
block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
......@@ -1594,7 +1606,6 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
block_ctx_out->pagev = NULL;
block_ctx_out->mem_to_free = NULL;
if (0 == ret)
kfree(multi);
if (NULL == block_ctx_out->dev) {
ret = -ENXIO;
......@@ -2463,7 +2474,7 @@ static int btrfsic_process_written_superblock(
}
num_copies =
btrfs_num_copies(&state->root->fs_info->mapping_tree,
btrfs_num_copies(state->root->fs_info,
next_bytenr, BTRFS_SUPER_INFO_SIZE);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
......@@ -2960,7 +2971,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
struct btrfsic_block_data_ctx block_ctx;
int match = 0;
num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
num_copies = btrfs_num_copies(state->root->fs_info,
bytenr, state->metablock_size);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
......
......@@ -687,7 +687,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
BUG_ON(ret); /* -ENOMEM */
if (ret)
bio_endio(comp_bio, ret);
bio_put(comp_bio);
......@@ -712,7 +713,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
BUG_ON(ret); /* -ENOMEM */
if (ret)
bio_endio(comp_bio, ret);
bio_put(comp_bio);
return 0;
......
......@@ -651,7 +651,8 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
/*
* Since we're under a transaction reserve_metadata_bytes could
* try to commit the transaction which will make it return
......@@ -686,7 +687,8 @@ static int btrfs_delayed_inode_reserve_metadata(
* reserve something strictly for us. If not be a pain and try
* to steal from the delalloc block rsv.
*/
ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
if (!ret)
goto out;
......@@ -1255,7 +1257,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
unsigned long nr = 0;
int need_requeue = 0;
int ret;
......@@ -1316,11 +1317,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
delayed_node);
mutex_unlock(&delayed_node->mutex);
nr = trans->blocks_used;
trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
__btrfs_btree_balance_dirty(root, nr);
btrfs_btree_balance_dirty_nodelay(root);
free_path:
btrfs_free_path(path);
out:
......
/*
* Copyright (C) STRATO AG 2012. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#if !defined(__BTRFS_DEV_REPLACE__)
#define __BTRFS_DEV_REPLACE__
struct btrfs_ioctl_dev_replace_args;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
{
atomic64_inc(stat_value);
}
#endif
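
As a quick orientation to the API declared above, here is a minimal, illustrative sketch of how a reader of the shared dev-replace state would bracket its access with the lock helpers. The function name and the field being read are assumptions for illustration, not code from this commit:

static u64 sample_replace_progress(struct btrfs_fs_info *fs_info)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	u64 left;

	/* the lock helpers serialize readers against a running replace */
	btrfs_dev_replace_lock(dev_replace);
	left = dev_replace->cursor_left;	/* assumed field name */
	btrfs_dev_replace_unlock(dev_replace);

	return left;
}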
......@@ -213,6 +213,65 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
return btrfs_match_dir_item_name(root, path, name, name_len);
}
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
const char *name, int name_len)
{
int ret;
struct btrfs_key key;
struct btrfs_dir_item *di;
int data_size;
struct extent_buffer *leaf;
int slot;
struct btrfs_path *path;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = dir;
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
key.offset = btrfs_name_hash(name, name_len);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* return back any errors */
if (ret < 0)
goto out;
/* nothing found, we're safe */
if (ret > 0) {
ret = 0;
goto out;
}
/* we found an item, look for our name in the item */
di = btrfs_match_dir_item_name(root, path, name, name_len);
if (di) {
/* our exact name was found */
ret = -EEXIST;
goto out;
}
/*
* see if there is room in the item to insert this
* name
*/
data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item);
leaf = path->nodes[0];
slot = path->slots[0];
if (data_size + btrfs_item_size_nr(leaf, slot) +
sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) {
ret = -EOVERFLOW;
} else {
/* plenty of insertion room */
ret = 0;
}
out:
btrfs_free_path(path);
return ret;
}
/*
* lookup a directory item based on index. 'dir' is the objectid
* we're searching in, and 'mod' tells us if you plan on deleting the
......
......@@ -45,6 +45,7 @@
#include "inode-map.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
......@@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
break;
num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
num_copies = btrfs_num_copies(root->fs_info,
eb->start, eb->len);
if (num_copies == 1)
break;
......@@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
int ret;
/*
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
*/
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
if (ret)
bio_endio(bio, ret);
return ret;
}
static int check_async_write(struct inode *inode, unsigned long bio_flags)
......@@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int ret;
if (!(rw & REQ_WRITE)) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
......@@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
bio, 1);
if (ret)
return ret;
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
goto out_w_error;
ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
mirror_num, 0);
} else if (!async) {
ret = btree_csum_one_bio(bio);
if (ret)
return ret;
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
goto out_w_error;
ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
mirror_num, 0);
}
} else {
/*
* kthread helpers are used to submit writes so that checksumming
* can happen in parallel across all CPUs
* kthread helpers are used to submit writes so that
* checksumming can happen in parallel across all CPUs
*/
return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, mirror_num, 0,
bio_offset,
__btree_submit_bio_start,
__btree_submit_bio_done);
}
if (ret) {
out_w_error:
bio_endio(bio, ret);
}
return ret;
}
#ifdef CONFIG_MIGRATION
......@@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
static int btree_set_page_dirty(struct page *page)
{
#ifdef DEBUG
struct extent_buffer *eb;
BUG_ON(!PagePrivate(page));
......@@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page)
BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
BUG_ON(!atomic_read(&eb->refs));
btrfs_assert_tree_locked(eb);
#endif
return __set_page_dirty_nobuffers(page);
}
......@@ -1129,12 +1142,12 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
root->fs_info->dirty_metadata_bytes);
}
spin_unlock(&root->fs_info->delalloc_lock);
}
/* ugh, clear_extent_buffer_dirty needs to lock the page */
btrfs_set_lock_blocking(buf);
clear_extent_buffer_dirty(buf);
}
}
}
static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
......@@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->root_key.objectid = objectid;
root->anon_dev = 0;
spin_lock_init(&root->root_times_lock);
spin_lock_init(&root->root_item_lock);
}
static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
......@@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb,
init_rwsem(&fs_info->extent_commit_sem);
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
fs_info->dev_replace.lock_owner = 0;
atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
mutex_init(&fs_info->dev_replace.lock_management_lock);
mutex_init(&fs_info->dev_replace.lock);
spin_lock_init(&fs_info->qgroup_lock);
fs_info->qgroup_tree = RB_ROOT;
......@@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb,
fs_info->thread_pool_size,
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
fs_info->thread_pool_size,
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->submit_workers, "submit",
min_t(u64, fs_devices->num_devices,
fs_info->thread_pool_size),
......@@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->delayed_workers);
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
ret |= btrfs_start_workers(&fs_info->flush_workers);
if (ret) {
err = -ENOMEM;
goto fail_sb_buffer;
......@@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb,
goto fail_tree_roots;
}
btrfs_close_extra_devices(fs_devices);
/*
* keep the device that is marked to be the target device for the
* dev_replace procedure
*/
btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) {
printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
......@@ -2490,6 +2517,14 @@ int open_ctree(struct super_block *sb,
goto fail_block_groups;
}
ret = btrfs_init_dev_replace(fs_info);
if (ret) {
pr_err("btrfs: failed to init dev_replace: %d\n", ret);
goto fail_block_groups;
}
btrfs_close_extra_devices(fs_info, fs_devices, 1);
ret = btrfs_init_space_info(fs_info);
if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret);
......@@ -2503,6 +2538,13 @@ int open_ctree(struct super_block *sb,
}
fs_info->num_tolerated_disk_barrier_failures =
btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
if (fs_info->fs_devices->missing_devices >
fs_info->num_tolerated_disk_barrier_failures &&
!(sb->s_flags & MS_RDONLY)) {
printk(KERN_WARNING
"Btrfs: too many missing devices, writeable mount is not allowed\n");
goto fail_block_groups;
}
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
......@@ -2631,6 +2673,13 @@ int open_ctree(struct super_block *sb,
return ret;
}
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
pr_warn("btrfs: failed to resume dev_replace\n");
close_ctree(tree_root);
return ret;
}
return 0;
fail_qgroup:
......@@ -2667,6 +2716,7 @@ int open_ctree(struct super_block *sb,
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_stop_workers(&fs_info->flush_workers);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
......@@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root)
smp_mb();
/* pause restriper - we want to resume on mount */
btrfs_pause_balance(root->fs_info);
btrfs_pause_balance(fs_info);
btrfs_scrub_cancel(root);
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
/* wait for any defraggers to finish */
wait_event(fs_info->transaction_wait,
(atomic_read(&fs_info->defrag_running) == 0));
/* clear out the rbtree of defraggable inodes */
btrfs_run_defrag_inodes(fs_info);
btrfs_cleanup_defrag_inodes(fs_info);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
......@@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_stop_workers(&fs_info->readahead_workers);
btrfs_stop_workers(&fs_info->flush_workers);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
......@@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
int was_dirty;
btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
if (transid != root->fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
"found %llu running %llu\n",
(unsigned long long)buf->start,
(unsigned long long)transid,
(unsigned long long)root->fs_info->generation);
WARN_ON(1);
}
was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
......@@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
}
}
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
int flush_delayed)
{
/*
* looks as though older kernels can get into trouble with
......@@ -3411,6 +3463,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
if (current->flags & PF_MEMALLOC)
return;
if (flush_delayed)
btrfs_balance_delayed_items(root);
num_dirty = root->fs_info->dirty_metadata_bytes;
......@@ -3422,25 +3475,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
return;
}
void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
void btrfs_btree_balance_dirty(struct btrfs_root *root)
{
/*
* looks as though older kernels can get into trouble with
* this code, they end up stuck in balance_dirty_pages forever
*/
u64 num_dirty;
unsigned long thresh = 32 * 1024 * 1024;
if (current->flags & PF_MEMALLOC)
return;
num_dirty = root->fs_info->dirty_metadata_bytes;
__btrfs_btree_balance_dirty(root, 1);
}
if (num_dirty > thresh) {
balance_dirty_pages_ratelimited(
root->fs_info->btree_inode->i_mapping);
}
return;
void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
{
__btrfs_btree_balance_dirty(root, 0);
}
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
......
......@@ -62,8 +62,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
void btrfs_btree_balance_dirty(struct btrfs_root *root);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
......
......@@ -341,12 +341,10 @@ static int insert_state(struct extent_io_tree *tree,
{
struct rb_node *node;
if (end < start) {
printk(KERN_ERR "btrfs end < start %llu %llu\n",
if (end < start)
WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
(unsigned long long)end,
(unsigned long long)start);
WARN_ON(1);
}
state->start = start;
state->end = end;
......@@ -1919,12 +1917,12 @@ static void repair_io_failure_callback(struct bio *bio, int err)
* the standard behavior is to write all copies in a raid setup. here we only
* want to write the one bad copy. so we do the mapping for ourselves and issue
* submit_bio directly.
* to avoid any synchonization issues, wait for the data after writing, which
* to avoid any synchronization issues, wait for the data after writing, which
* actually prevents the read that triggered the error from finishing.
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num)
{
......@@ -1946,7 +1944,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
bio->bi_size = 0;
map_length = length;
ret = btrfs_map_block(map_tree, WRITE, logical,
ret = btrfs_map_block(fs_info, WRITE, logical,
&map_length, &bbio, mirror_num);
if (ret) {
bio_put(bio);
......@@ -1984,14 +1982,13 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num)
{
struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
u64 start = eb->start;
unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
int ret = 0;
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
start, p, mirror_num);
if (ret)
break;
......@@ -2010,7 +2007,7 @@ static int clean_io_failure(u64 start, struct page *page)
u64 private;
u64 private_failure;
struct io_failure_record *failrec;
struct btrfs_mapping_tree *map_tree;
struct btrfs_fs_info *fs_info;
struct extent_state *state;
int num_copies;
int did_repair = 0;
......@@ -2046,11 +2043,11 @@ static int clean_io_failure(u64 start, struct page *page)
spin_unlock(&BTRFS_I(inode)->io_tree.lock);
if (state && state->start == failrec->start) {
map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
num_copies = btrfs_num_copies(map_tree, failrec->logical,
fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
ret = repair_io_failure(map_tree, start, failrec->len,
ret = repair_io_failure(fs_info, start, failrec->len,
failrec->logical, page,
failrec->failed_mirror);
did_repair = !ret;
......@@ -2159,8 +2156,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
* clean_io_failure() clean all those errors at once.
*/
}
num_copies = btrfs_num_copies(
&BTRFS_I(inode)->root->fs_info->mapping_tree,
num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
failrec->logical, failrec->len);
if (num_copies == 1) {
/*
......@@ -2466,10 +2462,6 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
/*
* Since writes are async, they will only return -ENOMEM.
* Reads can return the full range of I/O error conditions.
*/
static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
......@@ -4721,10 +4713,9 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
}
if (start + min_len > eb->len) {
printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
"wanted %lu %lu\n", (unsigned long long)eb->start,
eb->len, start, min_len);
WARN_ON(1);
return -EINVAL;
}
......
......@@ -337,9 +337,9 @@ struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
struct btrfs_mapping_tree;
struct btrfs_fs_info;
int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num);
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
......
......@@ -49,7 +49,7 @@ void extent_map_tree_init(struct extent_map_tree *tree)
struct extent_map *alloc_extent_map(void)
{
struct extent_map *em;
em = kmem_cache_alloc(extent_map_cache, GFP_NOFS);
em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
if (!em)
return NULL;
em->in_tree = 0;
......@@ -198,16 +198,15 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(merge, em)) {
em->start = merge->start;
em->orig_start = merge->orig_start;
em->len += merge->len;
em->block_len += merge->block_len;
em->block_start = merge->block_start;
merge->in_tree = 0;
if (merge->generation > em->generation) {
em->mod_start = em->start;
em->mod_len = em->len;
em->generation = merge->generation;
em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
em->mod_start = merge->mod_start;
em->generation = max(em->generation, merge->generation);
list_move(&em->list, &tree->modified_extents);
}
list_del_init(&merge->list);
rb_erase(&merge->rb_node, &tree->map);
......@@ -223,11 +222,8 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
em->block_len += merge->len;
rb_erase(&merge->rb_node, &tree->map);
merge->in_tree = 0;
if (merge->generation > em->generation) {
em->mod_len = em->len;
em->generation = merge->generation;
list_move(&em->list, &tree->modified_extents);
}
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
em->generation = max(em->generation, merge->generation);
list_del_init(&merge->list);
free_extent_map(merge);
}
......@@ -265,9 +261,9 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
em->mod_start = em->start;
em->mod_len = em->len;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
prealloc = true;
clear_bit(EXTENT_FLAG_PREALLOC, &em->flags);
clear_bit(EXTENT_FLAG_FILLING, &em->flags);
}
try_merge_map(tree, em);
......
......@@ -14,6 +14,7 @@
#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
struct extent_map {
struct rb_node rb_node;
......@@ -24,6 +25,7 @@ struct extent_map {
u64 mod_start;
u64 mod_len;
u64 orig_start;
u64 orig_block_len;
u64 block_start;
u64 block_len;
u64 generation;
......
......@@ -133,7 +133,6 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
......@@ -151,6 +150,26 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
return ret;
}
u64 btrfs_file_extent_length(struct btrfs_path *path)
{
int extent_type;
struct btrfs_file_extent_item *fi;
u64 len;
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(path->nodes[0], fi);
if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC)
len = btrfs_file_extent_num_bytes(path->nodes[0], fi);
else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
len = btrfs_file_extent_inline_len(path->nodes[0], fi);
else
BUG();
return len;
}
static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
......
......@@ -307,7 +307,6 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl)
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
WARN_ON(io_ctl->cur);
BUG_ON(io_ctl->index >= io_ctl->num_pages);
io_ctl->page = io_ctl->pages[io_ctl->index++];
io_ctl->cur = kmap(io_ctl->page);
......@@ -1250,18 +1249,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
* if previous extent entry covers the offset,
* we should return it instead of the bitmap entry
*/
n = &entry->offset_index;
while (1) {
n = rb_prev(n);
if (!n)
break;
n = rb_prev(&entry->offset_index);
if (n) {
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap) {
if (prev->offset + prev->bytes > offset)
if (!prev->bitmap &&
prev->offset + prev->bytes > offset)
entry = prev;
break;
}
}
}
return entry;
......@@ -1287,18 +1281,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
}
if (entry->bitmap) {
n = &entry->offset_index;
while (1) {
n = rb_prev(n);
if (!n)
break;
n = rb_prev(&entry->offset_index);
if (n) {
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap) {
if (prev->offset + prev->bytes > offset)
if (!prev->bitmap &&
prev->offset + prev->bytes > offset)
return prev;
break;
}
}
if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
return entry;
......@@ -1364,7 +1353,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
u64 bitmap_bytes;
u64 extent_bytes;
u64 size = block_group->key.offset;
u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
BUG_ON(ctl->total_bitmaps > max_bitmaps);
......@@ -1650,8 +1639,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
* some block groups are so tiny they can't be enveloped by a bitmap, so
* don't even bother to create a bitmap for this
*/
if (BITS_PER_BITMAP * block_group->sectorsize >
block_group->key.offset)
if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset)
return false;
return true;
......@@ -2298,10 +2286,10 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
unsigned long total_found = 0;
int ret;
i = offset_to_bit(entry->offset, block_group->sectorsize,
i = offset_to_bit(entry->offset, ctl->unit,
max_t(u64, offset, entry->offset));
want_bits = bytes_to_bits(bytes, block_group->sectorsize);
min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
want_bits = bytes_to_bits(bytes, ctl->unit);
min_bits = bytes_to_bits(min_bytes, ctl->unit);
again:
found_bits = 0;
......@@ -2325,23 +2313,22 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
total_found += found_bits;
if (cluster->max_size < found_bits * block_group->sectorsize)
cluster->max_size = found_bits * block_group->sectorsize;
if (cluster->max_size < found_bits * ctl->unit)
cluster->max_size = found_bits * ctl->unit;
if (total_found < want_bits || cluster->max_size < cont1_bytes) {
i = next_zero + 1;
goto again;
}
cluster->window_start = start * block_group->sectorsize +
entry->offset;
cluster->window_start = start * ctl->unit + entry->offset;
rb_erase(&entry->offset_index, &ctl->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 1);
BUG_ON(ret); /* -EEXIST; Logic error */
trace_btrfs_setup_cluster(block_group, cluster,
total_found * block_group->sectorsize, 1);
total_found * ctl->unit, 1);
return 0;
}
......
......@@ -434,8 +434,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
* 3 items for pre-allocation
*/
trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv,
trans->bytes_reserved);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
trans->bytes_reserved,
BTRFS_RESERVE_NO_FLUSH);
if (ret)
goto out;
trace_btrfs_space_reservation(root->fs_info, "ino_cache",
......
......@@ -30,6 +30,8 @@ struct btrfs_ioctl_vol_args {
char name[BTRFS_PATH_NAME_MAX + 1];
};
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
......@@ -123,7 +125,48 @@ struct btrfs_ioctl_scrub_args {
__u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
};
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0
#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1
struct btrfs_ioctl_dev_replace_start_params {
__u64 srcdevid; /* in, if 0, use srcdev_name instead */
__u64 cont_reading_from_srcdev_mode; /* in, see #define
* above */
__u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
__u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
};
#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0
#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1
#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2
#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3
#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4
struct btrfs_ioctl_dev_replace_status_params {
__u64 replace_state; /* out, see #define above */
__u64 progress_1000; /* out, 0 <= x <= 1000 */
__u64 time_started; /* out, seconds since 1-Jan-1970 */
__u64 time_stopped; /* out, seconds since 1-Jan-1970 */
__u64 num_write_errors; /* out */
__u64 num_uncorrectable_read_errors; /* out */
};
#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0
#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1
#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2
struct btrfs_ioctl_dev_replace_args {
__u64 cmd; /* in */
__u64 result; /* out */
union {
struct btrfs_ioctl_dev_replace_start_params start;
struct btrfs_ioctl_dev_replace_status_params status;
}; /* in/out */
__u64 spare[64];
};
struct btrfs_ioctl_dev_info_args {
__u64 devid; /* in/out */
__u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
......@@ -453,4 +496,7 @@ struct btrfs_ioctl_send_args {
struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_dev_replace_args)
#endif
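
The hunks above form the complete userspace ABI for device replace. As a minimal sketch, a status query through the new ioctl could look like the following; it assumes a local copy of this header under the name "btrfs_ioctl.h" (the include name is an assumption), and any open fd on the mounted filesystem will do:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "btrfs_ioctl.h"	/* assumed local copy of the header above */

int main(int argc, char **argv)
{
	struct btrfs_ioctl_dev_replace_args args;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <btrfs mount point>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&args, 0, sizeof(args));
	args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS;
	if (ioctl(fd, BTRFS_IOC_DEV_REPLACE, &args) < 0) {
		perror("ioctl");
		close(fd);
		return 1;
	}
	printf("state=%llu progress=%llu/1000\n",
	       (unsigned long long)args.status.replace_state,
	       (unsigned long long)args.status.progress_1000);
	close(fd);
	return 0;
}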
/*
* Copyright (C) 2012 Fujitsu. All rights reserved.
* Written by Miao Xie <miaox@cn.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_MATH_H
#define __BTRFS_MATH_H
#include <asm/div64.h>
static inline u64 div_factor(u64 num, int factor)
{
if (factor == 10)
return num;
num *= factor;
do_div(num, 10);
return num;
}
static inline u64 div_factor_fine(u64 num, int factor)
{
if (factor == 100)
return num;
num *= factor;
do_div(num, 100);
return num;
}
#endif
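
These helpers scale a value by tenths or hundredths while using do_div() to stay portable for 64-bit division. A userspace re-statement (plain division substituted for do_div(), which is fine outside the kernel) makes the scaling behavior easy to verify:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* userspace restatement of div_factor()/div_factor_fine() above,
 * with do_div() replaced by plain 64-bit division */
static inline u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	return num * factor / 10;
}

static inline u64 div_factor_fine(u64 num, int factor)
{
	if (factor == 100)
		return num;
	return num * factor / 100;
}

int main(void)
{
	/* scale 1000 by 7/10 and by 7/100 */
	printf("%llu %llu\n",
	       (unsigned long long)div_factor(1000, 7),       /* prints 700 */
	       (unsigned long long)div_factor_fine(1000, 7)); /* prints 70 */
	return 0;
}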
......@@ -211,6 +211,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
INIT_LIST_HEAD(&entry->root_extent_list);
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
trace_btrfs_ordered_extent_add(inode, entry);
......@@ -464,18 +466,28 @@ void btrfs_remove_ordered_extent(struct inode *inode,
wake_up(&entry->wait);
}
static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
{
struct btrfs_ordered_extent *ordered;
ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
btrfs_start_ordered_extent(ordered->inode, ordered, 1);
complete(&ordered->completion);
}
/*
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
{
struct list_head splice;
struct list_head splice, works;
struct list_head *cur;
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_extent *ordered, *next;
struct inode *inode;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
......@@ -494,19 +506,32 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
spin_unlock(&root->fs_info->ordered_extent_lock);
if (inode) {
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
iput(inode);
ordered->flush_work.func = btrfs_run_ordered_extent_work;
list_add_tail(&ordered->work_list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&ordered->flush_work);
} else {
btrfs_put_ordered_extent(ordered);
}
cond_resched();
spin_lock(&root->fs_info->ordered_extent_lock);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
list_for_each_entry_safe(ordered, next, &works, work_list) {
list_del_init(&ordered->work_list);
wait_for_completion(&ordered->completion);
inode = ordered->inode;
btrfs_put_ordered_extent(ordered);
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
iput(inode);
cond_resched();
}
}
/*
......@@ -519,13 +544,17 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
* extra check to make sure the ordered operation list really is empty
* before we return
*/
void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
{
struct btrfs_inode *btrfs_inode;
struct inode *inode;
struct list_head splice;
struct list_head works;
struct btrfs_delalloc_work *work, *next;
int ret = 0;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
......@@ -533,6 +562,7 @@ void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
list_splice_init(&root->fs_info->ordered_operations, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
ordered_operations);
......@@ -549,15 +579,26 @@ void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
list_add_tail(&BTRFS_I(inode)->ordered_operations,
&root->fs_info->ordered_operations);
}
if (!inode)
continue;
spin_unlock(&root->fs_info->ordered_extent_lock);
if (inode) {
if (wait)
btrfs_wait_ordered_range(inode, 0, (u64)-1);
else
filemap_flush(inode->i_mapping);
btrfs_add_delayed_iput(inode);
work = btrfs_alloc_delalloc_work(inode, wait, 1);
if (!work) {
if (list_empty(&BTRFS_I(inode)->ordered_operations))
list_add_tail(&btrfs_inode->ordered_operations,
&splice);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_tail(&splice,
&root->fs_info->ordered_operations);
spin_unlock(&root->fs_info->ordered_extent_lock);
ret = -ENOMEM;
goto out;
}
list_add_tail(&work->list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&work->work);
cond_resched();
spin_lock(&root->fs_info->ordered_extent_lock);
......@@ -566,7 +607,13 @@ void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
goto again;
spin_unlock(&root->fs_info->ordered_extent_lock);
out:
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
btrfs_wait_and_free_delalloc_work(work);
}
mutex_unlock(&root->fs_info->ordered_operations_mutex);
return ret;
}
/*
......@@ -606,7 +653,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
u64 end;
u64 orig_end;
struct btrfs_ordered_extent *ordered;
int found;
if (start + len < start) {
orig_end = INT_LIMIT(loff_t);
......@@ -642,7 +688,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
filemap_fdatawait_range(inode->i_mapping, start, orig_end);
end = orig_end;
found = 0;
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, end);
if (!ordered)
......@@ -655,7 +700,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
btrfs_put_ordered_extent(ordered);
break;
}
found++;
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
btrfs_put_ordered_extent(ordered);
......@@ -934,15 +978,6 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
if (last_mod < root->fs_info->last_trans_committed)
return;
/*
* the transaction is already committing. Just start the IO and
* don't bother with all of this list nonsense
*/
if (trans && root->fs_info->running_transaction->blocked) {
btrfs_wait_ordered_range(inode, 0, (u64)-1);
return;
}
spin_lock(&root->fs_info->ordered_extent_lock);
if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
list_add_tail(&BTRFS_I(inode)->ordered_operations,
......@@ -959,6 +994,7 @@ int __init ordered_data_init(void)
NULL);
if (!btrfs_ordered_extent_cache)
return -ENOMEM;
return 0;
}
......
......@@ -128,8 +128,11 @@ struct btrfs_ordered_extent {
struct list_head root_extent_list;
struct btrfs_work work;
};
struct completion completion;
struct btrfs_work flush_work;
struct list_head work_list;
};
/*
* calculates the total size you need to allocate for an ordered sum
......@@ -186,7 +189,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
......
......@@ -297,6 +297,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
case BTRFS_DEV_STATS_KEY:
printk(KERN_INFO "\t\tdevice stats\n");
break;
case BTRFS_DEV_REPLACE_KEY:
printk(KERN_INFO "\t\tdev replace\n");
break;
};
}
}
......
......@@ -548,9 +548,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root_item *item = &root->root_item;
struct timespec ct = CURRENT_TIME;
spin_lock(&root->root_times_lock);
spin_lock(&root->root_item_lock);
item->ctransid = cpu_to_le64(trans->transid);
item->ctime.sec = cpu_to_le64(ct.tv_sec);
item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
spin_unlock(&root->root_times_lock);
spin_unlock(&root->root_item_lock);
}
......@@ -4397,9 +4397,9 @@ static int full_send_tree(struct send_ctx *sctx)
if (!path)
return -ENOMEM;
spin_lock(&send_root->root_times_lock);
spin_lock(&send_root->root_item_lock);
start_ctransid = btrfs_root_ctransid(&send_root->root_item);
spin_unlock(&send_root->root_times_lock);
spin_unlock(&send_root->root_item_lock);
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
......@@ -4422,9 +4422,9 @@ static int full_send_tree(struct send_ctx *sctx)
* Make sure the tree has not changed after re-joining. We detect this
* by comparing start_ctransid and ctransid. They should always match.
*/
spin_lock(&send_root->root_times_lock);
spin_lock(&send_root->root_item_lock);
ctransid = btrfs_root_ctransid(&send_root->root_item);
spin_unlock(&send_root->root_times_lock);
spin_unlock(&send_root->root_item_lock);
if (ctransid != start_ctransid) {
WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
......