Commit 61d0d0d2 authored by Nikolay Borisov's avatar Nikolay Borisov Committed by David Sterba

btrfs: Handle pending/pinned chunks before blockgroup relocation during device shrink

During device shrink pinned/pending chunks (i.e. those which have been
deleted/created respectively, in the current transaction and haven't
touched disk) need to be accounted when doing device shrink. Presently
this happens after the main relocation loop in btrfs_shrink_device,
which could lead to making another go in the body of the function.

Since there is no hard requirement to perform pinned/pending chunks
handling after the relocation loop, move the code before it. This leads
to simplifying the code flow around - i.e. no need to use 'goto again'.

A notable side effect of this change is that modification of the
device's size requires a transaction to be started and committed before
the relocation loop starts. This is necessary to ensure that relocation
process sees the shrunk device size.
Signed-off-by: default avatarNikolay Borisov <nborisov@suse.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent bbbf7243
...@@ -4720,15 +4720,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) ...@@ -4720,15 +4720,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
int slot; int slot;
int failed = 0; int failed = 0;
bool retried = false; bool retried = false;
bool checked_pending_chunks = false;
struct extent_buffer *l; struct extent_buffer *l;
struct btrfs_key key; struct btrfs_key key;
struct btrfs_super_block *super_copy = fs_info->super_copy; struct btrfs_super_block *super_copy = fs_info->super_copy;
u64 old_total = btrfs_super_total_bytes(super_copy); u64 old_total = btrfs_super_total_bytes(super_copy);
u64 old_size = btrfs_device_get_total_bytes(device); u64 old_size = btrfs_device_get_total_bytes(device);
u64 diff; u64 diff;
u64 start;
new_size = round_down(new_size, fs_info->sectorsize); new_size = round_down(new_size, fs_info->sectorsize);
start = new_size;
diff = round_down(old_size - new_size, fs_info->sectorsize); diff = round_down(old_size - new_size, fs_info->sectorsize);
if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
...@@ -4740,6 +4741,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) ...@@ -4740,6 +4741,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
path->reada = READA_BACK; path->reada = READA_BACK;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_free_path(path);
return PTR_ERR(trans);
}
mutex_lock(&fs_info->chunk_mutex); mutex_lock(&fs_info->chunk_mutex);
btrfs_device_set_total_bytes(device, new_size); btrfs_device_set_total_bytes(device, new_size);
...@@ -4747,7 +4754,21 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) ...@@ -4747,7 +4754,21 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
device->fs_devices->total_rw_bytes -= diff; device->fs_devices->total_rw_bytes -= diff;
atomic64_sub(diff, &fs_info->free_chunk_space); atomic64_sub(diff, &fs_info->free_chunk_space);
} }
mutex_unlock(&fs_info->chunk_mutex);
/*
* Once the device's size has been set to the new size, ensure all
* in-memory chunks are synced to disk so that the loop below sees them
* and relocates them accordingly.
*/
if (contains_pending_extent(trans->transaction, device, &start, diff)) {
mutex_unlock(&fs_info->chunk_mutex);
ret = btrfs_commit_transaction(trans);
if (ret)
goto done;
} else {
mutex_unlock(&fs_info->chunk_mutex);
btrfs_end_transaction(trans);
}
again: again:
key.objectid = device->devid; key.objectid = device->devid;
...@@ -4838,36 +4859,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) ...@@ -4838,36 +4859,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
} }
mutex_lock(&fs_info->chunk_mutex); mutex_lock(&fs_info->chunk_mutex);
/*
* We checked in the above loop all device extents that were already in
* the device tree. However before we have updated the device's
* total_bytes to the new size, we might have had chunk allocations that
* have not complete yet (new block groups attached to transaction
* handles), and therefore their device extents were not yet in the
* device tree and we missed them in the loop above. So if we have any
* pending chunk using a device extent that overlaps the device range
* that we can not use anymore, commit the current transaction and
* repeat the search on the device tree - this way we guarantee we will
* not have chunks using device extents that end beyond 'new_size'.
*/
if (!checked_pending_chunks) {
u64 start = new_size;
u64 len = old_size - new_size;
if (contains_pending_extent(trans->transaction, device,
&start, len)) {
mutex_unlock(&fs_info->chunk_mutex);
checked_pending_chunks = true;
failed = 0;
retried = false;
ret = btrfs_commit_transaction(trans);
if (ret)
goto done;
goto again;
}
}
btrfs_device_set_disk_total_bytes(device, new_size); btrfs_device_set_disk_total_bytes(device, new_size);
if (list_empty(&device->post_commit_list)) if (list_empty(&device->post_commit_list))
list_add_tail(&device->post_commit_list, list_add_tail(&device->post_commit_list,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment