Commit f9eab5f0 authored by Qu Wenruo, committed by David Sterba

btrfs: scrub: try to fix super block errors

[BUG]
The following script shows that, although scrub can detect super block
errors, it never tries to fix them:

	mkfs.btrfs -f -d raid1 -m raid1 $dev1 $dev2
	xfs_io -c "pwrite 67108864 4k" $dev2

	mount $dev1 $mnt
	btrfs scrub start -B $dev2
	btrfs scrub start -Br $dev2
	umount $mnt

The first scrub reports the super error correctly:

  scrub done for f3289218-abd3-41ac-a630-202f766c0859
  Scrub started:    Tue Aug  2 14:44:11 2022
  Status:           finished
  Duration:         0:00:00
  Total to scrub:   1.26GiB
  Rate:             0.00B/s
  Error summary:    super=1
    Corrected:      0
    Uncorrectable:  0
    Unverified:     0

But the second read-only scrub still reports the same super error:

  Scrub started:    Tue Aug  2 14:44:11 2022
  Status:           finished
  Duration:         0:00:00
  Total to scrub:   1.26GiB
  Rate:             0.00B/s
  Error summary:    super=1
    Corrected:      0
    Uncorrectable:  0
    Unverified:     0

[CAUSE]
The existing comment already shows that super block errors can be easily
fixed by committing a transaction:

	/*
	 * If we find an error in a super block, we just report it.
	 * They will get written with the next transaction commit
	 * anyway
	 */

But that assumption does not always hold, and since scrub
should try to repair every error it finds (except for read-only scrub),
we should actively commit a transaction to fix this.

[FIX]
Just commit a transaction if we found any super block errors, after
everything else is done.

We cannot do this right after scrub_supers(), as
btrfs_commit_transaction() will try to pause and wait for the running
scrub, thus we cannot call it with scrub_lock held.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent e69bf81c
...@@ -4093,6 +4093,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4093,6 +4093,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
int ret; int ret;
struct btrfs_device *dev; struct btrfs_device *dev;
unsigned int nofs_flag; unsigned int nofs_flag;
bool need_commit = false;
if (btrfs_fs_closing(fs_info)) if (btrfs_fs_closing(fs_info))
return -EAGAIN; return -EAGAIN;
...@@ -4196,6 +4197,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4196,6 +4197,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
*/ */
nofs_flag = memalloc_nofs_save(); nofs_flag = memalloc_nofs_save();
if (!is_dev_replace) { if (!is_dev_replace) {
u64 old_super_errors;
spin_lock(&sctx->stat_lock);
old_super_errors = sctx->stat.super_errors;
spin_unlock(&sctx->stat_lock);
btrfs_info(fs_info, "scrub: started on devid %llu", devid); btrfs_info(fs_info, "scrub: started on devid %llu", devid);
/* /*
* by holding device list mutex, we can * by holding device list mutex, we can
...@@ -4204,6 +4211,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4204,6 +4211,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->fs_devices->device_list_mutex); mutex_lock(&fs_info->fs_devices->device_list_mutex);
ret = scrub_supers(sctx, dev); ret = scrub_supers(sctx, dev);
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
spin_lock(&sctx->stat_lock);
/*
* Super block errors found, but we can not commit transaction
* at current context, since btrfs_commit_transaction() needs
* to pause the current running scrub (hold by ourselves).
*/
if (sctx->stat.super_errors > old_super_errors && !sctx->readonly)
need_commit = true;
spin_unlock(&sctx->stat_lock);
} }
if (!ret) if (!ret)
...@@ -4230,6 +4247,25 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4230,6 +4247,25 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
scrub_workers_put(fs_info); scrub_workers_put(fs_info);
scrub_put_ctx(sctx); scrub_put_ctx(sctx);
/*
* We found some super block errors before, now try to force a
* transaction commit, as scrub has finished.
*/
if (need_commit) {
struct btrfs_trans_handle *trans;
trans = btrfs_start_transaction(fs_info->tree_root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_err(fs_info,
"scrub: failed to start transaction to fix super block errors: %d", ret);
return ret;
}
ret = btrfs_commit_transaction(trans);
if (ret < 0)
btrfs_err(fs_info,
"scrub: failed to commit transaction to fix super block errors: %d", ret);
}
return ret; return ret;
out: out:
scrub_workers_put(fs_info); scrub_workers_put(fs_info);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment