Commit 400dd456 authored by Linus Torvalds

Merge tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - fix race when reading extent buffer and 'uptodate' status is missed
   by one thread (introduced in 6.5)

 - do additional validation of devices using major:minor numbers

 - zoned mode fixes:
     - use zone-aware super block access during scrub
     - fix use-after-free during device replace (found by KASAN)
     - also delete zones that are 100% unusable to reclaim space

 - extent unpinning fixes:
     - fix extent map leak after error handling
     - print correct range in error message

 - error code and message updates

* tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix race in read_extent_buffer_pages()
  btrfs: return accurate error code on open failure in open_fs_devices()
  btrfs: zoned: don't skip block groups with 100% zone unusable
  btrfs: use btrfs_warn() to log message at btrfs_add_extent_mapping()
  btrfs: fix message not properly printing interval when adding extent map
  btrfs: fix warning messages not printing interval at unpin_extent_range()
  btrfs: fix extent map leak in unexpected scenario at unpin_extent_cache()
  btrfs: validate device maj:min during open
  btrfs: zoned: fix use-after-free in do_zone_finish()
  btrfs: zoned: use zone aware sb location for scrub
parents dc189b8e ef1e6823
...@@ -1559,7 +1559,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ...@@ -1559,7 +1559,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* needing to allocate extents from the block group. * needing to allocate extents from the block group.
*/ */
used = btrfs_space_info_used(space_info, true); used = btrfs_space_info_used(space_info, true);
if (space_info->total_bytes - block_group->length < used) { if (space_info->total_bytes - block_group->length < used &&
block_group->zone_unusable < block_group->length) {
/* /*
* Add a reference for the list, compensate for the ref * Add a reference for the list, compensate for the ref
* drop under the "next" label for the * drop under the "next" label for the
......
...@@ -4333,6 +4333,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, ...@@ -4333,6 +4333,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags)) if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
goto done; goto done;
/*
* Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above
* test_and_set_bit(EXTENT_BUFFER_READING), someone else could have
* started and finished reading the same eb. In this case, UPTODATE
* will now be set, and we shouldn't read it in again.
*/
if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) {
clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
smp_mb__after_atomic();
wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
return 0;
}
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0; eb->read_mirror = 0;
check_buffer_tree_ref(eb); check_buffer_tree_ref(eb);
......
...@@ -309,7 +309,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) ...@@ -309,7 +309,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
btrfs_warn(fs_info, btrfs_warn(fs_info,
"no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu", "no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root), btrfs_ino(inode), btrfs_root_id(inode->root),
start, len, gen); start, start + len, gen);
ret = -ENOENT; ret = -ENOENT;
goto out; goto out;
} }
...@@ -318,7 +318,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) ...@@ -318,7 +318,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
btrfs_warn(fs_info, btrfs_warn(fs_info,
"found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu", "found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root), btrfs_ino(inode), btrfs_root_id(inode->root),
em->start, start, len, gen); em->start, start, start + len, gen);
ret = -EUCLEAN; ret = -EUCLEAN;
goto out; goto out;
} }
...@@ -340,9 +340,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) ...@@ -340,9 +340,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
em->mod_len = em->len; em->mod_len = em->len;
} }
free_extent_map(em);
out: out:
write_unlock(&tree->lock); write_unlock(&tree->lock);
free_extent_map(em);
return ret; return ret;
} }
...@@ -629,13 +629,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info, ...@@ -629,13 +629,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
*/ */
ret = merge_extent_mapping(em_tree, existing, ret = merge_extent_mapping(em_tree, existing,
em, start); em, start);
if (ret) { if (WARN_ON(ret)) {
free_extent_map(em); free_extent_map(em);
*em_in = NULL; *em_in = NULL;
WARN_ONCE(ret, btrfs_warn(fs_info,
"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n", "extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu",
existing->start, existing->len, existing->start, extent_map_end(existing),
orig_start, orig_len, start); orig_start, orig_start + orig_len, start);
} }
free_extent_map(existing); free_extent_map(existing);
} }
......
...@@ -2812,7 +2812,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, ...@@ -2812,7 +2812,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
gen = btrfs_get_last_trans_committed(fs_info); gen = btrfs_get_last_trans_committed(fs_info);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i); ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr);
if (ret == -ENOENT)
break;
if (ret) {
spin_lock(&sctx->stat_lock);
sctx->stat.super_errors++;
spin_unlock(&sctx->stat_lock);
continue;
}
if (bytenr + BTRFS_SUPER_INFO_SIZE > if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes) scrub_dev->commit_total_bytes)
break; break;
......
...@@ -692,6 +692,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, ...@@ -692,6 +692,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
device->bdev = file_bdev(bdev_file); device->bdev = file_bdev(bdev_file);
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
if (device->devt != device->bdev->bd_dev) {
btrfs_warn(NULL,
"device %s maj:min changed from %d:%d to %d:%d",
device->name->str, MAJOR(device->devt),
MINOR(device->devt), MAJOR(device->bdev->bd_dev),
MINOR(device->bdev->bd_dev));
device->devt = device->bdev->bd_dev;
}
fs_devices->open_devices++; fs_devices->open_devices++;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
device->devid != BTRFS_DEV_REPLACE_DEVID) { device->devid != BTRFS_DEV_REPLACE_DEVID) {
...@@ -1174,23 +1184,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, ...@@ -1174,23 +1184,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device; struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL; struct btrfs_device *latest_dev = NULL;
struct btrfs_device *tmp_device; struct btrfs_device *tmp_device;
int ret = 0;
list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
dev_list) { dev_list) {
int ret; int ret2;
ret = btrfs_open_one_device(fs_devices, device, flags, holder); ret2 = btrfs_open_one_device(fs_devices, device, flags, holder);
if (ret == 0 && if (ret2 == 0 &&
(!latest_dev || device->generation > latest_dev->generation)) { (!latest_dev || device->generation > latest_dev->generation)) {
latest_dev = device; latest_dev = device;
} else if (ret == -ENODATA) { } else if (ret2 == -ENODATA) {
fs_devices->num_devices--; fs_devices->num_devices--;
list_del(&device->dev_list); list_del(&device->dev_list);
btrfs_free_device(device); btrfs_free_device(device);
} }
if (ret == 0 && ret2 != 0)
ret = ret2;
} }
if (fs_devices->open_devices == 0)
if (fs_devices->open_devices == 0) {
if (ret)
return ret;
return -EINVAL; return -EINVAL;
}
fs_devices->opened = 1; fs_devices->opened = 1;
fs_devices->latest_dev = latest_dev; fs_devices->latest_dev = latest_dev;
......
...@@ -1574,11 +1574,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ...@@ -1574,11 +1574,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
if (!map) if (!map)
return -EINVAL; return -EINVAL;
cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS); cache->physical_map = map;
if (!cache->physical_map) {
ret = -ENOMEM;
goto out;
}
zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
if (!zone_info) { if (!zone_info) {
...@@ -1690,7 +1686,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ...@@ -1690,7 +1686,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
} }
bitmap_free(active); bitmap_free(active);
kfree(zone_info); kfree(zone_info);
btrfs_free_chunk_map(map);
return ret; return ret;
} }
...@@ -2175,6 +2170,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ ...@@ -2175,6 +2170,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
struct btrfs_chunk_map *map; struct btrfs_chunk_map *map;
const bool is_metadata = (block_group->flags & const bool is_metadata = (block_group->flags &
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)); (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
int ret = 0; int ret = 0;
int i; int i;
...@@ -2250,6 +2246,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ ...@@ -2250,6 +2246,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
btrfs_clear_data_reloc_bg(block_group); btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
down_read(&dev_replace->rwsem);
map = block_group->physical_map; map = block_group->physical_map;
for (i = 0; i < map->num_stripes; i++) { for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev; struct btrfs_device *device = map->stripes[i].dev;
...@@ -2266,13 +2263,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ ...@@ -2266,13 +2263,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
zinfo->zone_size >> SECTOR_SHIFT); zinfo->zone_size >> SECTOR_SHIFT);
memalloc_nofs_restore(nofs_flags); memalloc_nofs_restore(nofs_flags);
if (ret) if (ret) {
up_read(&dev_replace->rwsem);
return ret; return ret;
}
if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
zinfo->reserved_active_zones++; zinfo->reserved_active_zones++;
btrfs_dev_clear_active_zone(device, physical); btrfs_dev_clear_active_zone(device, physical);
} }
up_read(&dev_replace->rwsem);
if (!fully_written) if (!fully_written)
btrfs_dec_block_group_ro(block_group); btrfs_dec_block_group_ro(block_group);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment