Commit 400dd456 authored by Linus Torvalds

Merge tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - fix race when reading extent buffer and 'uptodate' status is missed
   by one thread (introduced in 6.5)

 - do additional validation of devices using major:minor numbers

 - zoned mode fixes:
     - use zone-aware super block access during scrub
     - fix use-after-free during device replace (found by KASAN)
     - also delete zones that are 100% unusable to reclaim space

 - extent unpinning fixes:
     - fix extent map leak after error handling
     - print correct range in error message

 - error code and message updates

* tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix race in read_extent_buffer_pages()
  btrfs: return accurate error code on open failure in open_fs_devices()
  btrfs: zoned: don't skip block groups with 100% zone unusable
  btrfs: use btrfs_warn() to log message at btrfs_add_extent_mapping()
  btrfs: fix message not properly printing interval when adding extent map
  btrfs: fix warning messages not printing interval at unpin_extent_range()
  btrfs: fix extent map leak in unexpected scenario at unpin_extent_cache()
  btrfs: validate device maj:min during open
  btrfs: zoned: fix use-after-free in do_zone_finish()
  btrfs: zoned: use zone aware sb location for scrub
parents dc189b8e ef1e6823
@@ -1559,7 +1559,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		 * needing to allocate extents from the block group.
 		 */
 		used = btrfs_space_info_used(space_info, true);
-		if (space_info->total_bytes - block_group->length < used) {
+		if (space_info->total_bytes - block_group->length < used &&
+		    block_group->zone_unusable < block_group->length) {
 			/*
 			 * Add a reference for the list, compensate for the ref
 			 * drop under the "next" label for the
......
@@ -4333,6 +4333,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
 	if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
 		goto done;
 
+	/*
+	 * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above
+	 * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have
+	 * started and finished reading the same eb. In this case, UPTODATE
+	 * will now be set, and we shouldn't read it in again.
+	 */
+	if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) {
+		clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
+		smp_mb__after_atomic();
+		wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
+		return 0;
+	}
+
 	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
 	eb->read_mirror = 0;
 	check_buffer_tree_ref(eb);
......
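
The comment added in the hunk above describes a double-checked flag pattern: test the "uptodate" state, try to become the exclusive reader, then re-check before doing any work. A rough userspace analogue of that pattern is sketched below; the struct, field names and busy-wait are invented for the example and this is not btrfs code.

/* Illustrative sketch only. Build with: cc -std=c11 eb_race.c */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct buffer {
	atomic_bool uptodate;	/* rough analogue of EXTENT_BUFFER_UPTODATE */
	atomic_flag reading;	/* rough analogue of EXTENT_BUFFER_READING */
	int data;
};

/* Return 0 once the buffer holds valid data. */
static int read_buffer(struct buffer *b)
{
	/* Fast path: someone already populated the buffer. */
	if (atomic_load(&b->uptodate))
		return 0;

	/* Try to become the single reader. */
	if (atomic_flag_test_and_set(&b->reading)) {
		/* Someone else is reading; a real implementation would sleep. */
		while (!atomic_load(&b->uptodate))
			;
		return 0;
	}

	/*
	 * The re-check the fix adds: between the uptodate test above and
	 * winning the reading flag, another thread may have finished the
	 * read and cleared the flag, so the data may already be valid.
	 */
	if (atomic_load(&b->uptodate)) {
		atomic_flag_clear(&b->reading);
		return 0;
	}

	b->data = 42;			/* stand-in for the actual I/O */
	atomic_store(&b->uptodate, true);
	atomic_flag_clear(&b->reading);
	return 0;
}

int main(void)
{
	struct buffer b = { .data = 0 };

	atomic_flag_clear(&b.reading);	/* stands in for ATOMIC_FLAG_INIT */
	printf("first read:  %d (data %d)\n", read_buffer(&b), b.data);
	printf("second read: %d (data %d)\n", read_buffer(&b), b.data);
	return 0;
}

The key line is the second uptodate check made after winning the reading flag: without it, the winning thread would issue another read for a buffer that a concurrent thread has already populated.
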
@@ -309,7 +309,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
 		btrfs_warn(fs_info,
 "no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
 			   btrfs_ino(inode), btrfs_root_id(inode->root),
-			   start, len, gen);
+			   start, start + len, gen);
 		ret = -ENOENT;
 		goto out;
 	}
@@ -318,7 +318,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
 		btrfs_warn(fs_info,
 "found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
 			   btrfs_ino(inode), btrfs_root_id(inode->root),
-			   em->start, start, len, gen);
+			   em->start, start, start + len, gen);
 		ret = -EUCLEAN;
 		goto out;
 	}
@@ -340,9 +340,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
 		em->mod_len = em->len;
 	}
 
-	free_extent_map(em);
 out:
 	write_unlock(&tree->lock);
+	free_extent_map(em);
 	return ret;
 }
@@ -629,13 +629,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
 			 */
 			ret = merge_extent_mapping(em_tree, existing,
 						   em, start);
-			if (ret) {
+			if (WARN_ON(ret)) {
 				free_extent_map(em);
 				*em_in = NULL;
-				WARN_ONCE(ret,
-"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n",
-					  existing->start, existing->len,
-					  orig_start, orig_len, start);
+				btrfs_warn(fs_info,
+"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu",
+					   existing->start, extent_map_end(existing),
+					   orig_start, orig_start + orig_len, start);
 			}
 			free_extent_map(existing);
 		}
......
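
The message fixes above all print a half-open interval [start, end): the second value must be start + len (the exclusive end), not len. A tiny standalone C illustration with arbitrary values:

/* Arbitrary example values; only the arithmetic matters. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t start = 1048576;	/* 1 MiB */
	uint64_t len = 65536;		/* 64 KiB */

	/* Before the fix: the length was printed where the end was expected. */
	printf("wrong:   [%" PRIu64 ", %" PRIu64 ")\n", start, len);
	/* After the fix: the exclusive end of the half-open range. */
	printf("correct: [%" PRIu64 ", %" PRIu64 ")\n", start, start + len);
	return 0;
}

The same [inclusive, exclusive) convention is what extent_map_end() returns, which is why the btrfs_add_extent_mapping() message switches to it as well.
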
@@ -2812,7 +2812,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 	gen = btrfs_get_last_trans_committed(fs_info);
 
 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
-		bytenr = btrfs_sb_offset(i);
+		ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr);
+		if (ret == -ENOENT)
+			break;
+
+		if (ret) {
+			spin_lock(&sctx->stat_lock);
+			sctx->stat.super_errors++;
+			spin_unlock(&sctx->stat_lock);
+			continue;
+		}
+
 		if (bytenr + BTRFS_SUPER_INFO_SIZE >
 		    scrub_dev->commit_total_bytes)
 			break;
......
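
On zoned devices the super block copies live in dedicated zones, so their current location has to be looked up per mirror instead of taken from the fixed btrfs_sb_offset() values: -ENOENT means the mirror simply does not exist on this device, while any other error is counted and the next mirror is tried. A simplified sketch of that control flow; sb_location() and its offsets are invented and are not the btrfs API:

#include <errno.h>
#include <stdio.h>

#define MIRROR_MAX 3

static int sb_location(int mirror, long long *bytenr)
{
	static const long long offsets[MIRROR_MAX] = { 65536, 67108864, -1 };

	if (offsets[mirror] < 0)
		return -ENOENT;		/* this copy does not exist here */
	*bytenr = offsets[mirror];
	return 0;
}

int main(void)
{
	int super_errors = 0;

	for (int i = 0; i < MIRROR_MAX; i++) {
		long long bytenr;
		int ret = sb_location(i, &bytenr);

		if (ret == -ENOENT)
			break;			/* no further mirrors to scrub */
		if (ret) {
			super_errors++;		/* record it, try the next one */
			continue;
		}
		printf("mirror %d: super block at %lld\n", i, bytenr);
	}
	printf("super_errors = %d\n", super_errors);
	return 0;
}
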
@@ -692,6 +692,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 	device->bdev = file_bdev(bdev_file);
 	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 
+	if (device->devt != device->bdev->bd_dev) {
+		btrfs_warn(NULL,
+			   "device %s maj:min changed from %d:%d to %d:%d",
+			   device->name->str, MAJOR(device->devt),
+			   MINOR(device->devt), MAJOR(device->bdev->bd_dev),
+			   MINOR(device->bdev->bd_dev));
+
+		device->devt = device->bdev->bd_dev;
+	}
+
 	fs_devices->open_devices++;
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -1174,23 +1184,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
 	struct btrfs_device *device;
 	struct btrfs_device *latest_dev = NULL;
 	struct btrfs_device *tmp_device;
+	int ret = 0;
 
 	list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
 				 dev_list) {
-		int ret;
+		int ret2;
 
-		ret = btrfs_open_one_device(fs_devices, device, flags, holder);
-		if (ret == 0 &&
+		ret2 = btrfs_open_one_device(fs_devices, device, flags, holder);
+		if (ret2 == 0 &&
 		    (!latest_dev || device->generation > latest_dev->generation)) {
 			latest_dev = device;
-		} else if (ret == -ENODATA) {
+		} else if (ret2 == -ENODATA) {
 			fs_devices->num_devices--;
 			list_del(&device->dev_list);
 			btrfs_free_device(device);
 		}
+		if (ret == 0 && ret2 != 0)
+			ret = ret2;
 	}
-	if (fs_devices->open_devices == 0)
+
+	if (fs_devices->open_devices == 0) {
+		if (ret)
+			return ret;
 		return -EINVAL;
+	}
 
 	fs_devices->opened = 1;
 	fs_devices->latest_dev = latest_dev;
......
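
The open_fs_devices() hunk changes the failure path to remember the first real error from btrfs_open_one_device() and return that when no device could be opened, rather than a generic -EINVAL. A minimal sketch of the same pattern; open_one() is a hypothetical stand-in, not a btrfs function:

#include <errno.h>
#include <stdio.h>

static int open_one(int i)
{
	/* Pretend every device fails, each with its own errno. */
	return (i == 0) ? -EACCES : -ENOMEM;
}

static int open_all(int count)
{
	int opened = 0;
	int ret = 0;	/* first failure seen, 0 if none */

	for (int i = 0; i < count; i++) {
		int ret2 = open_one(i);

		if (ret2 == 0)
			opened++;
		/* Keep only the first failure so the caller sees the root cause. */
		if (ret == 0 && ret2 != 0)
			ret = ret2;
	}

	/* Nothing opened: report the real error if there is one, else -EINVAL. */
	if (opened == 0)
		return ret ? ret : -EINVAL;
	return 0;
}

int main(void)
{
	printf("open_all() = %d (expected %d, i.e. -EACCES)\n", open_all(3), -EACCES);
	return 0;
}
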
@@ -1574,11 +1574,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 	if (!map)
 		return -EINVAL;
 
-	cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS);
-	if (!cache->physical_map) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	cache->physical_map = map;
 
 	zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
 	if (!zone_info) {
@@ -1690,7 +1686,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 	}
 	bitmap_free(active);
 	kfree(zone_info);
-	btrfs_free_chunk_map(map);
 
 	return ret;
 }
@@ -2175,6 +2170,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 	struct btrfs_chunk_map *map;
 	const bool is_metadata = (block_group->flags &
 			(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 	int ret = 0;
 	int i;
 
@@ -2250,6 +2246,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 	btrfs_clear_data_reloc_bg(block_group);
 	spin_unlock(&block_group->lock);
 
+	down_read(&dev_replace->rwsem);
 	map = block_group->physical_map;
 	for (i = 0; i < map->num_stripes; i++) {
 		struct btrfs_device *device = map->stripes[i].dev;
@@ -2266,13 +2263,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 				       zinfo->zone_size >> SECTOR_SHIFT);
 		memalloc_nofs_restore(nofs_flags);
 
-		if (ret)
+		if (ret) {
+			up_read(&dev_replace->rwsem);
 			return ret;
+		}
 
 		if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
 			zinfo->reserved_active_zones++;
 		btrfs_dev_clear_active_zone(device, physical);
 	}
+	up_read(&dev_replace->rwsem);
 
 	if (!fully_written)
 		btrfs_dec_block_group_ro(block_group);
......
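
The do_zone_finish() hunks take fs_info->dev_replace.rwsem for reading around the whole walk of the block group's chunk map, so a concurrent device replace cannot free the stripe devices mid-loop (the use-after-free that KASAN reported), and the lock is dropped on the early-return path as well as after the loop. A userspace analogue of that locking pattern using a pthreads rwlock; all names and structures below are invented, not btrfs code:

#include <pthread.h>
#include <stdio.h>

struct map {
	int num_stripes;
	int stripes[4];
};

static pthread_rwlock_t replace_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct map the_map = { .num_stripes = 4, .stripes = { 1, 2, 3, 4 } };
static struct map *current_map = &the_map;

static int finish_zones(void)
{
	struct map *map;
	int ret = 0;

	/*
	 * Reader side: while the lock is held, a "device replace" writer
	 * cannot swap out or free the map being walked.
	 */
	pthread_rwlock_rdlock(&replace_lock);
	map = current_map;

	for (int i = 0; i < map->num_stripes; i++) {
		if (map->stripes[i] < 0) {	/* stand-in for a zone-finish error */
			ret = -1;
			goto out;		/* the fix: unlock on this path too */
		}
	}
out:
	pthread_rwlock_unlock(&replace_lock);
	return ret;
}

int main(void)
{
	printf("finish_zones() = %d\n", finish_zones());
	return 0;
}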