Commit a229cf67 authored by Linus Torvalds

Merge tag 'for-6.6-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few more followup fixes to the directory listing.

  People have noticed different behaviour compared to other filesystems
  after changes in 6.5. This is now unified to more "logical" and
  expected behaviour while still within POSIX. And a few more fixes for
  stable.

   - change behaviour of readdir()/rewinddir() when new directory
     entries are created after opendir(), properly tracking the last
     entry

   - fix race in readdir when multiple threads can set the last entry
     index for a directory

  Additionally:

   - use exclusive lock when direct io might need to drop privs and call
     notify_change()

   - don't clear uptodate bit on page after an error, this may lead to a
     deadlock in subpage mode

   - fix waiting pattern when multiple readers block on Merkle tree
     data, switch to folios"

* tag 'for-6.6-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix race between reading a directory and adding entries to it
  btrfs: refresh dir last index during a rewinddir(3) call
  btrfs: set last dir index to the current last index when opening dir
  btrfs: don't clear uptodate on write errors
  btrfs: file_remove_privs needs an exclusive lock in direct io write
  btrfs: convert btrfs_read_merkle_tree_page() to use a folio
parents 5d2f5353 8e7f82de
...@@ -484,10 +484,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio) ...@@ -484,10 +484,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
bvec->bv_offset, bvec->bv_len); bvec->bv_offset, bvec->bv_len);
btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error); btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error);
if (error) { if (error)
btrfs_page_clear_uptodate(fs_info, page, start, len);
mapping_set_error(page->mapping, error); mapping_set_error(page->mapping, error);
}
btrfs_page_clear_writeback(fs_info, page, start, len); btrfs_page_clear_writeback(fs_info, page, start, len);
} }
...@@ -1456,8 +1454,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl ...@@ -1456,8 +1454,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
if (ret) { if (ret) {
btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start, btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start,
PAGE_SIZE, !ret); PAGE_SIZE, !ret);
btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page,
page_start, PAGE_SIZE);
mapping_set_error(page->mapping, ret); mapping_set_error(page->mapping, ret);
} }
unlock_page(page); unlock_page(page);
...@@ -1624,8 +1620,6 @@ static void extent_buffer_write_end_io(struct btrfs_bio *bbio) ...@@ -1624,8 +1620,6 @@ static void extent_buffer_write_end_io(struct btrfs_bio *bbio)
struct page *page = bvec->bv_page; struct page *page = bvec->bv_page;
u32 len = bvec->bv_len; u32 len = bvec->bv_len;
if (!uptodate)
btrfs_page_clear_uptodate(fs_info, page, start, len);
btrfs_page_clear_writeback(fs_info, page, start, len); btrfs_page_clear_writeback(fs_info, page, start, len);
bio_offset += len; bio_offset += len;
} }
...@@ -2201,7 +2195,6 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page, ...@@ -2201,7 +2195,6 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page,
if (ret) { if (ret) {
btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, btrfs_mark_ordered_io_finished(BTRFS_I(inode), page,
cur, cur_len, !ret); cur, cur_len, !ret);
btrfs_page_clear_uptodate(fs_info, page, cur, cur_len);
mapping_set_error(page->mapping, ret); mapping_set_error(page->mapping, ret);
} }
btrfs_page_unlock_writer(fs_info, page, cur, cur_len); btrfs_page_unlock_writer(fs_info, page, cur, cur_len);
......
...@@ -1451,8 +1451,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -1451,8 +1451,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_NOWAIT) if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY; ilock_flags |= BTRFS_ILOCK_TRY;
/* If the write DIO is within EOF, use a shared lock */ /*
if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) * If the write DIO is within EOF, use a shared lock and also only if
* security bits will likely not be dropped by file_remove_privs() called
* from btrfs_write_check(). Either will need to be rechecked after the
* lock was acquired.
*/
if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode))
ilock_flags |= BTRFS_ILOCK_SHARED; ilock_flags |= BTRFS_ILOCK_SHARED;
relock: relock:
...@@ -1460,6 +1465,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -1460,6 +1465,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (err < 0) if (err < 0)
return err; return err;
/* Shared lock cannot be used with security bits set. */
if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
ilock_flags &= ~BTRFS_ILOCK_SHARED;
goto relock;
}
err = generic_write_checks(iocb, from); err = generic_write_checks(iocb, from);
if (err <= 0) { if (err <= 0) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
......
...@@ -1085,9 +1085,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, ...@@ -1085,9 +1085,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
btrfs_mark_ordered_io_finished(inode, locked_page, btrfs_mark_ordered_io_finished(inode, locked_page,
page_start, PAGE_SIZE, page_start, PAGE_SIZE,
!ret); !ret);
btrfs_page_clear_uptodate(inode->root->fs_info,
locked_page, page_start,
PAGE_SIZE);
mapping_set_error(locked_page->mapping, ret); mapping_set_error(locked_page->mapping, ret);
unlock_page(locked_page); unlock_page(locked_page);
} }
...@@ -2791,7 +2788,6 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) ...@@ -2791,7 +2788,6 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
mapping_set_error(page->mapping, ret); mapping_set_error(page->mapping, ret);
btrfs_mark_ordered_io_finished(inode, page, page_start, btrfs_mark_ordered_io_finished(inode, page, page_start,
PAGE_SIZE, !ret); PAGE_SIZE, !ret);
btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE);
clear_page_dirty_for_io(page); clear_page_dirty_for_io(page);
} }
btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE); btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
...@@ -5769,20 +5765,24 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode) ...@@ -5769,20 +5765,24 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
{ {
if (dir->index_cnt == (u64)-1) { int ret = 0;
int ret;
btrfs_inode_lock(dir, 0);
if (dir->index_cnt == (u64)-1) {
ret = btrfs_inode_delayed_dir_index_count(dir); ret = btrfs_inode_delayed_dir_index_count(dir);
if (ret) { if (ret) {
ret = btrfs_set_inode_index_count(dir); ret = btrfs_set_inode_index_count(dir);
if (ret) if (ret)
return ret; goto out;
} }
} }
*index = dir->index_cnt; /* index_cnt is the index number of next new entry, so decrement it. */
*index = dir->index_cnt - 1;
out:
btrfs_inode_unlock(dir, 0);
return 0; return ret;
} }
/* /*
...@@ -5817,6 +5817,19 @@ static int btrfs_opendir(struct inode *inode, struct file *file) ...@@ -5817,6 +5817,19 @@ static int btrfs_opendir(struct inode *inode, struct file *file)
return 0; return 0;
} }
/*
 * llseek handler for btrfs directories.
 *
 * Refreshes the last directory index stored in the file's private data
 * before delegating the actual seek to generic_file_llseek(). Returns the
 * error from btrfs_get_dir_last_index() if the refresh fails, otherwise
 * whatever generic_file_llseek() returns.
 */
static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct btrfs_file_private *private = file->private_data;
int ret;
/* The last index is re-read on every seek, regardless of offset/whence. */
ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)),
&private->last_index);
if (ret)
return ret;
return generic_file_llseek(file, offset, whence);
}
struct dir_entry { struct dir_entry {
u64 ino; u64 ino;
u64 offset; u64 offset;
...@@ -10868,7 +10881,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { ...@@ -10868,7 +10881,7 @@ static const struct inode_operations btrfs_dir_inode_operations = {
}; };
static const struct file_operations btrfs_dir_file_operations = { static const struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek, .llseek = btrfs_dir_llseek,
.read = generic_read_dir, .read = generic_read_dir,
.iterate_shared = btrfs_real_readdir, .iterate_shared = btrfs_real_readdir,
.open = btrfs_opendir, .open = btrfs_opendir,
......
...@@ -715,7 +715,7 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, ...@@ -715,7 +715,7 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
pgoff_t index, pgoff_t index,
unsigned long num_ra_pages) unsigned long num_ra_pages)
{ {
struct page *page; struct folio *folio;
u64 off = (u64)index << PAGE_SHIFT; u64 off = (u64)index << PAGE_SHIFT;
loff_t merkle_pos = merkle_file_pos(inode); loff_t merkle_pos = merkle_file_pos(inode);
int ret; int ret;
...@@ -726,29 +726,36 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, ...@@ -726,29 +726,36 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
return ERR_PTR(-EFBIG); return ERR_PTR(-EFBIG);
index += merkle_pos >> PAGE_SHIFT; index += merkle_pos >> PAGE_SHIFT;
again: again:
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0);
if (page) { if (!IS_ERR(folio)) {
if (PageUptodate(page)) if (folio_test_uptodate(folio))
return page; goto out;
lock_page(page); folio_lock(folio);
/* /* If it's not uptodate after we have the lock, we got a read error. */
* We only insert uptodate pages, so !Uptodate has to be if (!folio_test_uptodate(folio)) {
* an error folio_unlock(folio);
*/ folio_put(folio);
if (!PageUptodate(page)) {
unlock_page(page);
put_page(page);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
} }
unlock_page(page); folio_unlock(folio);
return page; goto out;
} }
page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); folio = filemap_alloc_folio(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS),
if (!page) 0);
if (!folio)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ret = filemap_add_folio(inode->i_mapping, folio, index, GFP_NOFS);
if (ret) {
folio_put(folio);
/* Did someone else insert a folio here? */
if (ret == -EEXIST)
goto again;
return ERR_PTR(ret);
}
/* /*
* Merkle item keys are indexed from byte 0 in the merkle tree. * Merkle item keys are indexed from byte 0 in the merkle tree.
* They have the form: * They have the form:
...@@ -756,28 +763,19 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, ...@@ -756,28 +763,19 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
* [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ] * [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
*/ */
ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off, ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off,
page_address(page), PAGE_SIZE, page); folio_address(folio), PAGE_SIZE, &folio->page);
if (ret < 0) { if (ret < 0) {
put_page(page); folio_put(folio);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
if (ret < PAGE_SIZE) if (ret < PAGE_SIZE)
memzero_page(page, ret, PAGE_SIZE - ret); folio_zero_segment(folio, ret, PAGE_SIZE);
SetPageUptodate(page); folio_mark_uptodate(folio);
ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS); folio_unlock(folio);
if (!ret) { out:
/* Inserted and ready for fsverity */ return folio_file_page(folio, index);
unlock_page(page);
} else {
put_page(page);
/* Did someone race us into inserting this page? */
if (ret == -EEXIST)
goto again;
page = ERR_PTR(ret);
}
return page;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment