Commit aab4ed58 authored by Linus Torvalds

Merge tag 'erofs-for-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "In this cycle, we continue converting to use meta buffers for all
  remaining uncompressed paths to prepare for the upcoming subpage,
  folio and fscache features.

  We also fixed a double-free issue when sysfs initialization fails,
  which was reported by syzbot.

  Besides, to make it easier for userspace to control per-file
  timestamps, we now record mtime instead of ctime, marked with a
  compat feature flag. There are also some code cleanups and a
  documentation update as usual.

  Summary:

   - Avoid using page structure directly for all uncompressed paths

   - Fix a double-free issue when sysfs initialization fails

   - Complete DAX description for erofs

   - Use mtime instead since there's no (easy) way for users to control
     ctime

   - Several code cleanups"

* tag 'erofs-for-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: rename ctime to mtime
  erofs: use meta buffers for inode lookup
  erofs: use meta buffers for reading directories
  fs: erofs: add sanity check for kobject in erofs_unregister_sysfs
  erofs: refine managed inode stuffs
  erofs: clean up z_erofs_extent_lookback
  erofs: silence warnings related to impossible m_plen
  Documentation/filesystem/dax: update DAX description on erofs
  erofs: clean up preload_compressed_pages()
  erofs: get rid of `struct z_erofs_collector'
  erofs: use meta buffers for erofs_read_superblock()
parents 881b5687 a1108dcd
......@@ -23,11 +23,11 @@ on it as usual. The `DAX` code currently only supports files with a block
size equal to your kernel's `PAGE_SIZE`, so you may need to specify a block
size when creating the filesystem.
Currently 4 filesystems support `DAX`: ext2, ext4, xfs and virtiofs.
Currently 5 filesystems support `DAX`: ext2, ext4, xfs, virtiofs and erofs.
Enabling `DAX` on them is different.
Enabling DAX on ext2
--------------------
Enabling DAX on ext2 and erofs
------------------------------
When mounting the filesystem, use the ``-o dax`` option on the command line or
add 'dax' to the options in ``/etc/fstab``. This works to enable `DAX` on all files
......
......@@ -40,7 +40,7 @@ Here is the main features of EROFS:
Inode metadata size 32 bytes 64 bytes
Max file size 4 GB 16 EB (also limited by max. vol size)
Max uids/gids 65536 4294967296
File change time no yes (64 + 32-bit timestamp)
Per-inode timestamp no yes (64 + 32-bit timestamp)
Max hardlinks 65536 4294967296
Metadata reserved 4 bytes 14 bytes
===================== ============ =====================================
......
......@@ -28,10 +28,10 @@ void erofs_put_metabuf(struct erofs_buf *buf)
buf->page = NULL;
}
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type)
void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
struct address_space *const mapping = inode->i_mapping;
erofs_off_t offset = blknr_to_addr(blkaddr);
pgoff_t index = offset >> PAGE_SHIFT;
struct page *page = buf->page;
......@@ -60,6 +60,12 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
return buf->base + (offset & ~PAGE_MASK);
}
/*
 * Superblock-based wrapper around erofs_bread(): forwards the request using
 * the inode of the block device backing @sb (sb->s_bdev->bd_inode), so the
 * block is looked up through that inode's mapping.
 *
 * @buf, @blkaddr and @type are passed through unchanged, and the return
 * value is whatever erofs_bread() returns; callers visible in this change
 * test the result with IS_ERR().
 */
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
}
static int erofs_map_blocks_flatmode(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
......
......@@ -2,6 +2,7 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
* Copyright (C) 2022, Alibaba Cloud
*/
#include "internal.h"
......@@ -67,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
static int erofs_readdir(struct file *f, struct dir_context *ctx)
{
struct inode *dir = file_inode(f);
struct address_space *mapping = dir->i_mapping;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
const size_t dirsize = i_size_read(dir);
unsigned int i = ctx->pos / EROFS_BLKSIZ;
unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
......@@ -75,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
bool initial = true;
while (ctx->pos < dirsize) {
struct page *dentry_page;
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
dentry_page = read_mapping_page(mapping, i, NULL);
if (dentry_page == ERR_PTR(-ENOMEM)) {
err = -ENOMEM;
break;
} else if (IS_ERR(dentry_page)) {
de = erofs_bread(&buf, dir, i, EROFS_KMAP);
if (IS_ERR(de)) {
erofs_err(dir->i_sb,
"fail to readdir of logical block %u of nid %llu",
i, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
err = PTR_ERR(de);
break;
}
de = (struct erofs_dirent *)kmap(dentry_page);
nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
nameoff >= PAGE_SIZE) {
erofs_err(dir->i_sb,
......@@ -119,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
err = erofs_fill_dentries(dir, ctx, de, &ofs,
nameoff, maxsize);
skip_this:
kunmap(dentry_page);
put_page(dentry_page);
ctx->pos = blknr_to_addr(i) + ofs;
if (err)
......@@ -130,6 +120,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
++i;
ofs = 0;
}
erofs_put_metabuf(&buf);
return err < 0 ? err : 0;
}
......
......@@ -12,6 +12,7 @@
#define EROFS_SUPER_OFFSET 1024
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
......@@ -186,8 +187,8 @@ struct erofs_inode_extended {
__le32 i_uid;
__le32 i_gid;
__le64 i_ctime;
__le32 i_ctime_nsec;
__le64 i_mtime;
__le32 i_mtime_nsec;
__le32 i_nlink;
__u8 i_reserved2[16];
};
......
......@@ -113,8 +113,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
set_nlink(inode, le32_to_cpu(die->i_nlink));
/* extended inode has its own timestamp */
inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime);
inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec);
inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime);
inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec);
inode->i_size = le64_to_cpu(die->i_size);
......
......@@ -479,6 +479,8 @@ struct erofs_map_dev {
extern const struct file_operations erofs_file_fops;
void erofs_unmap_metabuf(struct erofs_buf *buf);
void erofs_put_metabuf(struct erofs_buf *buf);
void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
erofs_blk_t blkaddr, enum erofs_kmap_type type);
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type);
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
......
......@@ -2,6 +2,7 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
* Copyright (C) 2022, Alibaba Cloud
*/
#include "xattr.h"
......@@ -86,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
return ERR_PTR(-ENOENT);
}
static struct page *find_target_block_classic(struct inode *dir,
struct erofs_qstr *name,
int *_ndirents)
static void *find_target_block_classic(struct erofs_buf *target,
struct inode *dir,
struct erofs_qstr *name,
int *_ndirents)
{
unsigned int startprfx, endprfx;
int head, back;
struct address_space *const mapping = dir->i_mapping;
struct page *candidate = ERR_PTR(-ENOENT);
void *candidate = ERR_PTR(-ENOENT);
startprfx = endprfx = 0;
head = 0;
......@@ -101,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir,
while (head <= back) {
const int mid = head + (back - head) / 2;
struct page *page = read_mapping_page(mapping, mid, NULL);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_dirent *de;
if (!IS_ERR(page)) {
struct erofs_dirent *de = kmap_atomic(page);
de = erofs_bread(&buf, dir, mid, EROFS_KMAP);
if (!IS_ERR(de)) {
const int nameoff = nameoff_from_disk(de->nameoff,
EROFS_BLKSIZ);
const int ndirents = nameoff / sizeof(*de);
......@@ -113,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir,
struct erofs_qstr dname;
if (!ndirents) {
kunmap_atomic(de);
put_page(page);
erofs_put_metabuf(&buf);
erofs_err(dir->i_sb,
"corrupted dir block %d @ nid %llu",
mid, EROFS_I(dir)->nid);
DBG_BUGON(1);
page = ERR_PTR(-EFSCORRUPTED);
de = ERR_PTR(-EFSCORRUPTED);
goto out;
}
......@@ -135,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir,
/* string comparison without already matched prefix */
diff = erofs_dirnamecmp(name, &dname, &matched);
kunmap_atomic(de);
if (!diff) {
*_ndirents = 0;
......@@ -145,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir,
startprfx = matched;
if (!IS_ERR(candidate))
put_page(candidate);
candidate = page;
erofs_put_metabuf(target);
*target = buf;
candidate = de;
*_ndirents = ndirents;
} else {
put_page(page);
erofs_put_metabuf(&buf);
back = mid - 1;
endprfx = matched;
......@@ -158,8 +159,8 @@ static struct page *find_target_block_classic(struct inode *dir,
}
out: /* free if the candidate is valid */
if (!IS_ERR(candidate))
put_page(candidate);
return page;
erofs_put_metabuf(target);
return de;
}
return candidate;
}
......@@ -169,8 +170,7 @@ int erofs_namei(struct inode *dir,
erofs_nid_t *nid, unsigned int *d_type)
{
int ndirents;
struct page *page;
void *data;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_dirent *de;
struct erofs_qstr qn;
......@@ -181,26 +181,20 @@ int erofs_namei(struct inode *dir,
qn.end = name->name + name->len;
ndirents = 0;
page = find_target_block_classic(dir, &qn, &ndirents);
if (IS_ERR(page))
return PTR_ERR(page);
de = find_target_block_classic(&buf, dir, &qn, &ndirents);
if (IS_ERR(de))
return PTR_ERR(de);
data = kmap_atomic(page);
/* the target page has been mapped */
if (ndirents)
de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
else
de = (struct erofs_dirent *)data;
de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents);
if (!IS_ERR(de)) {
*nid = le64_to_cpu(de->nid);
*d_type = de->file_type;
}
kunmap_atomic(data);
put_page(page);
erofs_put_metabuf(&buf);
return PTR_ERR_OR_ZERO(de);
}
......
......@@ -281,21 +281,19 @@ static int erofs_init_devices(struct super_block *sb,
static int erofs_read_superblock(struct super_block *sb)
{
struct erofs_sb_info *sbi;
struct page *page;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_super_block *dsb;
unsigned int blkszbits;
void *data;
int ret;
page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL);
if (IS_ERR(page)) {
data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
if (IS_ERR(data)) {
erofs_err(sb, "cannot read erofs superblock");
return PTR_ERR(page);
return PTR_ERR(data);
}
sbi = EROFS_SB(sb);
data = kmap(page);
dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
ret = -EINVAL;
......@@ -365,8 +363,7 @@ static int erofs_read_superblock(struct super_block *sb)
if (erofs_sb_has_ztailpacking(sbi))
erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
out:
kunmap(page);
put_page(page);
erofs_put_metabuf(&buf);
return ret;
}
......@@ -535,6 +532,11 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
return ret;
}
/*
* It will be called only on inode eviction. In case that there are still some
* decompression requests in progress, wait with rescheduling for a bit here.
* We could introduce an extra locking instead but it seems unnecessary.
*/
static void erofs_managed_cache_invalidatepage(struct page *page,
unsigned int offset,
unsigned int length)
......@@ -568,8 +570,7 @@ static int erofs_init_managed_cache(struct super_block *sb)
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &managed_cache_aops;
mapping_set_gfp_mask(inode->i_mapping,
GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->managed_cache = inode;
return 0;
}
......
......@@ -221,9 +221,11 @@ void erofs_unregister_sysfs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
kobject_del(&sbi->s_kobj);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
if (sbi->s_kobj.state_in_sysfs) {
kobject_del(&sbi->s_kobj);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
}
}
int __init erofs_init_sysfs(void)
......
This diff is collapsed.
......@@ -431,48 +431,47 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned int lookback_distance)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
struct erofs_map_blocks *const map = m->map;
const unsigned int lclusterbits = vi->z_logical_clusterbits;
unsigned long lcn = m->lcn;
int err;
if (lcn < lookback_distance) {
erofs_err(m->inode->i_sb,
"bogus lookback distance @ nid %llu", vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
while (m->lcn >= lookback_distance) {
unsigned long lcn = m->lcn - lookback_distance;
int err;
/* load extent head logical cluster if needed */
lcn -= lookback_distance;
err = z_erofs_load_cluster_from_disk(m, lcn, false);
if (err)
return err;
/* load extent head logical cluster if needed */
err = z_erofs_load_cluster_from_disk(m, lcn, false);
if (err)
return err;
switch (m->type) {
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
if (!m->delta[0]) {
switch (m->type) {
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
if (!m->delta[0]) {
erofs_err(m->inode->i_sb,
"invalid lookback distance 0 @ nid %llu",
vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
lookback_distance = m->delta[0];
continue;
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
m->headtype = m->type;
m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
return 0;
default:
erofs_err(m->inode->i_sb,
"invalid lookback distance 0 @ nid %llu",
vi->nid);
"unknown type %u @ lcn %lu of nid %llu",
m->type, lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
return -EOPNOTSUPP;
}
return z_erofs_extent_lookback(m, m->delta[0]);
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
m->headtype = m->type;
map->m_la = (lcn << lclusterbits) | m->clusterofs;
break;
default:
erofs_err(m->inode->i_sb,
"unknown type %u @ lcn %lu of nid %llu",
m->type, lcn, vi->nid);
DBG_BUGON(1);
return -EOPNOTSUPP;
}
return 0;
erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
......@@ -494,7 +493,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
map->m_plen = 1 << lclusterbits;
map->m_plen = 1ULL << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
......@@ -540,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
map->m_plen = m->compressedlcs << lclusterbits;
map->m_plen = (u64)m->compressedlcs << lclusterbits;
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment