Commit 3497640a authored by Linus Torvalds

Merge tag 'erofs-for-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "In this cycle, for container use cases, fscache-based shared domain is
  introduced [1] so that data blobs in the same domain will be storage
  deduplicated and it will also be used for page cache sharing later.

  Also, a special packed inode is now introduced to record inode
  fragments which keep the tail part of files by Yue Hu [2]. You can
  keep arbitrary length or (at will) the whole file as a fragment and
  then fragments can be optionally compressed in the packed inode
  together and even deduplicated for smaller image sizes.

  In addition to that, global compressed data deduplication by sharing
  partial-referenced pclusters is also supported in this cycle.

  Summary:

   - Introduce fscache-based domain to share blobs between images

   - Support recording fragments in a special packed inode

   - Support partial-referenced pclusters for global compressed data
     deduplication

   - Fix an order >= MAX_ORDER warning due to crafted negative i_size

   - Several cleanups"

Link: https://lore.kernel.org/r/20220916085940.89392-1-zhujia.zj@bytedance.com [1]
Link: https://lore.kernel.org/r/cover.1663065968.git.huyue2@coolpad.com [2]

* tag 'erofs-for-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: clean up erofs_iget()
  erofs: clean up unnecessary code and comments
  erofs: fold in z_erofs_reload_indexes()
  erofs: introduce partial-referenced pclusters
  erofs: support on-disk compressed fragments data
  erofs: support interlaced uncompressed data for compressed files
  erofs: clean up .read_folio() and .readahead() in fscache mode
  erofs: introduce 'domain_id' mount option
  erofs: Support sharing cookies in the same domain
  erofs: introduce a pseudo mnt to manage shared cookies
  erofs: introduce fscache-based domain
  erofs: code clean up for fscache
  erofs: use kill_anon_super() to kill super in fscache mode
  erofs: fix order >= MAX_ORDER warning due to crafted negative i_size
parents 8bea8ff3 312fe643
......@@ -317,52 +317,61 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
return ret;
}
static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
struct page **pagepool)
static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
const unsigned int nrpages_out =
const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
const unsigned int outpages =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int righthalf = min_t(unsigned int, rq->outputsize,
PAGE_SIZE - rq->pageofs_out);
const unsigned int lefthalf = rq->outputsize - righthalf;
const unsigned int interlaced_offset =
rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out;
unsigned char *src, *dst;
if (nrpages_out > 2) {
if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
DBG_BUGON(1);
return -EIO;
return -EFSCORRUPTED;
}
if (rq->out[0] == *rq->in) {
DBG_BUGON(nrpages_out != 1);
DBG_BUGON(rq->pageofs_out);
return 0;
}
src = kmap_atomic(*rq->in) + rq->pageofs_in;
src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in;
if (rq->out[0]) {
dst = kmap_atomic(rq->out[0]);
memcpy(dst + rq->pageofs_out, src, righthalf);
kunmap_atomic(dst);
dst = kmap_local_page(rq->out[0]);
memcpy(dst + rq->pageofs_out, src + interlaced_offset,
righthalf);
kunmap_local(dst);
}
if (nrpages_out == 2) {
DBG_BUGON(!rq->out[1]);
if (rq->out[1] == *rq->in) {
if (outpages > inpages) {
DBG_BUGON(!rq->out[outpages - 1]);
if (rq->out[outpages - 1] != rq->in[inpages - 1]) {
dst = kmap_local_page(rq->out[outpages - 1]);
memcpy(dst, interlaced_offset ? src :
(src + righthalf), lefthalf);
kunmap_local(dst);
} else if (!interlaced_offset) {
memmove(src, src + righthalf, lefthalf);
} else {
dst = kmap_atomic(rq->out[1]);
memcpy(dst, src + righthalf, lefthalf);
kunmap_atomic(dst);
}
}
kunmap_atomic(src);
kunmap_local(src);
return 0;
}
static struct z_erofs_decompressor decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.decompress = z_erofs_shifted_transform,
.decompress = z_erofs_transform_plain,
.name = "shifted"
},
[Z_EROFS_COMPRESSION_INTERLACED] = {
.decompress = z_erofs_transform_plain,
.name = "interlaced"
},
[Z_EROFS_COMPRESSION_LZ4] = {
.decompress = z_erofs_lz4_decompress,
.name = "lz4"
......
......@@ -217,6 +217,9 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
strm->buf.out_size = min_t(u32, outlen,
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
if (!rq->out[no] && rq->fillgaps) /* deduped */
rq->out[no] = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
if (rq->out[no])
strm->buf.out = kmap(rq->out[no]) + pageofs;
pageofs = 0;
......
......@@ -25,6 +25,8 @@
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
#define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020
#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
......@@ -32,7 +34,9 @@
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
EROFS_FEATURE_INCOMPAT_FRAGMENTS | \
EROFS_FEATURE_INCOMPAT_DEDUPE)
#define EROFS_SB_EXTSLOT_SIZE 16
......@@ -71,7 +75,9 @@ struct erofs_super_block {
} __packed u1;
__le16 extra_devices; /* # of devices besides the primary device */
__le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
__u8 reserved2[38];
__u8 reserved[6];
__le64 packed_nid; /* nid of the special packed inode */
__u8 reserved2[24];
};
/*
......@@ -295,16 +301,27 @@ struct z_erofs_lzma_cfgs {
* bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
* bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
* bit 4 : interlaced plain pcluster (0 - off; 1 - on)
* bit 5 : fragment pcluster (0 - off; 1 - on)
*/
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010
#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020
#define Z_EROFS_FRAGMENT_INODE_BIT 7
struct z_erofs_map_header {
__le16 h_reserved1;
/* indicates the encoded size of tailpacking data */
__le16 h_idata_size;
union {
/* fragment data offset in the packed inode */
__le32 h_fragmentoff;
struct {
__le16 h_reserved1;
/* indicates the encoded size of tailpacking data */
__le16 h_idata_size;
};
};
__le16 h_advise;
/*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01);
......@@ -313,7 +330,8 @@ struct z_erofs_map_header {
__u8 h_algorithmtype;
/*
* bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
* bit 3-7 : reserved.
* bit 3-6 : reserved;
* bit 7 : move the whole file into packed inode or not.
*/
__u8 h_clusterbits;
};
......@@ -355,6 +373,9 @@ enum {
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
/* (noncompact only, HEAD) This pcluster refers to partial decompressed data */
#define Z_EROFS_VLE_DI_PARTIAL_REF (1 << 15)
/*
* D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
* compressed block count of a compressed extent (in logical clusters, aka.
......@@ -402,6 +423,10 @@ struct erofs_dirent {
/* check the EROFS on-disk layout strictly at compile time */
static inline void erofs_check_ondisk_layout_definitions(void)
{
const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) {
.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT
};
BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
......@@ -419,6 +444,9 @@ static inline void erofs_check_ondisk_layout_definitions(void)
BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
/* exclude old compiler versions like gcc 7.5.0 */
BUILD_BUG_ON(__builtin_constant_p(fmh) ?
fmh != cpu_to_le64(1ULL << 63) : 0);
}
#endif
This diff is collapsed.
......@@ -214,7 +214,7 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr,
/* if it cannot be handled with fast symlink scheme */
if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
inode->i_size >= EROFS_BLKSIZ) {
inode->i_size >= EROFS_BLKSIZ || inode->i_size < 0) {
inode->i_op = &erofs_symlink_iops;
return 0;
}
......@@ -241,7 +241,7 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr,
return 0;
}
static int erofs_fill_inode(struct inode *inode, int isdir)
static int erofs_fill_inode(struct inode *inode)
{
struct erofs_inode *vi = EROFS_I(inode);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
......@@ -249,7 +249,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir)
unsigned int ofs;
int err = 0;
trace_erofs_fill_inode(inode, isdir);
trace_erofs_fill_inode(inode);
/* read inode base data from disk */
kaddr = erofs_read_inode(&buf, inode, &ofs);
......@@ -324,21 +324,13 @@ static int erofs_iget_set_actor(struct inode *inode, void *opaque)
return 0;
}
static inline struct inode *erofs_iget_locked(struct super_block *sb,
erofs_nid_t nid)
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid)
{
const unsigned long hashval = erofs_inode_hash(nid);
struct inode *inode;
return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
inode = iget5_locked(sb, hashval, erofs_ilookup_test_actor,
erofs_iget_set_actor, &nid);
}
struct inode *erofs_iget(struct super_block *sb,
erofs_nid_t nid,
bool isdir)
{
struct inode *inode = erofs_iget_locked(sb, nid);
if (!inode)
return ERR_PTR(-ENOMEM);
......@@ -348,10 +340,10 @@ struct inode *erofs_iget(struct super_block *sb,
vi->nid = nid;
err = erofs_fill_inode(inode, isdir);
if (!err)
err = erofs_fill_inode(inode);
if (!err) {
unlock_new_inode(inode);
else {
} else {
iget_failed(inode);
inode = ERR_PTR(err);
}
......
......@@ -76,6 +76,7 @@ struct erofs_mount_opts {
#endif
unsigned int mount_opt;
char *fsid;
char *domain_id;
};
struct erofs_dev_context {
......@@ -98,9 +99,19 @@ struct erofs_sb_lz4_info {
u16 max_pclusterblks;
};
/*
 * An fscache-based shared domain, selected with the "domain_id" mount option.
 * NOTE(review): the code that creates, looks up and releases domains is not
 * visible in this section; the field notes below are to be confirmed there.
 */
struct erofs_domain {
/* reference count of this domain */
refcount_t ref;
/* list linkage — presumably onto a global list of known domains; confirm */
struct list_head list;
/* fscache volume associated with this domain */
struct fscache_volume *volume;
/* domain identifier string */
char *domain_id;
};
/*
 * State returned by erofs_fscache_register_cookie() for one named blob in
 * fscache mode.
 * NOTE(review): the field notes below are inferred from the types and the
 * register/unregister prototypes; confirm against the fscache implementation.
 */
struct erofs_fscache {
/* fscache cookie for the blob */
struct fscache_cookie *cookie;
/* presumably only set up when registration is called with need_inode — confirm */
struct inode *inode;
/* presumably an inode on the pseudo mount used for shared cookies — confirm */
struct inode *anon_inode;
/* shared domain this cookie is associated with, if any — confirm */
struct erofs_domain *domain;
/* blob name string */
char *name;
};
struct erofs_sb_info {
......@@ -120,6 +131,7 @@ struct erofs_sb_info {
struct inode *managed_cache;
struct erofs_sb_lz4_info lz4;
struct inode *packed_inode;
#endif /* CONFIG_EROFS_FS_ZIP */
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
......@@ -157,6 +169,7 @@ struct erofs_sb_info {
/* fscache support */
struct fscache_volume *volume;
struct erofs_fscache *s_fscache;
struct erofs_domain *domain;
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
......@@ -183,7 +196,6 @@ enum {
EROFS_ZIP_CACHE_READAROUND
};
#ifdef CONFIG_EROFS_FS_ZIP
#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
/* basic unit of the workstation of a super_block */
......@@ -223,7 +235,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}
#endif /* !CONFIG_EROFS_FS_ZIP */
/* we strictly follow PAGE_SIZE and no buffer head yet */
#define LOG_BLOCK_SIZE PAGE_SHIFT
......@@ -277,6 +288,8 @@ EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
/* atomic flag definitions */
......@@ -312,8 +325,13 @@ struct erofs_inode {
unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits;
unsigned long z_tailextent_headlcn;
erofs_off_t z_idataoff;
unsigned short z_idata_size;
union {
struct {
erofs_off_t z_idataoff;
unsigned short z_idata_size;
};
erofs_off_t z_fragmentoff;
};
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
......@@ -364,6 +382,7 @@ struct page *erofs_grab_cache_page_nowait(struct address_space *mapping,
}
extern const struct super_operations erofs_sops;
extern struct file_system_type erofs_fs_type;
extern const struct address_space_operations erofs_raw_access_aops;
extern const struct address_space_operations z_erofs_aops;
......@@ -371,6 +390,8 @@ extern const struct address_space_operations z_erofs_aops;
/*
 * Bit numbers backing the erofs-private EROFS_MAP_* mapping flags; numbering
 * starts at BH_PrivateStart.
 */
enum {
/* bit for EROFS_MAP_ENCODED */
BH_Encoded = BH_PrivateStart,
/* bit for EROFS_MAP_FULL_MAPPED: the length of the extent is full */
BH_FullMapped,
/* bit for EROFS_MAP_FRAGMENT: located in the special packed inode */
BH_Fragment,
/* bit for EROFS_MAP_PARTIAL_REF: extent refers to partial decompressed data */
BH_Partialref,
};
/* Has a disk mapping */
......@@ -381,6 +402,10 @@ enum {
#define EROFS_MAP_ENCODED (1 << BH_Encoded)
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
/* Located in the special packed inode */
#define EROFS_MAP_FRAGMENT (1 << BH_Fragment)
/* The extent refers to partial decompressed data */
#define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref)
struct erofs_map_blocks {
struct erofs_buf buf;
......@@ -402,11 +427,12 @@ struct erofs_map_blocks {
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
/* Used to map the whole extent if non-negligible data is requested for LZMA */
#define EROFS_GET_BLOCKS_READMORE 0x0004
/* Used to map tail extent for tailpacking inline pcluster */
/* Used to map tail extent for tailpacking inline or fragment pcluster */
#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
/*
 * Extra algorithm format values numbered from Z_EROFS_COMPRESSION_MAX upward.
 * Both SHIFTED and INTERLACED index entries in the decompressors[] table.
 */
enum {
/* first value past Z_EROFS_COMPRESSION_MAX; "shifted" entry in decompressors[] */
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
/* "interlaced" entry in decompressors[] */
Z_EROFS_COMPRESSION_INTERLACED,
/* one past the last runtime format value */
Z_EROFS_COMPRESSION_RUNTIME_MAX
};
......@@ -466,7 +492,7 @@ extern const struct inode_operations erofs_generic_iops;
extern const struct inode_operations erofs_symlink_iops;
extern const struct inode_operations erofs_fast_symlink_iops;
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags);
......@@ -581,27 +607,26 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb,
int erofs_fscache_register_fs(struct super_block *sb);
void erofs_fscache_unregister_fs(struct super_block *sb);
int erofs_fscache_register_cookie(struct super_block *sb,
struct erofs_fscache **fscache,
char *name, bool need_inode);
void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache);
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
char *name, bool need_inode);
void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache);
extern const struct address_space_operations erofs_fscache_access_aops;
#else
static inline int erofs_fscache_register_fs(struct super_block *sb)
{
return 0;
return -EOPNOTSUPP;
}
static inline void erofs_fscache_unregister_fs(struct super_block *sb) {}
static inline int erofs_fscache_register_cookie(struct super_block *sb,
struct erofs_fscache **fscache,
char *name, bool need_inode)
static inline
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
char *name, bool need_inode)
{
return -EOPNOTSUPP;
return ERR_PTR(-EOPNOTSUPP);
}
static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache)
static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache)
{
}
#endif
......
......@@ -185,7 +185,6 @@ int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid,
if (IS_ERR(de))
return PTR_ERR(de);
/* the target page has been mapped */
if (ndirents)
de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents);
......@@ -197,9 +196,7 @@ int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid,
return PTR_ERR_OR_ZERO(de);
}
/* NOTE: i_mutex is already held by vfs */
static struct dentry *erofs_lookup(struct inode *dir,
struct dentry *dentry,
static struct dentry *erofs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
int err;
......@@ -207,17 +204,11 @@ static struct dentry *erofs_lookup(struct inode *dir,
unsigned int d_type;
struct inode *inode;
DBG_BUGON(!d_really_is_negative(dentry));
/* dentry must be unhashed in lookup, no need to worry about */
DBG_BUGON(!d_unhashed(dentry));
trace_erofs_lookup(dir, dentry, flags);
/* file name exceeds fs limit */
if (dentry->d_name.len > EROFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
/* false uninitialized warnings on gcc 4.8.x */
err = erofs_namei(dir, &dentry->d_name, &nid, &d_type);
if (err == -ENOENT) {
......@@ -228,7 +219,7 @@ static struct dentry *erofs_lookup(struct inode *dir,
} else {
erofs_dbg("%s, %pd (nid %llu) found, d_type %u", __func__,
dentry, nid, d_type);
inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR);
inode = erofs_iget(dir->i_sb, nid);
}
return d_splice_alias(inode, dentry);
}
......
......@@ -224,10 +224,10 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
struct erofs_device_info *dif, erofs_off_t *pos)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_fscache *fscache;
struct erofs_deviceslot *dis;
struct block_device *bdev;
void *ptr;
int ret;
ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP);
if (IS_ERR(ptr))
......@@ -245,10 +245,10 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
}
if (erofs_is_fscache_mode(sb)) {
ret = erofs_fscache_register_cookie(sb, &dif->fscache,
dif->path, false);
if (ret)
return ret;
fscache = erofs_fscache_register_cookie(sb, dif->path, false);
if (IS_ERR(fscache))
return PTR_ERR(fscache);
dif->fscache = fscache;
} else {
bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL,
sb->s_type);
......@@ -381,6 +381,17 @@ static int erofs_read_superblock(struct super_block *sb)
#endif
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
sbi->root_nid = le16_to_cpu(dsb->root_nid);
#ifdef CONFIG_EROFS_FS_ZIP
sbi->packed_inode = NULL;
if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) {
sbi->packed_inode =
erofs_iget(sb, le64_to_cpu(dsb->packed_nid));
if (IS_ERR(sbi->packed_inode)) {
ret = PTR_ERR(sbi->packed_inode);
goto out;
}
}
#endif
sbi->inos = le64_to_cpu(dsb->inos);
sbi->build_time = le64_to_cpu(dsb->build_time);
......@@ -411,6 +422,10 @@ static int erofs_read_superblock(struct super_block *sb)
erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
if (erofs_is_fscache_mode(sb))
erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
if (erofs_sb_has_fragments(sbi))
erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
if (erofs_sb_has_dedupe(sbi))
erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!");
out:
erofs_put_metabuf(&buf);
return ret;
......@@ -440,6 +455,7 @@ enum {
Opt_dax_enum,
Opt_device,
Opt_fsid,
Opt_domain_id,
Opt_err
};
......@@ -465,6 +481,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums),
fsparam_string("device", Opt_device),
fsparam_string("fsid", Opt_fsid),
fsparam_string("domain_id", Opt_domain_id),
{}
};
......@@ -568,6 +585,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
return -ENOMEM;
#else
errorfc(fc, "fsid option not supported");
#endif
break;
case Opt_domain_id:
#ifdef CONFIG_EROFS_FS_ONDEMAND
kfree(ctx->opt.domain_id);
ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL);
if (!ctx->opt.domain_id)
return -ENOMEM;
#else
errorfc(fc, "domain_id option not supported");
#endif
break;
default:
......@@ -641,7 +668,7 @@ static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
static struct inode *erofs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{
return erofs_iget(sb, ino, false);
return erofs_iget(sb, ino);
}
static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
......@@ -667,7 +694,7 @@ static struct dentry *erofs_get_parent(struct dentry *child)
err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
if (err)
return ERR_PTR(err);
return d_obtain_alias(erofs_iget(child->d_sb, nid, d_type == FT_DIR));
return d_obtain_alias(erofs_iget(child->d_sb, nid));
}
static const struct export_operations erofs_export_ops = {
......@@ -676,6 +703,13 @@ static const struct export_operations erofs_export_ops = {
.get_parent = erofs_get_parent,
};
static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr empty_descr = {""};
return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
}
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
......@@ -695,6 +729,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
ctx->opt.fsid = NULL;
ctx->opt.domain_id = NULL;
sbi->devs = ctx->devs;
ctx->devs = NULL;
......@@ -706,11 +741,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
err = erofs_fscache_register_cookie(sb, &sbi->s_fscache,
sbi->opt.fsid, true);
if (err)
return err;
err = super_setup_bdi(sb);
if (err)
return err;
......@@ -752,7 +782,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
#endif
/* get the root inode */
inode = erofs_iget(sb, ROOT_NID(sbi), true);
inode = erofs_iget(sb, ROOT_NID(sbi));
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -781,6 +811,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return 0;
}
/*
 * .get_tree handler for the pseudo mount: delegates to get_tree_nodev() with
 * erofs_fc_fill_pseudo_super() as the superblock fill callback and returns
 * its result unchanged.
 */
static int erofs_fc_anon_get_tree(struct fs_context *fc)
{
return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
}
static int erofs_fc_get_tree(struct fs_context *fc)
{
struct erofs_fs_context *ctx = fc->fs_private;
......@@ -817,7 +852,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data)
fs_put_dax(dif->dax_dev, NULL);
if (dif->bdev)
blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
erofs_fscache_unregister_cookie(&dif->fscache);
erofs_fscache_unregister_cookie(dif->fscache);
dif->fscache = NULL;
kfree(dif->path);
kfree(dif);
return 0;
......@@ -838,6 +874,7 @@ static void erofs_fc_free(struct fs_context *fc)
erofs_free_dev_context(ctx->devs);
kfree(ctx->opt.fsid);
kfree(ctx->opt.domain_id);
kfree(ctx);
}
......@@ -848,10 +885,21 @@ static const struct fs_context_operations erofs_context_ops = {
.free = erofs_fc_free,
};
/*
 * fs_context operations installed by erofs_init_fs_context() when the context
 * has SB_KERNMOUNT set (pseudo mount for anon inodes). Only .get_tree is
 * provided.
 */
static const struct fs_context_operations erofs_anon_context_ops = {
.get_tree = erofs_fc_anon_get_tree,
};
static int erofs_init_fs_context(struct fs_context *fc)
{
struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
struct erofs_fs_context *ctx;
/* pseudo mount for anon inodes */
if (fc->sb_flags & SB_KERNMOUNT) {
fc->ops = &erofs_anon_context_ops;
return 0;
}
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
......@@ -878,8 +926,14 @@ static void erofs_kill_sb(struct super_block *sb)
WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
/* pseudo mount for anon inodes */
if (sb->s_flags & SB_KERNMOUNT) {
kill_anon_super(sb);
return;
}
if (erofs_is_fscache_mode(sb))
generic_shutdown_super(sb);
kill_anon_super(sb);
else
kill_block_super(sb);
......@@ -889,9 +943,9 @@ static void erofs_kill_sb(struct super_block *sb)
erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev, NULL);
erofs_fscache_unregister_cookie(&sbi->s_fscache);
erofs_fscache_unregister_fs(sb);
kfree(sbi->opt.fsid);
kfree(sbi->opt.domain_id);
kfree(sbi);
sb->s_fs_info = NULL;
}
......@@ -908,11 +962,13 @@ static void erofs_put_super(struct super_block *sb)
#ifdef CONFIG_EROFS_FS_ZIP
iput(sbi->managed_cache);
sbi->managed_cache = NULL;
iput(sbi->packed_inode);
sbi->packed_inode = NULL;
#endif
erofs_fscache_unregister_cookie(&sbi->s_fscache);
erofs_fscache_unregister_fs(sb);
}
static struct file_system_type erofs_fs_type = {
struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
.init_fs_context = erofs_init_fs_context,
......@@ -1044,6 +1100,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
#ifdef CONFIG_EROFS_FS_ONDEMAND
if (opt->fsid)
seq_printf(seq, ",fsid=%s", opt->fsid);
if (opt->domain_id)
seq_printf(seq, ",domain_id=%s", opt->domain_id);
#endif
return 0;
}
......
......@@ -76,6 +76,8 @@ EROFS_ATTR_FEATURE(device_table);
EROFS_ATTR_FEATURE(compr_head2);
EROFS_ATTR_FEATURE(sb_chksum);
EROFS_ATTR_FEATURE(ztailpacking);
EROFS_ATTR_FEATURE(fragments);
EROFS_ATTR_FEATURE(dedupe);
static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(zero_padding),
......@@ -86,6 +88,8 @@ static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(compr_head2),
ATTR_LIST(sb_chksum),
ATTR_LIST(ztailpacking),
ATTR_LIST(fragments),
ATTR_LIST(dedupe),
NULL,
};
ATTRIBUTE_GROUPS(erofs_feat);
......@@ -201,12 +205,27 @@ static struct kobject erofs_feat = {
int erofs_register_sysfs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
char *name;
char *str = NULL;
int err;
if (erofs_is_fscache_mode(sb)) {
if (sbi->opt.domain_id) {
str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id,
sbi->opt.fsid);
if (!str)
return -ENOMEM;
name = str;
} else {
name = sbi->opt.fsid;
}
} else {
name = sb->s_id;
}
sbi->s_kobj.kset = &erofs_root;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s",
erofs_is_fscache_mode(sb) ? sbi->opt.fsid : sb->s_id);
err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", name);
kfree(str);
if (err)
goto put_sb_kobj;
return 0;
......
......@@ -39,9 +39,7 @@ static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi,
#ifdef CONFIG_EROFS_FS_XATTR
extern const struct xattr_handler erofs_xattr_user_handler;
extern const struct xattr_handler erofs_xattr_trusted_handler;
#ifdef CONFIG_EROFS_FS_SECURITY
extern const struct xattr_handler erofs_xattr_security_handler;
#endif
static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx)
{
......
......@@ -650,6 +650,35 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
la < fe->headoffset;
}
/*
 * Copy @len bytes of fragment data belonging to @inode from the special
 * packed inode into @page, starting at byte @pageofs within the page.
 *
 * @pos is relative to the start of this inode's fragment; the inode's
 * z_fragmentoff is added to obtain the position inside the packed inode.
 * The copy proceeds in pieces so that each erofs_bread() request stays
 * within a single EROFS_BLKSIZ block of the packed inode.
 *
 * Returns 0 on success, -EFSCORRUPTED if the superblock has no packed inode,
 * or the error reported by erofs_bread().
 */
static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
				 struct page *page, unsigned int pageofs,
				 unsigned int len)
{
	struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	u8 *src, *dst;
	unsigned int i, cnt;

	/*
	 * The packed inode is only loaded when the superblock advertises the
	 * fragments feature together with a non-zero packed_nid, whereas the
	 * fragment advise comes from the per-inode map header. A crafted image
	 * can set the latter without the former, so refuse it here instead of
	 * dereferencing a NULL inode.
	 */
	if (!packed_inode)
		return -EFSCORRUPTED;

	pos += EROFS_I(inode)->z_fragmentoff;
	for (i = 0; i < len; i += cnt) {
		/* never cross a block boundary within one erofs_bread() */
		cnt = min_t(unsigned int, len - i,
			    EROFS_BLKSIZ - erofs_blkoff(pos));
		src = erofs_bread(&buf, packed_inode,
				  erofs_blknr(pos), EROFS_KMAP);
		if (IS_ERR(src)) {
			erofs_put_metabuf(&buf);
			return PTR_ERR(src);
		}

		dst = kmap_local_page(page);
		memcpy(dst + pageofs + i, src + erofs_blkoff(pos), cnt);
		kunmap_local(dst);
		pos += cnt;
	}
	erofs_put_metabuf(&buf);
	return 0;
}
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct page *page, struct page **pagepool)
{
......@@ -688,7 +717,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
/* didn't get a valid pcluster previously (very rare) */
}
if (!(map->m_flags & EROFS_MAP_MAPPED))
if (!(map->m_flags & EROFS_MAP_MAPPED) ||
map->m_flags & EROFS_MAP_FRAGMENT)
goto hitted;
err = z_erofs_collector_begin(fe);
......@@ -735,6 +765,24 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
zero_user_segment(page, cur, end);
goto next_part;
}
if (map->m_flags & EROFS_MAP_FRAGMENT) {
unsigned int pageofs, skip, len;
if (offset > map->m_la) {
pageofs = 0;
skip = offset - map->m_la;
} else {
pageofs = map->m_la & ~PAGE_MASK;
skip = 0;
}
len = min_t(unsigned int, map->m_llen - skip, end - cur);
err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
if (err)
goto out;
++spiltted;
tight = false;
goto next_part;
}
exclusive = (!cur && (!spiltted || tight));
if (cur)
......@@ -766,6 +814,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
fe->pcl->multibases = true;
if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
!(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
fe->pcl->length == map->m_llen)
fe->pcl->partial = false;
if (fe->pcl->length < offset + end - map->m_la) {
......
......@@ -17,7 +17,7 @@ int z_erofs_fill_inode(struct inode *inode)
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
if (!erofs_sb_has_big_pcluster(sbi) &&
!erofs_sb_has_ztailpacking(sbi) &&
!erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0;
vi->z_algorithmtype[0] = 0;
......@@ -55,10 +55,6 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
goto out_unlock;
DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
!erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8);
kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
......@@ -69,6 +65,16 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
}
h = kaddr + erofs_blkoff(pos);
/*
* if the highest bit of the 8-byte map header is set, the whole file
* is stored in the packed inode. The rest bits keeps z_fragmentoff.
*/
if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
vi->z_tailextent_headlcn = 0;
goto unmap_done;
}
vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
......@@ -123,6 +129,20 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
if (err < 0)
goto out_unlock;
}
if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
!(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
struct erofs_map_blocks map = {
.buf = __EROFS_BUF_INITIALIZER
};
vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
err = z_erofs_do_map_blocks(inode, &map,
EROFS_GET_BLOCKS_FINDTAIL);
erofs_put_metabuf(&map.buf);
if (err < 0)
goto out_unlock;
}
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
......@@ -143,20 +163,9 @@ struct z_erofs_maprecorder {
u16 delta[2];
erofs_blk_t pblk, compressedblks;
erofs_off_t nextpackoff;
bool partialref;
};
static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
erofs_blk_t eblk)
{
struct super_block *const sb = m->inode->i_sb;
m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk,
EROFS_KMAP_ATOMIC);
if (IS_ERR(m->kaddr))
return PTR_ERR(m->kaddr);
return 0;
}
static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
unsigned long lcn)
{
......@@ -169,11 +178,11 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
lcn * sizeof(struct z_erofs_vle_decompressed_index);
struct z_erofs_vle_decompressed_index *di;
unsigned int advise, type;
int err;
err = z_erofs_reload_indexes(m, erofs_blknr(pos));
if (err)
return err;
m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
erofs_blknr(pos), EROFS_KMAP_ATOMIC);
if (IS_ERR(m->kaddr))
return PTR_ERR(m->kaddr);
m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
m->lcn = lcn;
......@@ -201,6 +210,8 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
if (advise & Z_EROFS_VLE_DI_PARTIAL_REF)
m->partialref = true;
m->clusterofs = le16_to_cpu(di->di_clusterofs);
m->pblk = le32_to_cpu(di->di_u.blkaddr);
break;
......@@ -370,7 +381,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int compacted_4b_initial, compacted_2b;
unsigned int amortizedshift;
erofs_off_t pos;
int err;
if (lclusterbits != 12)
return -EOPNOTSUPP;
......@@ -407,9 +417,10 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
amortizedshift = 2;
out:
pos += lcn * (1 << amortizedshift);
err = z_erofs_reload_indexes(m, erofs_blknr(pos));
if (err)
return err;
m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
erofs_blknr(pos), EROFS_KMAP_ATOMIC);
if (IS_ERR(m->kaddr))
return PTR_ERR(m->kaddr);
return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}
......@@ -598,6 +609,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
{
struct erofs_inode *const vi = EROFS_I(inode);
bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
struct z_erofs_maprecorder m = {
.inode = inode,
.map = map,
......@@ -663,15 +675,23 @@ static int z_erofs_do_map_blocks(struct inode *inode,
err = -EOPNOTSUPP;
goto unmap_out;
}
if (m.partialref)
map->m_flags |= EROFS_MAP_PARTIAL_REF;
map->m_llen = end - map->m_la;
if (flags & EROFS_GET_BLOCKS_FINDTAIL)
if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
vi->z_tailextent_headlcn = m.lcn;
/* for non-compact indexes, fragmentoff is 64 bits */
if (fragment &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
vi->z_fragmentoff |= (u64)m.pblk << 32;
}
if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_META;
map->m_pa = vi->z_idataoff;
map->m_plen = vi->z_idata_size;
} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_FRAGMENT;
} else {
map->m_pa = blknr_to_addr(m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
......@@ -679,12 +699,18 @@ static int z_erofs_do_map_blocks(struct inode *inode,
goto out;
}
if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2)
if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) {
if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
map->m_algorithmformat =
Z_EROFS_COMPRESSION_INTERLACED;
else
map->m_algorithmformat =
Z_EROFS_COMPRESSION_SHIFTED;
} else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) {
map->m_algorithmformat = vi->z_algorithmtype[1];
else
} else {
map->m_algorithmformat = vi->z_algorithmtype[0];
}
if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
((flags & EROFS_GET_BLOCKS_READMORE) &&
......@@ -705,10 +731,10 @@ static int z_erofs_do_map_blocks(struct inode *inode,
return err;
}
int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags)
{
struct erofs_inode *const vi = EROFS_I(inode);
int err = 0;
trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
......@@ -725,6 +751,15 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (err)
goto out;
if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
!vi->z_tailextent_headlcn) {
map->m_la = 0;
map->m_llen = inode->i_size;
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
EROFS_MAP_FRAGMENT;
goto out;
}
err = z_erofs_do_map_blocks(inode, map, flags);
out:
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
......@@ -751,7 +786,8 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->length = map.m_llen;
if (map.m_flags & EROFS_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
iomap->addr = map.m_pa;
iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
IOMAP_NULL_ADDR : map.m_pa;
} else {
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
......
......@@ -53,15 +53,14 @@ TRACE_EVENT(erofs_lookup,
);
TRACE_EVENT(erofs_fill_inode,
TP_PROTO(struct inode *inode, int isdir),
TP_ARGS(inode, isdir),
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(erofs_nid_t, nid )
__field(erofs_blk_t, blkaddr )
__field(unsigned int, ofs )
__field(int, isdir )
),
TP_fast_assign(
......@@ -69,13 +68,11 @@ TRACE_EVENT(erofs_fill_inode,
__entry->nid = EROFS_I(inode)->nid;
__entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
__entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
__entry->isdir = isdir;
),
TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d",
TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u",
show_dev_nid(__entry),
__entry->blkaddr, __entry->ofs,
__entry->isdir)
__entry->blkaddr, __entry->ofs)
);
TRACE_EVENT(erofs_readpage,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment