Commit 87ca34a7 authored by Gao Xiang

erofs: get rid of `struct z_erofs_collection'

It was incompletely introduced to support deduplication between different
logical extents backed by the same pcluster.

We will have a better in-memory representation for this in the next
release cycle, as well as support for partial memory folios. So get rid
of it instead.

No logic changes.

Link: https://lore.kernel.org/r/20220529055425.226363-2-xiang@kernel.org
Acked-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
parent 6e95d0a0
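For readers skimming the hunks below: the net effect is that the single-use
struct z_erofs_collection, which every pcluster embedded exactly once as
primary_collection, is folded into struct z_erofs_pcluster itself, and
pageofs is renamed pageofs_out to pair with the existing pageofs_in. A
minimal sketch of the resulting layout follows; the type stubs and elisions
are ours so the fragment compiles standalone, and are not part of the patch:

	/* Stand-ins for kernel-internal types (sketch only). */
	struct mutex { int stub; };
	struct erofs_workgroup { int stub; };
	typedef void *z_erofs_next_pcluster_t;

	/* Merged layout after this patch, trailing members elided. */
	struct z_erofs_pcluster {
		struct erofs_workgroup obj;	/* kept from the old pcluster */
		struct mutex lock;		/* moved in from z_erofs_collection */

		/* A: point to next chained pcluster or TAILs */
		z_erofs_next_pcluster_t next;

		/* A: lower limit of decompressed length and if full length or not */
		unsigned int length;

		/* I: was z_erofs_collection::pageofs */
		unsigned short pageofs_out;
		/* I: page offset of inline compressed data */
		unsigned short pageofs_in;

		/* L: maximum relative page index, moved in from the collection */
		unsigned short nr_pages;

		/* vcnt, the pagevec[]/rcu union, pclusterpages and
		 * compressed_pages[] follow as in the zdata.h hunks below */
	};

With the indirection gone, the RCU callback can container_of() straight from
the rcu head to the pcluster, and z_erofs_collection_put() collapses into a
plain erofs_workgroup_put(), as the zdata.c hunks show.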
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -199,7 +199,6 @@ struct z_erofs_decompress_frontend {
 	struct z_erofs_pagevec_ctor vector;
 	struct z_erofs_pcluster *pcl, *tailpcl;
-	struct z_erofs_collection *cl;
 	/* a pointer used to pick up inplace I/O pages */
 	struct page **icpage_ptr;
 	z_erofs_next_pcluster_t owned_head;
@@ -357,7 +356,7 @@ static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
 	return false;
 }
 
-/* callers must be with collection lock held */
+/* callers must be with pcluster lock held */
 static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
 			       struct page *page, enum z_erofs_page_type type,
 			       bool pvec_safereuse)
@@ -372,7 +371,7 @@ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
 	ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
 				      pvec_safereuse);
-	fe->cl->vcnt += (unsigned int)ret;
+	fe->pcl->vcnt += (unsigned int)ret;
 	return ret ? 0 : -EAGAIN;
 }
 
@@ -405,12 +404,11 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
 		f->mode = COLLECT_PRIMARY;
 }
 
-static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe,
-				     struct inode *inode,
-				     struct erofs_map_blocks *map)
+static int z_erofs_lookup_pcluster(struct z_erofs_decompress_frontend *fe,
+				   struct inode *inode,
+				   struct erofs_map_blocks *map)
 {
 	struct z_erofs_pcluster *pcl = fe->pcl;
-	struct z_erofs_collection *cl;
 	unsigned int length;
 
 	/* to avoid unexpected loop formed by corrupted images */
@@ -419,8 +417,7 @@ static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe,
 		return -EFSCORRUPTED;
 	}
 
-	cl = z_erofs_primarycollection(pcl);
-	if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
+	if (pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) {
 		DBG_BUGON(1);
 		return -EFSCORRUPTED;
 	}
@@ -443,23 +440,21 @@ static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe,
 			length = READ_ONCE(pcl->length);
 		}
 	}
-	mutex_lock(&cl->lock);
+	mutex_lock(&pcl->lock);
 	/* used to check tail merging loop due to corrupted images */
 	if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
 		fe->tailpcl = pcl;
 
 	z_erofs_try_to_claim_pcluster(fe);
-	fe->cl = cl;
 	return 0;
 }
 
-static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe,
-				       struct inode *inode,
-				       struct erofs_map_blocks *map)
+static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
+				     struct inode *inode,
+				     struct erofs_map_blocks *map)
 {
 	bool ztailpacking = map->m_flags & EROFS_MAP_META;
 	struct z_erofs_pcluster *pcl;
-	struct z_erofs_collection *cl;
 	struct erofs_workgroup *grp;
 	int err;
 
@@ -482,17 +477,15 @@ static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe,
 	/* new pclusters should be claimed as type 1, primary and followed */
 	pcl->next = fe->owned_head;
+	pcl->pageofs_out = map->m_la & ~PAGE_MASK;
 	fe->mode = COLLECT_PRIMARY_FOLLOWED;
 
-	cl = z_erofs_primarycollection(pcl);
-	cl->pageofs = map->m_la & ~PAGE_MASK;
-
 	/*
 	 * lock all primary followed works before visible to others
 	 * and mutex_trylock *never* fails for a new pcluster.
 	 */
-	mutex_init(&cl->lock);
-	DBG_BUGON(!mutex_trylock(&cl->lock));
+	mutex_init(&pcl->lock);
+	DBG_BUGON(!mutex_trylock(&pcl->lock));
 
 	if (ztailpacking) {
 		pcl->obj.index = 0;	/* which indicates ztailpacking */
@@ -519,11 +512,10 @@ static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe,
 		fe->tailpcl = pcl;
 	fe->owned_head = &pcl->next;
 	fe->pcl = pcl;
-	fe->cl = cl;
 	return 0;
 
 err_out:
-	mutex_unlock(&cl->lock);
+	mutex_unlock(&pcl->lock);
 	z_erofs_free_pcluster(pcl);
 	return err;
 }
@@ -535,9 +527,9 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
 	struct erofs_workgroup *grp;
 	int ret;
 
-	DBG_BUGON(fe->cl);
+	DBG_BUGON(fe->pcl);
 
-	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
+	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
 	DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
 	DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
@@ -554,14 +546,14 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
 		fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
 	} else {
 tailpacking:
-		ret = z_erofs_register_collection(fe, inode, map);
+		ret = z_erofs_register_pcluster(fe, inode, map);
 		if (!ret)
 			goto out;
 		if (ret != -EEXIST)
 			return ret;
 	}
 
-	ret = z_erofs_lookup_collection(fe, inode, map);
+	ret = z_erofs_lookup_pcluster(fe, inode, map);
 	if (ret) {
 		erofs_workgroup_put(&fe->pcl->obj);
 		return ret;
@@ -569,7 +561,7 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
 
 out:
 	z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
-				  fe->cl->pagevec, fe->cl->vcnt);
+				  fe->pcl->pagevec, fe->pcl->vcnt);
 	/* since file-backed online pages are traversed in reverse order */
 	fe->icpage_ptr = fe->pcl->compressed_pages +
 			z_erofs_pclusterpages(fe->pcl);
@@ -582,48 +574,36 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
  */
 static void z_erofs_rcu_callback(struct rcu_head *head)
 {
-	struct z_erofs_collection *const cl =
-		container_of(head, struct z_erofs_collection, rcu);
-
-	z_erofs_free_pcluster(container_of(cl, struct z_erofs_pcluster,
-					   primary_collection));
+	z_erofs_free_pcluster(container_of(head,
+			struct z_erofs_pcluster, rcu));
 }
 
 void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
 {
 	struct z_erofs_pcluster *const pcl =
 		container_of(grp, struct z_erofs_pcluster, obj);
-	struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl);
 
-	call_rcu(&cl->rcu, z_erofs_rcu_callback);
-}
-
-static void z_erofs_collection_put(struct z_erofs_collection *cl)
-{
-	struct z_erofs_pcluster *const pcl =
-		container_of(cl, struct z_erofs_pcluster, primary_collection);
-
-	erofs_workgroup_put(&pcl->obj);
+	call_rcu(&pcl->rcu, z_erofs_rcu_callback);
 }
 
 static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
 {
-	struct z_erofs_collection *cl = fe->cl;
+	struct z_erofs_pcluster *pcl = fe->pcl;
 
-	if (!cl)
+	if (!pcl)
 		return false;
 
 	z_erofs_pagevec_ctor_exit(&fe->vector, false);
-	mutex_unlock(&cl->lock);
+	mutex_unlock(&pcl->lock);
 
 	/*
 	 * if all pending pages are added, don't hold its reference
 	 * any longer if the pcluster isn't hosted by ourselves.
 	 */
 	if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
-		z_erofs_collection_put(cl);
+		erofs_workgroup_put(&pcl->obj);
 
-	fe->cl = NULL;
+	fe->pcl = NULL;
 	return true;
 }
 
@@ -666,8 +646,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 	/* lucky, within the range of the current map_blocks */
 	if (offset + cur >= map->m_la &&
 	    offset + cur < map->m_la + map->m_llen) {
-		/* didn't get a valid collection previously (very rare) */
-		if (!fe->cl)
+		/* didn't get a valid pcluster previously (very rare) */
+		if (!fe->pcl)
 			goto restart_now;
 		goto hitted;
 	}
@@ -766,7 +746,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 	/* bump up the number of spiltted parts of a page */
 	++spiltted;
 	/* also update nr_pages */
-	fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1);
+	fe->pcl->nr_pages = max_t(pgoff_t, fe->pcl->nr_pages, index + 1);
next_part:
 	/* can be used for verification */
 	map->m_llen = offset + cur - map->m_la;
@@ -821,15 +801,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 	enum z_erofs_page_type page_type;
 	bool overlapped, partial;
-	struct z_erofs_collection *cl;
 	int err;
 
 	might_sleep();
-	cl = z_erofs_primarycollection(pcl);
-	DBG_BUGON(!READ_ONCE(cl->nr_pages));
+	DBG_BUGON(!READ_ONCE(pcl->nr_pages));
 
-	mutex_lock(&cl->lock);
-	nr_pages = cl->nr_pages;
+	mutex_lock(&pcl->lock);
+	nr_pages = pcl->nr_pages;
 
 	if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
 		pages = pages_onstack;
@@ -857,9 +835,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 	err = 0;
 	z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
-				  cl->pagevec, 0);
+				  pcl->pagevec, 0);
 
-	for (i = 0; i < cl->vcnt; ++i) {
+	for (i = 0; i < pcl->vcnt; ++i) {
 		unsigned int pagenr;
 
 		page = z_erofs_pagevec_dequeue(&ctor, &page_type);
@@ -945,11 +923,11 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 		goto out;
 
 	llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
-	if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) {
+	if (nr_pages << PAGE_SHIFT >= pcl->pageofs_out + llen) {
 		outputsize = llen;
 		partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
 	} else {
-		outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs;
+		outputsize = (nr_pages << PAGE_SHIFT) - pcl->pageofs_out;
 		partial = true;
 	}
@@ -963,7 +941,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 		.in = compressed_pages,
 		.out = pages,
 		.pageofs_in = pcl->pageofs_in,
-		.pageofs_out = cl->pageofs,
+		.pageofs_out = pcl->pageofs_out,
 		.inputsize = inputsize,
 		.outputsize = outputsize,
 		.alg = pcl->algorithmformat,
@@ -1012,16 +990,12 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 	else if (pages != pages_onstack)
 		kvfree(pages);
 
-	cl->nr_pages = 0;
-	cl->vcnt = 0;
+	pcl->nr_pages = 0;
+	pcl->vcnt = 0;
 
-	/* all cl locks MUST be taken before the following line */
+	/* pcluster lock MUST be taken before the following line */
 	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
-
-	/* all cl locks SHOULD be released right now */
-	mutex_unlock(&cl->lock);
-
-	z_erofs_collection_put(cl);
+	mutex_unlock(&pcl->lock);
 	return err;
 }
 
@@ -1043,6 +1017,7 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
 		owned = READ_ONCE(pcl->next);
 
 		z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
+		erofs_workgroup_put(&pcl->obj);
 	}
 }
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -12,21 +12,40 @@
 #define Z_EROFS_PCLUSTER_MAX_PAGES	(Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
 #define Z_EROFS_NR_INLINE_PAGEVECS      3
 
+#define Z_EROFS_PCLUSTER_FULL_LENGTH    0x00000001
+#define Z_EROFS_PCLUSTER_LENGTH_BIT     1
+
+/*
+ * let's leave a type here in case of introducing
+ * another tagged pointer later.
+ */
+typedef void *z_erofs_next_pcluster_t;
+
 /*
  * Structure fields follow one of the following exclusion rules.
  *
  * I: Modifiable by initialization/destruction paths and read-only
  *    for everyone else;
  *
- * L: Field should be protected by pageset lock;
+ * L: Field should be protected by the pcluster lock;
  *
  * A: Field should be accessed / updated in atomic for parallelized code.
  */
-struct z_erofs_collection {
+struct z_erofs_pcluster {
+	struct erofs_workgroup obj;
 	struct mutex lock;
 
+	/* A: point to next chained pcluster or TAILs */
+	z_erofs_next_pcluster_t next;
+
+	/* A: lower limit of decompressed length and if full length or not */
+	unsigned int length;
+
 	/* I: page offset of start position of decompression */
-	unsigned short pageofs;
+	unsigned short pageofs_out;
+
+	/* I: page offset of inline compressed data */
+	unsigned short pageofs_in;
 
 	/* L: maximum relative page index in pagevec[] */
 	unsigned short nr_pages;
@@ -41,29 +60,6 @@ struct z_erofs_collection {
 		/* I: can be used to free the pcluster by RCU. */
 		struct rcu_head rcu;
 	};
-};
 
-#define Z_EROFS_PCLUSTER_FULL_LENGTH    0x00000001
-#define Z_EROFS_PCLUSTER_LENGTH_BIT     1
-
-/*
- * let's leave a type here in case of introducing
- * another tagged pointer later.
- */
-typedef void *z_erofs_next_pcluster_t;
-
-struct z_erofs_pcluster {
-	struct erofs_workgroup obj;
-	struct z_erofs_collection primary_collection;
-
-	/* A: point to next chained pcluster or TAILs */
-	z_erofs_next_pcluster_t next;
-
-	/* A: lower limit of decompressed length and if full length or not */
-	unsigned int length;
-
-	/* I: page offset of inline compressed data */
-	unsigned short pageofs_in;
-
 	union {
 		/* I: physical cluster size in pages */
@@ -80,8 +76,6 @@ struct z_erofs_pcluster {
 	struct page *compressed_pages[];
 };
 
-#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection)
-
 /* let's avoid the valid 32-bit kernel addresses */
 
 /* the chained workgroup hasn't submitted io (still open) */