Commit 7674a42f authored by Gao Xiang

erofs: use struct lockref to replace handcrafted approach

Let's avoid the current handcrafted lockref, even though switching to
`struct lockref` usually adds an extra 4 bytes for its explicit spinlock
when CONFIG_DEBUG_SPINLOCK is off.

Apart from the size difference, note that the meaning of the refcount
also changes: it now counts active users only. In other words, no extra
reference is taken for XArray tree insertion.

I don't observe any significant performance difference, at least on our
cloud compute server, but the new approach does simplify the overall
codebase a bit.
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Yue Hu <huyue2@coolpad.com>
Link: https://lore.kernel.org/r/20230529123727.79943-1-hsiangkao@linux.alibaba.com
parent 7b4e372c
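For context, here is a condensed sketch of what `struct lockref` provides, paraphrased from include/linux/lockref.h and lib/lockref.c; the exact layout and helper set may vary between kernel versions, so treat it as a reading aid rather than the verbatim header:

/* Condensed paraphrase of <linux/lockref.h>, not the verbatim kernel header. */
struct lockref {
	union {
#ifdef CONFIG_ARCH_USE_CMPXCHG_LOCKREF
		aligned_u64 lock_count;	/* lets the fast path cmpxchg lock and count together */
#endif
		struct {
			spinlock_t lock;
			int count;	/* goes negative once lockref_mark_dead() has run */
		};
	};
};

void lockref_get(struct lockref *lockref);		/* unconditional count++ */
int  lockref_get_not_zero(struct lockref *lockref);	/* count++ only if count > 0 (lockless fast path) */
int  lockref_put_or_lock(struct lockref *lockref);	/* count-- if count > 1, else return 0 with lock held */
int  lockref_put_return(struct lockref *lockref);	/* lockless count--, returns new count or -1 on failure */
void lockref_mark_dead(struct lockref *lockref);	/* caller must hold the lock; marks the lockref dead */

The diff below maps the old handcrafted operations onto these helpers: the freeze/unfreeze pair becomes a plain spin_lock()/spin_unlock() on lockref.lock, and "frozen" is now simply "count == 0 with the lock held".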
@@ -208,46 +208,12 @@ enum {
 	EROFS_ZIP_CACHE_READAROUND
 };

-#define EROFS_LOCKED_MAGIC	(INT_MIN | 0xE0F510CCL)
-
 /* basic unit of the workstation of a super_block */
 struct erofs_workgroup {
-	/* the workgroup index in the workstation */
 	pgoff_t index;
-
-	/* overall workgroup reference count */
-	atomic_t refcount;
+	struct lockref lockref;
 };

-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
-						 int val)
-{
-	preempt_disable();
-	if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
-		preempt_enable();
-		return false;
-	}
-	return true;
-}
-
-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
-					    int orig_val)
-{
-	/*
-	 * other observers should notice all modifications
-	 * in the freezing period.
-	 */
-	smp_mb();
-	atomic_set(&grp->refcount, orig_val);
-	preempt_enable();
-}
-
-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
-{
-	return atomic_cond_read_relaxed(&grp->refcount,
-					VAL != EROFS_LOCKED_MAGIC);
-}
-
 enum erofs_kmap_type {
 	EROFS_NO_KMAP,		/* don't map the buffer */
 	EROFS_KMAP,		/* use kmap_local_page() to map the buffer */
@@ -486,7 +452,7 @@ static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
 void erofs_release_pages(struct page **pagepool);

 #ifdef CONFIG_EROFS_FS_ZIP
-int erofs_workgroup_put(struct erofs_workgroup *grp);
+void erofs_workgroup_put(struct erofs_workgroup *grp);
 struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
 					     pgoff_t index);
 struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
...
@@ -4,7 +4,6 @@
  * https://www.huawei.com/
  */
 #include "internal.h"
-#include <linux/pagevec.h>

 struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
 {
@@ -33,22 +32,21 @@ void erofs_release_pages(struct page **pagepool)
 /* global shrink count (for all mounted EROFS instances) */
 static atomic_long_t erofs_global_shrink_cnt;

-static int erofs_workgroup_get(struct erofs_workgroup *grp)
+static bool erofs_workgroup_get(struct erofs_workgroup *grp)
 {
-	int o;
+	if (lockref_get_not_zero(&grp->lockref))
+		return true;

-repeat:
-	o = erofs_wait_on_workgroup_freezed(grp);
-	if (o <= 0)
-		return -1;
-
-	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
-		goto repeat;
+	spin_lock(&grp->lockref.lock);
+	if (__lockref_is_dead(&grp->lockref)) {
+		spin_unlock(&grp->lockref.lock);
+		return false;
+	}

-	/* decrease refcount paired by erofs_workgroup_put */
-	if (o == 1)
+	if (!grp->lockref.count++)
 		atomic_long_dec(&erofs_global_shrink_cnt);
-	return 0;
+	spin_unlock(&grp->lockref.lock);
+	return true;
 }

 struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
@@ -61,7 +59,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
 	rcu_read_lock();
 	grp = xa_load(&sbi->managed_pslots, index);
 	if (grp) {
-		if (erofs_workgroup_get(grp)) {
+		if (!erofs_workgroup_get(grp)) {
 			/* prefer to relax rcu read side */
 			rcu_read_unlock();
 			goto repeat;
@@ -80,11 +78,10 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
 	struct erofs_workgroup *pre;

 	/*
-	 * Bump up a reference count before making this visible
-	 * to others for the XArray in order to avoid potential
-	 * UAF without serialized by xa_lock.
+	 * Bump up before making this visible to others for the XArray in order
+	 * to avoid potential UAF without serialized by xa_lock.
 	 */
-	atomic_inc(&grp->refcount);
+	lockref_get(&grp->lockref);

 repeat:
 	xa_lock(&sbi->managed_pslots);
@@ -93,13 +90,13 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
 	if (pre) {
 		if (xa_is_err(pre)) {
 			pre = ERR_PTR(xa_err(pre));
-		} else if (erofs_workgroup_get(pre)) {
+		} else if (!erofs_workgroup_get(pre)) {
 			/* try to legitimize the current in-tree one */
 			xa_unlock(&sbi->managed_pslots);
 			cond_resched();
 			goto repeat;
 		}
-		atomic_dec(&grp->refcount);
+		lockref_put_return(&grp->lockref);
 		grp = pre;
 	}
 	xa_unlock(&sbi->managed_pslots);
@@ -112,38 +109,34 @@ static void __erofs_workgroup_free(struct erofs_workgroup *grp)
 	erofs_workgroup_free_rcu(grp);
 }

-int erofs_workgroup_put(struct erofs_workgroup *grp)
+void erofs_workgroup_put(struct erofs_workgroup *grp)
 {
-	int count = atomic_dec_return(&grp->refcount);
+	if (lockref_put_or_lock(&grp->lockref))
+		return;

-	if (count == 1)
+	DBG_BUGON(__lockref_is_dead(&grp->lockref));
+	if (grp->lockref.count == 1)
 		atomic_long_inc(&erofs_global_shrink_cnt);
-	else if (!count)
-		__erofs_workgroup_free(grp);
-	return count;
+	--grp->lockref.count;
+	spin_unlock(&grp->lockref.lock);
 }

 static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
 					   struct erofs_workgroup *grp)
 {
-	/*
-	 * If managed cache is on, refcount of workgroups
-	 * themselves could be < 0 (freezed). In other words,
-	 * there is no guarantee that all refcounts > 0.
-	 */
-	if (!erofs_workgroup_try_to_freeze(grp, 1))
-		return false;
+	int free = false;
+
+	spin_lock(&grp->lockref.lock);
+	if (grp->lockref.count)
+		goto out;

 	/*
-	 * Note that all cached pages should be unattached
-	 * before deleted from the XArray. Otherwise some
-	 * cached pages could be still attached to the orphan
-	 * old workgroup when the new one is available in the tree.
+	 * Note that all cached pages should be detached before deleted from
+	 * the XArray. Otherwise some cached pages could be still attached to
+	 * the orphan old workgroup when the new one is available in the tree.
 	 */
-	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
-		erofs_workgroup_unfreeze(grp, 1);
-		return false;
-	}
+	if (erofs_try_to_free_all_cached_pages(sbi, grp))
+		goto out;

 	/*
 	 * It's impossible to fail after the workgroup is freezed,
@@ -152,10 +145,13 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
 	 */
 	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

-	/* last refcount should be connected with its managed pslot.  */
-	erofs_workgroup_unfreeze(grp, 0);
-	__erofs_workgroup_free(grp);
-	return true;
+	lockref_mark_dead(&grp->lockref);
+	free = true;
+out:
+	spin_unlock(&grp->lockref.lock);
+	if (free)
+		__erofs_workgroup_free(grp);
+	return free;
 }

 static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
...
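To illustrate the new lifecycle as a caller would see it, here is a hypothetical lookup/use/put sequence; only the erofs_find_workgroup() and erofs_workgroup_put() signatures come from this patch, the surrounding function is illustrative:

/* Hypothetical caller sketch, not part of the patch. */
static void example_use_workgroup(struct super_block *sb, pgoff_t index)
{
	struct erofs_workgroup *grp;

	grp = erofs_find_workgroup(sb, index);	/* takes an "active user" reference */
	if (!grp)
		return;

	/* ... use the workgroup; lockref.count > 0 keeps the shrinker away ... */

	erofs_workgroup_put(grp);
	/*
	 * Dropping the last active reference leaves the workgroup cached in
	 * the XArray with lockref.count == 0; it is freed only once
	 * erofs_try_to_release_workgroup() marks the lockref dead under
	 * lockref.lock and erases it from the tree.
	 */
}

Note how erofs_workgroup_put() bumps erofs_global_shrink_cnt on the 1 -> 0 transition: a zero-count workgroup is exactly what the shrinker is allowed to reclaim.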
@@ -641,7 +641,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
 	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));

 	/*
-	 * refcount of workgroup is now freezed as 1,
+	 * refcount of workgroup is now freezed as 0,
 	 * therefore no need to worry about available decompression users.
 	 */
 	for (i = 0; i < pcl->pclusterpages; ++i) {
@@ -674,10 +674,11 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 	if (!folio_test_private(folio))
 		return true;

-	if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1))
-		return false;
-
 	ret = false;
+	spin_lock(&pcl->obj.lockref.lock);
+	if (pcl->obj.lockref.count > 0)
+		goto out;
+
 	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
 	for (i = 0; i < pcl->pclusterpages; ++i) {
 		if (pcl->compressed_bvecs[i].page == &folio->page) {
@@ -686,10 +687,10 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 			break;
 		}
 	}
-	erofs_workgroup_unfreeze(&pcl->obj, 1);
-
 	if (ret)
 		folio_detach_private(folio);
+out:
+	spin_unlock(&pcl->obj.lockref.lock);
 	return ret;
 }
@@ -805,7 +806,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
 	if (IS_ERR(pcl))
 		return PTR_ERR(pcl);

-	atomic_set(&pcl->obj.refcount, 1);
+	spin_lock_init(&pcl->obj.lockref.lock);
 	pcl->algorithmformat = map->m_algorithmformat;
 	pcl->length = 0;
 	pcl->partial = true;
...
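Condensed from the register/insert hunks above, the initialization change is what realizes the "active users only" semantics described in the commit message; the annotations here are editorial, not from the patch:

/* Before: one reference was reserved for the XArray slot itself. */
atomic_set(&pcl->obj.refcount, 1);		/* tree reference */
/* erofs_insert_workgroup() then bumped it again for the caller. */

/* After: the count tracks active users only. */
spin_lock_init(&pcl->obj.lockref.lock);		/* count starts at 0 */
/* erofs_insert_workgroup() takes the caller's reference via lockref_get(). */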