Commit bb29fd77 authored by Chengming Zhou's avatar Chengming Zhou Committed by Andrew Morton

mm/zswap: make sure each swapfile always have zswap rb-tree

Patch series "mm/zswap: optimize the scalability of zswap rb-tree", v2.

When testing the zswap performance by using kernel build -j32 in a tmpfs
directory, I found the scalability of zswap rb-tree is not good, which is
protected by the only spinlock.  That would cause heavy lock contention if
multiple tasks zswap_store/load concurrently.

So a simple solution is to split the only one zswap rb-tree into multiple
rb-trees, each corresponds to SWAP_ADDRESS_SPACE_PAGES (64M).  This idea
is from the commit 4b3ef9da ("mm/swap: split swap cache into 64MB
trunks").

Although this method can't solve the spinlock contention completely, it
can mitigate much of that contention.  Below is the results of kernel
build in tmpfs with zswap shrinker enabled:

     linux-next  zswap-lock-optimize
real 1m9.181s    1m3.820s
user 17m44.036s  17m40.100s
sys  7m37.297s   4m54.622s

So there are clearly improvements.  And it's complementary with the
ongoing zswap xarray conversion by Chris.  Anyway, I think we can also
merge this first, it's complementary IMHO.  So I just refresh and resend
this for further discussion.


This patch (of 2):

Not all zswap interfaces can handle the absence of the zswap rb-tree,
actually only zswap_store() has handled it for now.

To make things simple, we make sure each swapfile always have the zswap
rb-tree prepared before being enabled and used.  The preparation is
unlikely to fail in practice, this patch just make it explicit.

Link: https://lkml.kernel.org/r/20240117-b4-zswap-lock-optimize-v2-0-b5cc55479090@bytedance.com
Link: https://lkml.kernel.org/r/20240117-b4-zswap-lock-optimize-v2-1-b5cc55479090@bytedance.comSigned-off-by: default avatarChengming Zhou <zhouchengming@bytedance.com>
Acked-by: default avatarNhat Pham <nphamcs@gmail.com>
Acked-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Acked-by: default avatarYosry Ahmed <yosryahmed@google.com>
Cc: Chris Li <chriscli@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 3efbe13e
......@@ -30,7 +30,7 @@ struct zswap_lruvec_state {
bool zswap_store(struct folio *folio);
bool zswap_load(struct folio *folio);
void zswap_invalidate(int type, pgoff_t offset);
void zswap_swapon(int type);
int zswap_swapon(int type);
void zswap_swapoff(int type);
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
void zswap_lruvec_state_init(struct lruvec *lruvec);
......@@ -51,7 +51,10 @@ static inline bool zswap_load(struct folio *folio)
}
static inline void zswap_invalidate(int type, pgoff_t offset) {}
static inline void zswap_swapon(int type) {}
static inline int zswap_swapon(int type)
{
return 0;
}
static inline void zswap_swapoff(int type) {}
static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
......
......@@ -2348,8 +2348,6 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
unsigned char *swap_map,
struct swap_cluster_info *cluster_info)
{
zswap_swapon(p->type);
spin_lock(&swap_lock);
spin_lock(&p->lock);
setup_swap_info(p, prio, swap_map, cluster_info);
......@@ -3167,6 +3165,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
if (error)
goto bad_swap_unlock_inode;
error = zswap_swapon(p->type);
if (error)
goto free_swap_address_space;
/*
* Flush any pending IO and dirty mappings before we start using this
* swap device.
......@@ -3175,7 +3177,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = inode_drain_writes(inode);
if (error) {
inode->i_flags &= ~S_SWAPFILE;
goto free_swap_address_space;
goto free_swap_zswap;
}
mutex_lock(&swapon_mutex);
......@@ -3199,6 +3201,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = 0;
goto out;
free_swap_zswap:
zswap_swapoff(p->type);
free_swap_address_space:
exit_swap_address_space(p->type);
bad_swap_unlock_inode:
......
......@@ -1518,9 +1518,6 @@ bool zswap_store(struct folio *folio)
if (folio_test_large(folio))
return false;
if (!tree)
return false;
/*
* If this is a duplicate, it must be removed before attempting to store
* it, otherwise, if the store fails the old page won't be removed from
......@@ -1775,19 +1772,20 @@ void zswap_invalidate(int type, pgoff_t offset)
spin_unlock(&tree->lock);
}
void zswap_swapon(int type)
int zswap_swapon(int type)
{
struct zswap_tree *tree;
tree = kzalloc(sizeof(*tree), GFP_KERNEL);
if (!tree) {
pr_err("alloc failed, zswap disabled for swap type %d\n", type);
return;
return -ENOMEM;
}
tree->rbroot = RB_ROOT;
spin_lock_init(&tree->lock);
zswap_trees[type] = tree;
return 0;
}
void zswap_swapoff(int type)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment