Commit 8edc9c4e authored by Chengming Zhou, committed by Andrew Morton

mm/zswap: use only one pool in zswap

Zswap uses 32 pools to work around the locking scalability problem in zswap
backends (mainly zsmalloc nowadays), which brings its own problems like
memory waste and more memory fragmentation.

Testing results show that we can achieve nearly the same performance with
only one pool in zswap after changing zsmalloc to use a per-size_class lock
instead of the pool spinlock.

Test setup: kernel build (make bzImage -j32) on tmpfs with memory.max=1GB,
with the zswap shrinker enabled and a 10GB swapfile on ext4.

                                real    user    sys
6.10.0-rc3                      138.18  1241.38 1452.73
6.10.0-rc3-onepool              149.45  1240.45 1844.69
6.10.0-rc3-onepool-perclass     138.23  1242.37 1469.71

Doing the same testing with zbud shows slightly worse performance, as
expected, since we don't do any locking optimization for zbud.  I think
it's acceptable since zsmalloc has become a lot more popular than other
backends, and we may want to support only zsmalloc in the future.

                                real    user    sys
6.10.0-rc3-zbud                 138.23  1239.58 1430.09
6.10.0-rc3-onepool-zbud         139.64  1241.37 1516.59

[chengming.zhou@linux.dev: fix error handling in zswap_pool_create(), per Dan Carpenter]
  Link: https://lkml.kernel.org/r/20240621-zsmalloc-lock-mm-everything-v2-2-d30e9cd2b793@linux.dev
[chengming.zhou@linux.dev: fix error handling again in zswap_pool_create(), per Yosry]
  Link: https://lkml.kernel.org/r/20240625-zsmalloc-lock-mm-everything-v3-2-ad941699cb61@linux.dev
Link: https://lkml.kernel.org/r/20240617-zsmalloc-lock-mm-everything-v1-2-5e5081ea11b3@linux.dev
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Yosry Ahmed <yosryahmed@google.com>
Cc: Chengming Zhou <zhouchengming@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 64bd0197
...@@ -124,9 +124,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ ...@@ -124,9 +124,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent, module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
uint, 0644); uint, 0644);
/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32
/* Enable/disable memory pressure-based shrinker. */ /* Enable/disable memory pressure-based shrinker. */
static bool zswap_shrinker_enabled = IS_ENABLED( static bool zswap_shrinker_enabled = IS_ENABLED(
CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
...@@ -162,7 +159,7 @@ struct crypto_acomp_ctx { ...@@ -162,7 +159,7 @@ struct crypto_acomp_ctx {
* needs to be verified that it's still valid in the tree. * needs to be verified that it's still valid in the tree.
*/ */
struct zswap_pool { struct zswap_pool {
struct zpool *zpools[ZSWAP_NR_ZPOOLS]; struct zpool *zpool;
struct crypto_acomp_ctx __percpu *acomp_ctx; struct crypto_acomp_ctx __percpu *acomp_ctx;
struct percpu_ref ref; struct percpu_ref ref;
struct list_head list; struct list_head list;
...@@ -244,7 +241,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp) ...@@ -244,7 +241,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
#define zswap_pool_debug(msg, p) \ #define zswap_pool_debug(msg, p) \
pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
zpool_get_type((p)->zpools[0])) zpool_get_type((p)->zpool))
/********************************* /*********************************
* pool functions * pool functions
...@@ -253,7 +250,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref); ...@@ -253,7 +250,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref);
static struct zswap_pool *zswap_pool_create(char *type, char *compressor) static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{ {
int i;
struct zswap_pool *pool; struct zswap_pool *pool;
char name[38]; /* 'zswap' + 32 char (max) num + \0 */ char name[38]; /* 'zswap' + 32 char (max) num + \0 */
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
...@@ -274,18 +270,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) ...@@ -274,18 +270,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
if (!pool) if (!pool)
return NULL; return NULL;
for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) { /* unique name for each pool specifically required by zsmalloc */
/* unique name for each pool specifically required by zsmalloc */ snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
snprintf(name, 38, "zswap%x", pool->zpool = zpool_create_pool(type, name, gfp);
atomic_inc_return(&zswap_pools_count)); if (!pool->zpool) {
pr_err("%s zpool not available\n", type);
pool->zpools[i] = zpool_create_pool(type, name, gfp); goto error;
if (!pool->zpools[i]) {
pr_err("%s zpool not available\n", type);
goto error;
}
} }
pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0])); pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
...@@ -318,8 +310,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) ...@@ -318,8 +310,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
error: error:
if (pool->acomp_ctx) if (pool->acomp_ctx)
free_percpu(pool->acomp_ctx); free_percpu(pool->acomp_ctx);
while (i--) if (pool->zpool)
zpool_destroy_pool(pool->zpools[i]); zpool_destroy_pool(pool->zpool);
kfree(pool); kfree(pool);
return NULL; return NULL;
} }
...@@ -368,15 +360,12 @@ static struct zswap_pool *__zswap_pool_create_fallback(void) ...@@ -368,15 +360,12 @@ static struct zswap_pool *__zswap_pool_create_fallback(void)
static void zswap_pool_destroy(struct zswap_pool *pool) static void zswap_pool_destroy(struct zswap_pool *pool)
{ {
int i;
zswap_pool_debug("destroying", pool); zswap_pool_debug("destroying", pool);
cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
free_percpu(pool->acomp_ctx); free_percpu(pool->acomp_ctx);
for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) zpool_destroy_pool(pool->zpool);
zpool_destroy_pool(pool->zpools[i]);
kfree(pool); kfree(pool);
} }
...@@ -471,8 +460,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) ...@@ -471,8 +460,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
list_for_each_entry_rcu(pool, &zswap_pools, list) { list_for_each_entry_rcu(pool, &zswap_pools, list) {
if (strcmp(pool->tfm_name, compressor)) if (strcmp(pool->tfm_name, compressor))
continue; continue;
/* all zpools share the same type */ if (strcmp(zpool_get_type(pool->zpool), type))
if (strcmp(zpool_get_type(pool->zpools[0]), type))
continue; continue;
/* if we can't get it, it's about to be destroyed */ /* if we can't get it, it's about to be destroyed */
if (!zswap_pool_get(pool)) if (!zswap_pool_get(pool))
...@@ -499,12 +487,8 @@ unsigned long zswap_total_pages(void) ...@@ -499,12 +487,8 @@ unsigned long zswap_total_pages(void)
unsigned long total = 0; unsigned long total = 0;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(pool, &zswap_pools, list) { list_for_each_entry_rcu(pool, &zswap_pools, list)
int i; total += zpool_get_total_pages(pool->zpool);
for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
total += zpool_get_total_pages(pool->zpools[i]);
}
rcu_read_unlock(); rcu_read_unlock();
return total; return total;
...@@ -809,11 +793,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry) ...@@ -809,11 +793,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
kmem_cache_free(zswap_entry_cache, entry); kmem_cache_free(zswap_entry_cache, entry);
} }
static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
{
return entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))];
}
/* /*
* Carries out the common pattern of freeing and entry's zpool allocation, * Carries out the common pattern of freeing and entry's zpool allocation,
* freeing the entry itself, and decrementing the number of stored pages. * freeing the entry itself, and decrementing the number of stored pages.
...@@ -824,7 +803,7 @@ static void zswap_entry_free(struct zswap_entry *entry) ...@@ -824,7 +803,7 @@ static void zswap_entry_free(struct zswap_entry *entry)
atomic_dec(&zswap_same_filled_pages); atomic_dec(&zswap_same_filled_pages);
else { else {
zswap_lru_del(&zswap_list_lru, entry); zswap_lru_del(&zswap_list_lru, entry);
zpool_free(zswap_find_zpool(entry), entry->handle); zpool_free(entry->pool->zpool, entry->handle);
zswap_pool_put(entry->pool); zswap_pool_put(entry->pool);
} }
if (entry->objcg) { if (entry->objcg) {
...@@ -950,7 +929,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry) ...@@ -950,7 +929,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
if (comp_ret) if (comp_ret)
goto unlock; goto unlock;
zpool = zswap_find_zpool(entry); zpool = entry->pool->zpool;
gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
if (zpool_malloc_support_movable(zpool)) if (zpool_malloc_support_movable(zpool))
gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
...@@ -979,7 +958,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry) ...@@ -979,7 +958,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
{ {
struct zpool *zpool = zswap_find_zpool(entry); struct zpool *zpool = entry->pool->zpool;
struct scatterlist input, output; struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx; struct crypto_acomp_ctx *acomp_ctx;
u8 *src; u8 *src;
...@@ -1531,7 +1510,7 @@ bool zswap_store(struct folio *folio) ...@@ -1531,7 +1510,7 @@ bool zswap_store(struct folio *folio)
if (!entry->length) if (!entry->length)
atomic_dec(&zswap_same_filled_pages); atomic_dec(&zswap_same_filled_pages);
else { else {
zpool_free(zswap_find_zpool(entry), entry->handle); zpool_free(entry->pool->zpool, entry->handle);
put_pool: put_pool:
zswap_pool_put(entry->pool); zswap_pool_put(entry->pool);
} }
...@@ -1753,7 +1732,7 @@ static int zswap_setup(void) ...@@ -1753,7 +1732,7 @@ static int zswap_setup(void)
pool = __zswap_pool_create_fallback(); pool = __zswap_pool_create_fallback();
if (pool) { if (pool) {
pr_info("loaded using pool %s/%s\n", pool->tfm_name, pr_info("loaded using pool %s/%s\n", pool->tfm_name,
zpool_get_type(pool->zpools[0])); zpool_get_type(pool->zpool));
list_add(&pool->list, &zswap_pools); list_add(&pool->list, &zswap_pools);
zswap_has_pool = true; zswap_has_pool = true;
static_branch_enable(&zswap_ever_enabled); static_branch_enable(&zswap_ever_enabled);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment