Commit 966b9016 authored by Dan Magenheimer, committed by Greg Kroah-Hartman

staging: zcache: support multiple clients, prep for KVM and RAMster

This is version 3 of an update to zcache, incorporating feedback from the list.
This patch adds support to the in-kernel transcendent memory ("tmem") code
and the zcache driver for multiple clients, which will be needed for both
RAMster and KVM support.  It also adds additional tmem callbacks to support
RAMster and corresponding no-op stubs in the zcache driver.  In v2, I've
also taken the liberty of adding some additional sysfs variables to
both surface information and allow policy control.  Those experimenting
with zcache should find them useful.  V3 clarifies some code walking
and declaring arrays.
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>

[v3: error27@gmail.com: fix array bounds/walking]
[v2: konrad.wilk@oracle.com: fix bools, add check for NULL, fix a comment]
[v2: sjenning@linux.vnet.ibm.com: add info/tunables for poor compression]
[v2: marcusklemm@googlemail.com: add tunable for max persistent pages]
Acked-by: Dan Carpenter <error27@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: linux-mm@kvack.org
Cc: kvm@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
parent 94c97e8e
...@@ -142,6 +142,7 @@ static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb, ...@@ -142,6 +142,7 @@ static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
obj->oid = *oidp; obj->oid = *oidp;
obj->objnode_count = 0; obj->objnode_count = 0;
obj->pampd_count = 0; obj->pampd_count = 0;
(*tmem_pamops.new_obj)(obj);
SET_SENTINEL(obj, OBJ); SET_SENTINEL(obj, OBJ);
while (*new) { while (*new) {
BUG_ON(RB_EMPTY_NODE(*new)); BUG_ON(RB_EMPTY_NODE(*new));
...@@ -274,7 +275,7 @@ static void tmem_objnode_free(struct tmem_objnode *objnode) ...@@ -274,7 +275,7 @@ static void tmem_objnode_free(struct tmem_objnode *objnode)
/* /*
* lookup index in object and return associated pampd (or NULL if not found) * lookup index in object and return associated pampd (or NULL if not found)
*/ */
static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{ {
unsigned int height, shift; unsigned int height, shift;
struct tmem_objnode **slot = NULL; struct tmem_objnode **slot = NULL;
...@@ -303,9 +304,33 @@ static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) ...@@ -303,9 +304,33 @@ static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
height--; height--;
} }
out: out:
return slot != NULL ? (void **)slot : NULL;
}
static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{
struct tmem_objnode **slot;
slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
return slot != NULL ? *slot : NULL; return slot != NULL ? *slot : NULL;
} }
/*
 * Swap the pampd stored at index in obj for new_pampd.  If the lookup
 * yields a slot that currently holds a non-NULL pampd, that old pampd is
 * released through tmem_pamops.free and new_pampd is returned; otherwise
 * nothing is changed and NULL is returned.
 */
static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
					void *new_pampd)
{
	void **slotp = __tmem_pampd_lookup_in_obj(obj, index);
	void *stale;

	if (slotp == NULL || *slotp == NULL)
		return NULL;

	stale = *slotp;
	*slotp = new_pampd;
	/* the displaced pampd is freed without an oid/index (NULL, 0) */
	(*tmem_pamops.free)(stale, obj->pool, NULL, 0);
	return new_pampd;
}
static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index, static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
void *pampd) void *pampd)
{ {
...@@ -456,7 +481,7 @@ static void tmem_objnode_node_destroy(struct tmem_obj *obj, ...@@ -456,7 +481,7 @@ static void tmem_objnode_node_destroy(struct tmem_obj *obj,
if (ht == 1) { if (ht == 1) {
obj->pampd_count--; obj->pampd_count--;
(*tmem_pamops.free)(objnode->slots[i], (*tmem_pamops.free)(objnode->slots[i],
obj->pool); obj->pool, NULL, 0);
objnode->slots[i] = NULL; objnode->slots[i] = NULL;
continue; continue;
} }
...@@ -473,7 +498,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) ...@@ -473,7 +498,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
return; return;
if (obj->objnode_tree_height == 0) { if (obj->objnode_tree_height == 0) {
obj->pampd_count--; obj->pampd_count--;
(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool); (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
} else { } else {
tmem_objnode_node_destroy(obj, obj->objnode_tree_root, tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
obj->objnode_tree_height); obj->objnode_tree_height);
...@@ -481,6 +506,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) ...@@ -481,6 +506,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
obj->objnode_tree_height = 0; obj->objnode_tree_height = 0;
} }
obj->objnode_tree_root = NULL; obj->objnode_tree_root = NULL;
(*tmem_pamops.free_obj)(obj->pool, obj);
} }
/* /*
...@@ -503,15 +529,13 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) ...@@ -503,15 +529,13 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
* always flushes for simplicity. * always flushes for simplicity.
*/ */
int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
struct page *page) char *data, size_t size, bool raw, bool ephemeral)
{ {
struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL; struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
void *pampd = NULL, *pampd_del = NULL; void *pampd = NULL, *pampd_del = NULL;
int ret = -ENOMEM; int ret = -ENOMEM;
bool ephemeral;
struct tmem_hashbucket *hb; struct tmem_hashbucket *hb;
ephemeral = is_ephemeral(pool);
hb = &pool->hashbucket[tmem_oid_hash(oidp)]; hb = &pool->hashbucket[tmem_oid_hash(oidp)];
spin_lock(&hb->lock); spin_lock(&hb->lock);
obj = objfound = tmem_obj_find(hb, oidp); obj = objfound = tmem_obj_find(hb, oidp);
...@@ -521,7 +545,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, ...@@ -521,7 +545,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
/* if found, is a dup put, flush the old one */ /* if found, is a dup put, flush the old one */
pampd_del = tmem_pampd_delete_from_obj(obj, index); pampd_del = tmem_pampd_delete_from_obj(obj, index);
BUG_ON(pampd_del != pampd); BUG_ON(pampd_del != pampd);
(*tmem_pamops.free)(pampd, pool); (*tmem_pamops.free)(pampd, pool, oidp, index);
if (obj->pampd_count == 0) { if (obj->pampd_count == 0) {
objnew = obj; objnew = obj;
objfound = NULL; objfound = NULL;
...@@ -538,7 +562,8 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, ...@@ -538,7 +562,8 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
} }
BUG_ON(obj == NULL); BUG_ON(obj == NULL);
BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound)); BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
pampd = (*tmem_pamops.create)(obj->pool, &obj->oid, index, page); pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
obj->pool, &obj->oid, index);
if (unlikely(pampd == NULL)) if (unlikely(pampd == NULL))
goto free; goto free;
ret = tmem_pampd_add_to_obj(obj, index, pampd); ret = tmem_pampd_add_to_obj(obj, index, pampd);
...@@ -551,7 +576,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, ...@@ -551,7 +576,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
(void)tmem_pampd_delete_from_obj(obj, index); (void)tmem_pampd_delete_from_obj(obj, index);
free: free:
if (pampd) if (pampd)
(*tmem_pamops.free)(pampd, pool); (*tmem_pamops.free)(pampd, pool, NULL, 0);
if (objnew) { if (objnew) {
tmem_obj_free(objnew, hb); tmem_obj_free(objnew, hb);
(*tmem_hostops.obj_free)(objnew, pool); (*tmem_hostops.obj_free)(objnew, pool);
...@@ -573,40 +598,51 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, ...@@ -573,40 +598,51 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
* "put" done with the same handle). * "put" done with the same handle).
*/ */
int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
uint32_t index, struct page *page) char *data, size_t *size, bool raw, int get_and_free)
{ {
struct tmem_obj *obj; struct tmem_obj *obj;
void *pampd; void *pampd;
bool ephemeral = is_ephemeral(pool); bool ephemeral = is_ephemeral(pool);
uint32_t ret = -1; uint32_t ret = -1;
struct tmem_hashbucket *hb; struct tmem_hashbucket *hb;
bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
bool lock_held = false;
hb = &pool->hashbucket[tmem_oid_hash(oidp)]; hb = &pool->hashbucket[tmem_oid_hash(oidp)];
spin_lock(&hb->lock); spin_lock(&hb->lock);
lock_held = true;
obj = tmem_obj_find(hb, oidp); obj = tmem_obj_find(hb, oidp);
if (obj == NULL) if (obj == NULL)
goto out; goto out;
ephemeral = is_ephemeral(pool); if (free)
if (ephemeral)
pampd = tmem_pampd_delete_from_obj(obj, index); pampd = tmem_pampd_delete_from_obj(obj, index);
else else
pampd = tmem_pampd_lookup_in_obj(obj, index); pampd = tmem_pampd_lookup_in_obj(obj, index);
if (pampd == NULL) if (pampd == NULL)
goto out; goto out;
ret = (*tmem_pamops.get_data)(page, pampd, pool); if (free) {
if (ret < 0)
goto out;
if (ephemeral) {
(*tmem_pamops.free)(pampd, pool);
if (obj->pampd_count == 0) { if (obj->pampd_count == 0) {
tmem_obj_free(obj, hb); tmem_obj_free(obj, hb);
(*tmem_hostops.obj_free)(obj, pool); (*tmem_hostops.obj_free)(obj, pool);
obj = NULL; obj = NULL;
} }
} }
if (tmem_pamops.is_remote(pampd)) {
lock_held = false;
spin_unlock(&hb->lock);
}
if (free)
ret = (*tmem_pamops.get_data_and_free)(
data, size, raw, pampd, pool, oidp, index);
else
ret = (*tmem_pamops.get_data)(
data, size, raw, pampd, pool, oidp, index);
if (ret < 0)
goto out;
ret = 0; ret = 0;
out: out:
if (lock_held)
spin_unlock(&hb->lock); spin_unlock(&hb->lock);
return ret; return ret;
} }
...@@ -632,7 +668,7 @@ int tmem_flush_page(struct tmem_pool *pool, ...@@ -632,7 +668,7 @@ int tmem_flush_page(struct tmem_pool *pool,
pampd = tmem_pampd_delete_from_obj(obj, index); pampd = tmem_pampd_delete_from_obj(obj, index);
if (pampd == NULL) if (pampd == NULL)
goto out; goto out;
(*tmem_pamops.free)(pampd, pool); (*tmem_pamops.free)(pampd, pool, oidp, index);
if (obj->pampd_count == 0) { if (obj->pampd_count == 0) {
tmem_obj_free(obj, hb); tmem_obj_free(obj, hb);
(*tmem_hostops.obj_free)(obj, pool); (*tmem_hostops.obj_free)(obj, pool);
...@@ -644,6 +680,30 @@ int tmem_flush_page(struct tmem_pool *pool, ...@@ -644,6 +680,30 @@ int tmem_flush_page(struct tmem_pool *pool,
return ret; return ret;
} }
/*
 * If a page in tmem matches the handle (pool, oid, index), replace its
 * pampd with new_pampd so that any subsequent "get" gets the new page.
 * The work is done with the hashbucket lock for the oid held.
 *
 * Returns -1 if no object matches the oid.  Otherwise returns whatever
 * the pamops replace_in_obj callback returns; that callback is handed
 * new_pampd when a pampd was replaced, or NULL when none was found at
 * index.
 */
int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
			uint32_t index, void *new_pampd)
{
	struct tmem_hashbucket *hb = &pool->hashbucket[tmem_oid_hash(oidp)];
	struct tmem_obj *obj;
	int ret = -1;

	spin_lock(&hb->lock);
	obj = tmem_obj_find(hb, oidp);
	if (obj != NULL) {
		void *swapped;

		swapped = tmem_pampd_replace_in_obj(obj, index, new_pampd);
		ret = (*tmem_pamops.replace_in_obj)(swapped, obj);
	}
	spin_unlock(&hb->lock);
	return ret;
}
/* /*
* "Flush" all pages in tmem matching this oid. * "Flush" all pages in tmem matching this oid.
*/ */
......
...@@ -147,6 +147,7 @@ struct tmem_obj { ...@@ -147,6 +147,7 @@ struct tmem_obj {
unsigned int objnode_tree_height; unsigned int objnode_tree_height;
unsigned long objnode_count; unsigned long objnode_count;
long pampd_count; long pampd_count;
void *extra; /* for private use by pampd implementation */
DECL_SENTINEL DECL_SENTINEL
}; };
...@@ -166,10 +167,18 @@ struct tmem_objnode { ...@@ -166,10 +167,18 @@ struct tmem_objnode {
/* pampd abstract datatype methods provided by the PAM implementation */ /* pampd abstract datatype methods provided by the PAM implementation */
struct tmem_pamops { struct tmem_pamops {
void *(*create)(struct tmem_pool *, struct tmem_oid *, uint32_t, void *(*create)(char *, size_t, bool, int,
struct page *); struct tmem_pool *, struct tmem_oid *, uint32_t);
int (*get_data)(struct page *, void *, struct tmem_pool *); int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
void (*free)(void *, struct tmem_pool *); struct tmem_oid *, uint32_t);
int (*get_data_and_free)(char *, size_t *, bool, void *,
struct tmem_pool *, struct tmem_oid *,
uint32_t);
void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);
void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
bool (*is_remote)(void *);
void (*new_obj)(struct tmem_obj *);
int (*replace_in_obj)(void *, struct tmem_obj *);
}; };
extern void tmem_register_pamops(struct tmem_pamops *m); extern void tmem_register_pamops(struct tmem_pamops *m);
...@@ -184,9 +193,11 @@ extern void tmem_register_hostops(struct tmem_hostops *m); ...@@ -184,9 +193,11 @@ extern void tmem_register_hostops(struct tmem_hostops *m);
/* core tmem accessor functions */ /* core tmem accessor functions */
extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index, extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
struct page *page); char *, size_t, bool, bool);
extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index, extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
struct page *page); char *, size_t *, bool, int);
extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
void *);
extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
uint32_t index); uint32_t index);
extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *); extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment