Commit dcf5aedb authored by Vitaly Wool, committed by Linus Torvalds

z3fold: stricter locking and more careful reclaim

Use temporary slots in the reclaim function to avoid a possible race
when freeing those slots.

While at it, make sure we check the CLAIMED flag under the page lock
in the reclaim function, so that we are not racing with z3fold_alloc().

Link: https://lkml.kernel.org/r/20201209145151.18994-4-vitaly.wool@konsulko.com
Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
Cc: <stable@vger.kernel.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent fc548865
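
The core of the patch is visible in the z3fold_reclaim_page() hunks below:
reclaim now encodes handles into a temporary, stack-allocated
z3fold_buddy_slots structure tagged with HANDLES_NOFREE, so free_handle()
returns early and never hands those slots to kmem_cache_free() while
reclaim still dereferences them. What follows is a minimal user-space
sketch of that ownership pattern, not the kernel code itself; the names
fake_slots, NOFREE_BIT, fake_free_handle and fake_reclaim are invented
for illustration.

/* build: cc -o sketch sketch.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NOFREE_BIT 0x1UL   /* plays the role of HANDLES_NOFREE */

struct fake_slots {
    unsigned long pool;    /* pool cookie ORed with flag bits */
    unsigned long slot[3]; /* encoded handles: FIRST/MIDDLE/LAST */
};

/* Mirrors free_handle()'s early return: NOFREE slots are never freed. */
static void fake_free_handle(struct fake_slots *s, int idx)
{
    s->slot[idx] = 0;
    if (s->pool & NOFREE_BIT)
        return;            /* caller-owned, e.g. on the stack */
    for (int i = 0; i < 3; i++)
        if (s->slot[i])
            return;        /* other handles still live */
    free(s);               /* heap-owned and now empty */
}

/* Reclaim builds handles in caller-owned slots, as the patch does
 * with the on-stack slots in z3fold_reclaim_page(). */
static void fake_reclaim(unsigned long pool_cookie)
{
    struct fake_slots slots;

    slots.pool = pool_cookie | NOFREE_BIT;
    memset(slots.slot, 0, sizeof(slots.slot));
    slots.slot[0] = 0xdead;            /* pretend-encoded handle */

    /* A racing free of this handle is now harmless: it clears the
     * slot but cannot free memory reclaim still points at. */
    fake_free_handle(&slots, 0);
    printf("slots survived on the stack: pool=%#lx\n", slots.pool);
}

int main(void)
{
    fake_reclaim(0x1000);
    return 0;
}

The design point is ownership: because the slots live in the reclaim
caller's frame, no other path is ever responsible for freeing them.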
diff --git a/mm/z3fold.c b/mm/z3fold.c
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -181,6 +181,13 @@ enum z3fold_page_flags {
     PAGE_CLAIMED, /* by either reclaim or free */
 };
 
+/*
+ * handle flags, go under HANDLE_FLAG_MASK
+ */
+enum z3fold_handle_flags {
+    HANDLES_NOFREE = 0,
+};
+
 /*
  * Forward declarations
  */
@@ -311,6 +318,12 @@ static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr)
     slots = handle_to_slots(handle);
     write_lock(&slots->lock);
     *(unsigned long *)handle = 0;
+
+    if (test_bit(HANDLES_NOFREE, &slots->pool)) {
+        write_unlock(&slots->lock);
+        return; /* simple case, nothing else to do */
+    }
+
     if (zhdr->slots != slots)
         zhdr->foreign_handles--;
@@ -621,6 +634,28 @@ static inline void add_to_unbuddied(struct z3fold_pool *pool,
     }
 }
 
+static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks)
+{
+    enum buddy bud = HEADLESS;
+
+    if (zhdr->middle_chunks) {
+        if (!zhdr->first_chunks &&
+            chunks <= zhdr->start_middle - ZHDR_CHUNKS)
+            bud = FIRST;
+        else if (!zhdr->last_chunks)
+            bud = LAST;
+    } else {
+        if (!zhdr->first_chunks)
+            bud = FIRST;
+        else if (!zhdr->last_chunks)
+            bud = LAST;
+        else
+            bud = MIDDLE;
+    }
+
+    return bud;
+}
+
 static inline void *mchunk_memmove(struct z3fold_header *zhdr,
                                    unsigned short dst_chunk)
 {
@@ -682,18 +717,7 @@ static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
         if (WARN_ON(new_zhdr == zhdr))
             goto out_fail;
 
-        if (new_zhdr->first_chunks == 0) {
-            if (new_zhdr->middle_chunks != 0 &&
-                chunks >= new_zhdr->start_middle) {
-                new_bud = LAST;
-            } else {
-                new_bud = FIRST;
-            }
-        } else if (new_zhdr->last_chunks == 0) {
-            new_bud = LAST;
-        } else if (new_zhdr->middle_chunks == 0) {
-            new_bud = MIDDLE;
-        }
+        new_bud = get_free_buddy(new_zhdr, chunks);
         q = new_zhdr;
         switch (new_bud) {
         case FIRST:
@@ -815,9 +839,8 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
         return;
     }
 
-    if (unlikely(PageIsolated(page) ||
-                 test_bit(PAGE_CLAIMED, &page->private) ||
-                 test_bit(PAGE_STALE, &page->private))) {
+    if (test_bit(PAGE_STALE, &page->private) ||
+        test_and_set_bit(PAGE_CLAIMED, &page->private)) {
         z3fold_page_unlock(zhdr);
         return;
     }
@@ -826,13 +849,16 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
     if (!zhdr->foreign_handles && buddy_single(zhdr) &&
         zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
         if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
             atomic64_dec(&pool->pages_nr);
-        else
+        else {
+            clear_bit(PAGE_CLAIMED, &page->private);
             z3fold_page_unlock(zhdr);
+        }
         return;
     }
 
     z3fold_compact_page(zhdr);
     add_to_unbuddied(pool, zhdr);
+    clear_bit(PAGE_CLAIMED, &page->private);
     z3fold_page_unlock(zhdr);
 }
@@ -1080,17 +1106,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 retry:
     zhdr = __z3fold_alloc(pool, size, can_sleep);
     if (zhdr) {
-        if (zhdr->first_chunks == 0) {
-            if (zhdr->middle_chunks != 0 &&
-                chunks >= zhdr->start_middle)
-                bud = LAST;
-            else
-                bud = FIRST;
-        } else if (zhdr->last_chunks == 0)
-            bud = LAST;
-        else if (zhdr->middle_chunks == 0)
-            bud = MIDDLE;
-        else {
+        bud = get_free_buddy(zhdr, chunks);
+        if (bud == HEADLESS) {
             if (kref_put(&zhdr->refcount,
                          release_z3fold_page_locked))
                 atomic64_dec(&pool->pages_nr);
@@ -1236,7 +1253,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
         pr_err("%s: unknown bud %d\n", __func__, bud);
         WARN_ON(1);
         put_z3fold_header(zhdr);
-        clear_bit(PAGE_CLAIMED, &page->private);
         return;
     }
@@ -1251,8 +1267,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
         z3fold_page_unlock(zhdr);
         return;
     }
-    if (unlikely(PageIsolated(page)) ||
-        test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
+    if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
         put_z3fold_header(zhdr);
         clear_bit(PAGE_CLAIMED, &page->private);
         return;
@@ -1316,6 +1331,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
     struct page *page = NULL;
     struct list_head *pos;
     unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
+    struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN)));
+
+    rwlock_init(&slots.lock);
+    slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);
 
     spin_lock(&pool->lock);
     if (!pool->ops || !pool->ops->evict || retries == 0) {
@@ -1330,35 +1349,36 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
         list_for_each_prev(pos, &pool->lru) {
             page = list_entry(pos, struct page, lru);
 
-            /* this bit could have been set by free, in which case
-             * we pass over to the next page in the pool.
-             */
-            if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
-                page = NULL;
-                continue;
-            }
-
-            if (unlikely(PageIsolated(page))) {
-                clear_bit(PAGE_CLAIMED, &page->private);
-                page = NULL;
-                continue;
-            }
             zhdr = page_address(page);
             if (test_bit(PAGE_HEADLESS, &page->private))
                 break;
 
+            if (kref_get_unless_zero(&zhdr->refcount) == 0) {
+                zhdr = NULL;
+                break;
+            }
             if (!z3fold_page_trylock(zhdr)) {
-                clear_bit(PAGE_CLAIMED, &page->private);
+                if (kref_put(&zhdr->refcount,
+                        release_z3fold_page))
+                    atomic64_dec(&pool->pages_nr);
                 zhdr = NULL;
                 continue; /* can't evict at this point */
             }
-            if (zhdr->foreign_handles) {
-                clear_bit(PAGE_CLAIMED, &page->private);
-                z3fold_page_unlock(zhdr);
+
+            /* test_and_set_bit is of course atomic, but we still
+             * need to do it under page lock, otherwise checking
+             * that bit in __z3fold_alloc wouldn't make sense
+             */
+            if (zhdr->foreign_handles ||
+                test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+                if (kref_put(&zhdr->refcount,
+                        release_z3fold_page))
+                    atomic64_dec(&pool->pages_nr);
+                else
+                    z3fold_page_unlock(zhdr);
                 zhdr = NULL;
                 continue; /* can't evict such page */
             }
-            kref_get(&zhdr->refcount);
             list_del_init(&zhdr->buddy);
             zhdr->cpu = -1;
             break;
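
As the comment added in the hunk above notes, test_and_set_bit() alone is
not enough: the PAGE_CLAIMED bit only means something to __z3fold_alloc()
if both the claimer and the checker operate under the same page lock.
Below is a small user-space sketch of that claim-under-lock pattern, again
not the kernel code: fake_page, try_claim and alloc_sees_claimed are
invented names, and a pthread mutex stands in for the z3fold page lock.

/* build: cc -o claim claim.c -lpthread */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define CLAIMED_BIT 0x1UL  /* plays the role of PAGE_CLAIMED */

struct fake_page {
    pthread_mutex_t lock;  /* stands in for the z3fold page lock */
    atomic_ulong private;  /* flag bits, as in page->private */
};

/* Reclaim side: atomically claim the page, but only under the lock. */
static bool try_claim(struct fake_page *p)
{
    bool won;

    pthread_mutex_lock(&p->lock);
    won = !(atomic_fetch_or(&p->private, CLAIMED_BIT) & CLAIMED_BIT);
    pthread_mutex_unlock(&p->lock);
    return won;
}

/* Allocation side: the test is only stable because the same lock is
 * held; otherwise the bit could flip right after we look at it. */
static bool alloc_sees_claimed(struct fake_page *p)
{
    bool claimed;

    pthread_mutex_lock(&p->lock);
    claimed = atomic_load(&p->private) & CLAIMED_BIT;
    pthread_mutex_unlock(&p->lock);
    return claimed;
}

int main(void)
{
    struct fake_page page = { .lock = PTHREAD_MUTEX_INITIALIZER };

    atomic_init(&page.private, 0);
    printf("reclaim claimed the page: %d\n", try_claim(&page));
    printf("alloc sees it claimed:    %d\n", alloc_sees_claimed(&page));
    return 0;
}
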
@@ -1380,12 +1400,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
             first_handle = 0;
             last_handle = 0;
             middle_handle = 0;
+            memset(slots.slot, 0, sizeof(slots.slot));
             if (zhdr->first_chunks)
-                first_handle = encode_handle(zhdr, FIRST);
+                first_handle = __encode_handle(zhdr, &slots,
+                                               FIRST);
             if (zhdr->middle_chunks)
-                middle_handle = encode_handle(zhdr, MIDDLE);
+                middle_handle = __encode_handle(zhdr, &slots,
+                                                MIDDLE);
             if (zhdr->last_chunks)
-                last_handle = encode_handle(zhdr, LAST);
+                last_handle = __encode_handle(zhdr, &slots,
+                                              LAST);
             /*
              * it's safe to unlock here because we hold a
              * reference to this page
@@ -1400,19 +1424,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
             ret = pool->ops->evict(pool, middle_handle);
             if (ret)
                 goto next;
-            free_handle(middle_handle, zhdr);
         }
         if (first_handle) {
             ret = pool->ops->evict(pool, first_handle);
             if (ret)
                 goto next;
-            free_handle(first_handle, zhdr);
         }
         if (last_handle) {
             ret = pool->ops->evict(pool, last_handle);
             if (ret)
                 goto next;
-            free_handle(last_handle, zhdr);
         }
 next:
         if (test_bit(PAGE_HEADLESS, &page->private)) {
@@ -1426,9 +1447,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
             spin_unlock(&pool->lock);
             clear_bit(PAGE_CLAIMED, &page->private);
         } else {
+            struct z3fold_buddy_slots *slots = zhdr->slots;
             z3fold_page_lock(zhdr);
             if (kref_put(&zhdr->refcount,
                     release_z3fold_page_locked)) {
+                kmem_cache_free(pool->c_handle, slots);
                 atomic64_dec(&pool->pages_nr);
                 return 0;
             }
@@ -1544,8 +1567,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
     VM_BUG_ON_PAGE(!PageMovable(page), page);
     VM_BUG_ON_PAGE(PageIsolated(page), page);
 
-    if (test_bit(PAGE_HEADLESS, &page->private) ||
-        test_bit(PAGE_CLAIMED, &page->private))
+    if (test_bit(PAGE_HEADLESS, &page->private))
         return false;
 
     zhdr = page_address(page);
@@ -1557,6 +1579,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
     if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
         goto out;
+    if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+        goto out;
     pool = zhdr_to_pool(zhdr);
     spin_lock(&pool->lock);
     if (!list_empty(&zhdr->buddy))
@@ -1583,16 +1607,17 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
     VM_BUG_ON_PAGE(!PageMovable(page), page);
     VM_BUG_ON_PAGE(!PageIsolated(page), page);
+    VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
     VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
 
     zhdr = page_address(page);
     pool = zhdr_to_pool(zhdr);
 
-    if (!z3fold_page_trylock(zhdr)) {
+    if (!z3fold_page_trylock(zhdr))
         return -EAGAIN;
-    }
+
     if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
         z3fold_page_unlock(zhdr);
+        clear_bit(PAGE_CLAIMED, &page->private);
         return -EBUSY;
     }
     if (work_pending(&zhdr->work)) {
@@ -1634,6 +1659,7 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
     queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
     page_mapcount_reset(page);
+    clear_bit(PAGE_CLAIMED, &page->private);
     put_page(page);
     return 0;
 }
@@ -1657,6 +1683,7 @@ static void z3fold_page_putback(struct page *page)
     spin_lock(&pool->lock);
     list_add(&page->lru, &pool->lru);
     spin_unlock(&pool->lock);
+    clear_bit(PAGE_CLAIMED, &page->private);
     z3fold_page_unlock(zhdr);
 }