Commit a0c516cb authored by Minchan Kim's avatar Minchan Kim Committed by Greg Kroah-Hartman

zram: don't grab mutex in zram_slot_free_noity

[1] introduced down_write in zram_slot_free_notify to prevent race
between zram_slot_free_notify and zram_bvec_[read|write]. The race
could happen if somebody who has right permission to open swap device
is reading swap device while it is used by swap in parallel.

However, zram_slot_free_notify is called with holding spin_lock of
swap layer so we shouldn't avoid holing mutex. Otherwise, lockdep
warns it.

This patch adds new list to handle free slot and workqueue
so zram_slot_free_notify just registers slot index to be freed and
registers the request to workqueue. If workqueue is expired,
it holds mutex_lock so there is no problem any more.

If any I/O is issued, zram handles pending slot-free request
caused by zram_slot_free_notify right before handling issued
request because workqueue wouldn't be expired yet so zram I/O
request handling function can miss it.

Lastly, when zram is reset, flush_work could handle all of pending
free request so we shouldn't have memory leak.

NOTE: If zram_slot_free_notify's kmalloc with GFP_ATOMIC would be
failed, the slot will be freed when next write I/O write the slot.

[1] [57ab0485, zram: use zram->lock to protect zram_free_page()
    in swap free notify path]

* from v2
  * refactoring

* from v1
  * totally redesign

Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: stable@vger.kernel.org
Signed-off-by: default avatarMinchan Kim <minchan@kernel.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 2b86ab9c
......@@ -440,6 +440,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}
/*
* zram_slot_free_notify could miss free so that let's
* double check.
*/
if (unlikely(meta->table[index].handle ||
zram_test_flag(meta, index, ZRAM_ZERO)))
zram_free_page(zram, index);
ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
meta->compress_workmem);
......@@ -505,6 +513,20 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
return ret;
}
static void handle_pending_slot_free(struct zram *zram)
{
struct zram_slot_free *free_rq;
spin_lock(&zram->slot_free_lock);
while (zram->slot_free_rq) {
free_rq = zram->slot_free_rq;
zram->slot_free_rq = free_rq->next;
zram_free_page(zram, free_rq->index);
kfree(free_rq);
}
spin_unlock(&zram->slot_free_lock);
}
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, struct bio *bio, int rw)
{
......@@ -512,10 +534,12 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
if (rw == READ) {
down_read(&zram->lock);
handle_pending_slot_free(zram);
ret = zram_bvec_read(zram, bvec, index, offset, bio);
up_read(&zram->lock);
} else {
down_write(&zram->lock);
handle_pending_slot_free(zram);
ret = zram_bvec_write(zram, bvec, index, offset);
up_write(&zram->lock);
}
......@@ -528,6 +552,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
size_t index;
struct zram_meta *meta;
flush_work(&zram->free_work);
down_write(&zram->init_lock);
if (!zram->init_done) {
up_write(&zram->init_lock);
......@@ -722,16 +748,40 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
bio_io_error(bio);
}
static void zram_slot_free(struct work_struct *work)
{
struct zram *zram;
zram = container_of(work, struct zram, free_work);
down_write(&zram->lock);
handle_pending_slot_free(zram);
up_write(&zram->lock);
}
static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq)
{
spin_lock(&zram->slot_free_lock);
free_rq->next = zram->slot_free_rq;
zram->slot_free_rq = free_rq;
spin_unlock(&zram->slot_free_lock);
}
static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
struct zram_slot_free *free_rq;
zram = bdev->bd_disk->private_data;
down_write(&zram->lock);
zram_free_page(zram, index);
up_write(&zram->lock);
atomic64_inc(&zram->stats.notify_free);
free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC);
if (!free_rq)
return;
free_rq->index = index;
add_slot_free(zram, free_rq);
schedule_work(&zram->free_work);
}
static const struct block_device_operations zram_devops = {
......@@ -778,6 +828,10 @@ static int create_device(struct zram *zram, int device_id)
init_rwsem(&zram->lock);
init_rwsem(&zram->init_lock);
INIT_WORK(&zram->free_work, zram_slot_free);
spin_lock_init(&zram->slot_free_lock);
zram->slot_free_rq = NULL;
zram->queue = blk_alloc_queue(GFP_KERNEL);
if (!zram->queue) {
pr_err("Error allocating disk queue for device %d\n",
......
......@@ -94,11 +94,20 @@ struct zram_meta {
struct zs_pool *mem_pool;
};
struct zram_slot_free {
unsigned long index;
struct zram_slot_free *next;
};
struct zram {
struct zram_meta *meta;
struct rw_semaphore lock; /* protect compression buffers, table,
* 32bit stat counters against concurrent
* notifications, reads and writes */
struct work_struct free_work; /* handle pending free request */
struct zram_slot_free *slot_free_rq; /* list head of free request */
struct request_queue *queue;
struct gendisk *disk;
int init_done;
......@@ -109,6 +118,7 @@ struct zram {
* we can store in a disk.
*/
u64 disksize; /* bytes */
spinlock_t slot_free_lock;
struct zram_stats stats;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment