Commit 9f6dc633 authored by Mike Snitzer's avatar Mike Snitzer

dm: interlock pending dm_io and dm_wait_for_bios_completion

Commit d208b894 ("dm: fix mempool NULL pointer race when
completing IO") didn't go far enough.

When bio_end_io_acct ends the count of in-flight I/Os may reach zero
and the DM device may be suspended. There is a possibility that the
suspend races with dm_stats_account_io.

Fix this by adding percpu "pending_io" counters to track outstanding
dm_io. Move kicking of suspend queue to dm_io_dec_pending(). Also,
rename md_in_flight_bios() to dm_in_flight_bios() and update it to
iterate all pending_io counters.

Fixes: d208b894 ("dm: fix mempool NULL pointer race when completing IO")
Cc: stable@vger.kernel.org
Co-developed-by: default avatarMikulas Patocka <mpatocka@redhat.com>
Signed-off-by: default avatarMikulas Patocka <mpatocka@redhat.com>
Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
parent bcd2be76
...@@ -64,6 +64,8 @@ struct mapped_device { ...@@ -64,6 +64,8 @@ struct mapped_device {
struct gendisk *disk; struct gendisk *disk;
struct dax_device *dax_dev; struct dax_device *dax_dev;
unsigned long __percpu *pending_io;
/* /*
* A list of ios that arrived while we were suspended. * A list of ios that arrived while we were suspended.
*/ */
......
...@@ -508,10 +508,6 @@ static void end_io_acct(struct mapped_device *md, struct bio *bio, ...@@ -508,10 +508,6 @@ static void end_io_acct(struct mapped_device *md, struct bio *bio,
dm_stats_account_io(&md->stats, bio_data_dir(bio), dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio), bio->bi_iter.bi_sector, bio_sectors(bio),
true, duration, stats_aux); true, duration, stats_aux);
/* nudge anyone waiting on suspend queue */
if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
} }
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
...@@ -530,6 +526,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) ...@@ -530,6 +526,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->magic = DM_IO_MAGIC; io->magic = DM_IO_MAGIC;
io->status = 0; io->status = 0;
atomic_set(&io->io_count, 1); atomic_set(&io->io_count, 1);
this_cpu_inc(*md->pending_io);
io->orig_bio = bio; io->orig_bio = bio;
io->md = md; io->md = md;
spin_lock_init(&io->endio_lock); spin_lock_init(&io->endio_lock);
...@@ -827,6 +824,12 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) ...@@ -827,6 +824,12 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
stats_aux = io->stats_aux; stats_aux = io->stats_aux;
free_io(md, io); free_io(md, io);
end_io_acct(md, bio, start_time, &stats_aux); end_io_acct(md, bio, start_time, &stats_aux);
smp_wmb();
this_cpu_dec(*md->pending_io);
/* nudge anyone waiting on suspend queue */
if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
if (io_error == BLK_STS_DM_REQUEUE) if (io_error == BLK_STS_DM_REQUEUE)
return; return;
...@@ -1569,6 +1572,11 @@ static void cleanup_mapped_device(struct mapped_device *md) ...@@ -1569,6 +1572,11 @@ static void cleanup_mapped_device(struct mapped_device *md)
blk_cleanup_disk(md->disk); blk_cleanup_disk(md->disk);
} }
if (md->pending_io) {
free_percpu(md->pending_io);
md->pending_io = NULL;
}
cleanup_srcu_struct(&md->io_barrier); cleanup_srcu_struct(&md->io_barrier);
mutex_destroy(&md->suspend_lock); mutex_destroy(&md->suspend_lock);
...@@ -1671,6 +1679,10 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1671,6 +1679,10 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->wq) if (!md->wq)
goto bad; goto bad;
md->pending_io = alloc_percpu(unsigned long);
if (!md->pending_io)
goto bad;
dm_stats_init(&md->stats); dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */ /* Populate the mapping, nobody knows we exist yet */
...@@ -2078,16 +2090,13 @@ void dm_put(struct mapped_device *md) ...@@ -2078,16 +2090,13 @@ void dm_put(struct mapped_device *md)
} }
EXPORT_SYMBOL_GPL(dm_put); EXPORT_SYMBOL_GPL(dm_put);
static bool md_in_flight_bios(struct mapped_device *md) static bool dm_in_flight_bios(struct mapped_device *md)
{ {
int cpu; int cpu;
struct block_device *part = dm_disk(md)->part0; unsigned long sum = 0;
long sum = 0;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu)
sum += part_stat_local_read_cpu(part, in_flight[0], cpu); sum += *per_cpu_ptr(md->pending_io, cpu);
sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
}
return sum != 0; return sum != 0;
} }
...@@ -2100,7 +2109,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta ...@@ -2100,7 +2109,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta
while (true) { while (true) {
prepare_to_wait(&md->wait, &wait, task_state); prepare_to_wait(&md->wait, &wait, task_state);
if (!md_in_flight_bios(md)) if (!dm_in_flight_bios(md))
break; break;
if (signal_pending_state(task_state, current)) { if (signal_pending_state(task_state, current)) {
...@@ -2112,6 +2121,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta ...@@ -2112,6 +2121,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta
} }
finish_wait(&md->wait, &wait); finish_wait(&md->wait, &wait);
smp_rmb();
return r; return r;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment