Commit dd3db2a3 authored by Jens Axboe's avatar Jens Axboe

io_uring: drop file set ref put/get on switch

Dan reports that he triggered a warning on ring exit doing some testing:

percpu ref (io_file_data_ref_zero) <= 0 (0) after switching to atomic
WARNING: CPU: 3 PID: 0 at lib/percpu-refcount.c:160 percpu_ref_switch_to_atomic_rcu+0xe8/0xf0
Modules linked in:
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.6.0-rc3+ #5648
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:percpu_ref_switch_to_atomic_rcu+0xe8/0xf0
Code: e7 ff 55 e8 eb d2 80 3d bd 02 d2 00 00 75 8b 48 8b 55 d8 48 c7 c7 e8 70 e6 81 c6 05 a9 02 d2 00 01 48 8b 75 e8 e8 3a d0 c5 ff <0f> 0b e9 69 ff ff ff 90 55 48 89 fd 53 48 89 f3 48 83 ec 28 48 83
RSP: 0018:ffffc90000110ef8 EFLAGS: 00010292
RAX: 0000000000000045 RBX: 7fffffffffffffff RCX: 0000000000000000
RDX: 0000000000000045 RSI: ffffffff825be7a5 RDI: ffffffff825bc32c
RBP: ffff8881b75eac38 R08: 000000042364b941 R09: 0000000000000045
R10: ffffffff825beb40 R11: ffffffff825be78a R12: 0000607e46005aa0
R13: ffff888107dcdd00 R14: 0000000000000000 R15: 0000000000000009
FS:  0000000000000000(0000) GS:ffff8881b9d80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f49e6a5ea20 CR3: 00000001b747c004 CR4: 00000000001606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <IRQ>
 rcu_core+0x1e4/0x4d0
 __do_softirq+0xdb/0x2f1
 irq_exit+0xa0/0xb0
 smp_apic_timer_interrupt+0x60/0x140
 apic_timer_interrupt+0xf/0x20
 </IRQ>
RIP: 0010:default_idle+0x23/0x170
Code: ff eb ab cc cc cc cc 0f 1f 44 00 00 41 54 55 53 65 8b 2d 10 96 92 7e 0f 1f 44 00 00 e9 07 00 00 00 0f 00 2d 21 d0 51 00 fb f4 <65> 8b 2d f6 95 92 7e 0f 1f 44 00 00 5b 5d 41 5c c3 65 8b 05 e5 95

Turns out that this is due to percpu_ref_switch_to_atomic() only
grabbing a reference to the percpu refcount if it's not already in
atomic mode. io_uring drops a ref and re-gets it when switching back to
percpu mode. We attempt to protect against this with the FFD_F_ATOMIC
bit, but that isn't reliable.

We don't actually need to juggle these refcounts between atomic and
percpu switch, we can just do them when we've switched to atomic mode.
This removes the need for FFD_F_ATOMIC, which wasn't reliable.

Fixes: 05f3fb3c ("io_uring: avoid ring quiesce for fixed file set unregister and update")
Reported-by: default avatarDan Melnic <dmm@fb.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 3a901598
......@@ -183,17 +183,12 @@ struct fixed_file_table {
struct file **files;
};
enum {
FFD_F_ATOMIC,
};
struct fixed_file_data {
struct fixed_file_table *table;
struct io_ring_ctx *ctx;
struct percpu_ref refs;
struct llist_head put_llist;
unsigned long state;
struct work_struct ref_work;
struct completion done;
};
......@@ -5595,7 +5590,6 @@ static void io_ring_file_ref_switch(struct work_struct *work)
data = container_of(work, struct fixed_file_data, ref_work);
io_ring_file_ref_flush(data);
percpu_ref_get(&data->refs);
percpu_ref_switch_to_percpu(&data->refs);
}
......@@ -5771,8 +5765,13 @@ static void io_atomic_switch(struct percpu_ref *ref)
{
struct fixed_file_data *data;
/*
* Juggle reference to ensure we hit zero, if needed, so we can
* switch back to percpu mode
*/
data = container_of(ref, struct fixed_file_data, refs);
clear_bit(FFD_F_ATOMIC, &data->state);
percpu_ref_put(&data->refs);
percpu_ref_get(&data->refs);
}
static bool io_queue_file_removal(struct fixed_file_data *data,
......@@ -5795,11 +5794,7 @@ static bool io_queue_file_removal(struct fixed_file_data *data,
llist_add(&pfile->llist, &data->put_llist);
if (pfile == &pfile_stack) {
if (!test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
percpu_ref_put(&data->refs);
percpu_ref_switch_to_atomic(&data->refs,
io_atomic_switch);
}
percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
wait_for_completion(&done);
flush_work(&data->ref_work);
return false;
......@@ -5873,10 +5868,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
up->offset++;
}
if (ref_switch && !test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
percpu_ref_put(&data->refs);
if (ref_switch)
percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
}
return done ? done : err;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment