Commit 62cf51cb authored by Hou Tao, committed by Alexei Starovoitov

bpf: Enable IRQ after irq_work_raise() completes in unit_free{_rcu}()

Both unit_free() and unit_free_rcu() invoke irq_work_raise() to free
freed objects back to slab and the invocation may also be preempted by
unit_alloc() and unit_alloc() may return NULL unexpectedly as shown in
the following case:

task A         task B

unit_free()
  // high_watermark = 48
  // free_cnt = 49 after free
  irq_work_raise()
    // mark irq work as IRQ_WORK_PENDING
    irq_work_claim()

               // task B preempts task A
               unit_alloc()
                 // free_cnt = 48 after alloc

               // does unit_alloc() 32-times
	       ......
	       // free_cnt = 16

	       unit_alloc()
	         // free_cnt = 15 after alloc
                 // irq work is already PENDING,
                 // so just return
                 irq_work_raise()

	       // does unit_alloc() 15-times
               ......
	       // free_cnt = 0

               unit_alloc()
                 // free_cnt = 0 before alloc
                 return NULL

Fix it by enabling IRQ after irq_work_raise() completes.
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20230901111954.1804721-3-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 566f6de3
@@ -778,11 +778,16 @@ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr)
 		llist_add(llnode, &c->free_llist_extra);
 	}
 	local_dec(&c->active);
-	local_irq_restore(flags);
 	if (cnt > c->high_watermark)
 		/* free few objects from current cpu into global kmalloc pool */
 		irq_work_raise(c);
+	/* Enable IRQ after irq_work_raise() completes, otherwise when current
+	 * task is preempted by task which does unit_alloc(), unit_alloc() may
+	 * return NULL unexpectedly because irq work is already pending but can
+	 * not been triggered and free_llist can not be refilled timely.
+	 */
+	local_irq_restore(flags);
 }
static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
@@ -800,10 +805,10 @@ static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
 		llist_add(llnode, &c->free_llist_extra_rcu);
 	}
 	local_dec(&c->active);
-	local_irq_restore(flags);
 	if (!atomic_read(&c->call_rcu_in_progress))
 		irq_work_raise(c);
+	local_irq_restore(flags);
 }
/* Called from BPF program or from sys_bpf syscall.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment