Commit 79d878f7 authored by Alexei Starovoitov

Merge branch 'Remove unnecessary RCU grace period chaining'

Hou Tao says:

====================
Now bpf uses RCU grace period chaining to wait for the completion of
access from both sleepable and non-sleepable bpf programs: it first calls
call_rcu_tasks_trace() to wait for an RCU Tasks Trace grace period, then
in its callback calls call_rcu() or kfree_rcu() to wait for a normal RCU
grace period.
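
For illustration, here is a minimal sketch of that chaining pattern using a
hypothetical struct my_obj and helpers (not taken from this series): the object
is first handed to call_rcu_tasks_trace(), and the callback then chains a
normal RCU grace period via call_rcu() before the memory is finally freed.

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical object that may still be accessed by both sleepable and
 * non-sleepable bpf programs when it is scheduled for freeing.
 */
struct my_obj {
        struct rcu_head rcu;
        /* payload ... */
};

static void my_obj_free_rcu(struct rcu_head *rcu)
{
        /* Both grace periods have elapsed; the memory can be freed. */
        kfree(container_of(rcu, struct my_obj, rcu));
}

static void my_obj_free_tasks_trace(struct rcu_head *rcu)
{
        /* RCU Tasks Trace GP is over (sleepable progs are done); now chain
         * a normal RCU GP for non-sleepable progs.
         */
        call_rcu(rcu, my_obj_free_rcu);
}

static void my_obj_free(struct my_obj *obj)
{
        call_rcu_tasks_trace(&obj->rcu, my_obj_free_tasks_trace);
}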

According to the implementation of RCU Tasks Trace, it invokes
->postscan_func() to wait for one RCU Tasks Trace grace period, and
rcu_tasks_trace_postscan() in turn invokes synchronize_rcu() to wait for
one normal RCU grace period, so one RCU Tasks Trace grace period implies
one normal RCU grace period. To codify the implication, patch #1
introduces rcu_trace_implies_rcu_gp(), and the following patches use it
to remove these unnecessary RCU grace period chainings.

The other two uses of call_rcu_tasks_trace() are unchanged: for
__bpf_prog_put_rcu() there is no grace period chain, and for
__bpf_tramp_image_put_rcu_tasks() the chain is an RCU Tasks Trace GP
followed by an RCU Tasks GP.

An alternative way to remove these unnecessary RCU grace period
chainings is to use the RCU polling API to check whether a normal RCU
grace period has already passed (e.g. get_state_synchronize_rcu()). But
that needs an extra unsigned long of space for each free element or each
call, which is not affordable for local storage elements, so for now
rcu_trace_implies_rcu_gp() is always used instead.
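
As a rough sketch of that alternative (hypothetical struct elem and helpers,
not part of this series): each element would carry an extra unsigned long
snapshot taken with get_state_synchronize_rcu(), and the tasks-trace callback
would consult poll_state_synchronize_rcu() to decide whether the normal RCU
grace period can be skipped.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct elem {
        struct rcu_head rcu;
        unsigned long rcu_state;        /* the per-element cost mentioned above */
};

static void elem_free_rcu(struct rcu_head *rcu)
{
        kfree(container_of(rcu, struct elem, rcu));
}

static void elem_free_tasks_trace(struct rcu_head *rcu)
{
        struct elem *e = container_of(rcu, struct elem, rcu);

        /* Has a normal RCU GP already elapsed since the snapshot? */
        if (poll_state_synchronize_rcu(e->rcu_state))
                kfree(e);
        else
                call_rcu(&e->rcu, elem_free_rcu);
}

static void elem_free(struct elem *e)
{
        /* Snapshot the RCU state, then wait for sleepable progs to finish. */
        e->rcu_state = get_state_synchronize_rcu();
        call_rcu_tasks_trace(&e->rcu, elem_free_tasks_trace);
}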

Comments are always welcome.

Change Log:

v2:
 * codify the implication of the RCU Tasks Trace grace period instead of
   just assuming it

v1: https://lore.kernel.org/bpf/20221011071128.3470622-1-houtao@huaweicloud.com

Hou Tao (3):
  bpf: Use rcu_trace_implies_rcu_gp() in bpf memory allocator
  bpf: Use rcu_trace_implies_rcu_gp() in local storage map
  bpf: Use rcu_trace_implies_rcu_gp() for program array freeing
====================
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 62c69e89 4835f9ee
@@ -240,6 +240,18 @@ static inline void exit_tasks_rcu_start(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
 
+/**
+ * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period?
+ *
+ * As an accident of implementation, an RCU Tasks Trace grace period also
+ * acts as an RCU grace period. However, this could change at any time.
+ * Code relying on this accident must call this function to verify that
+ * this accident is still happening.
+ *
+ * You have been warned!
+ */
+static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
+
 /**
  * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
  *
@@ -88,8 +88,14 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
 {
        struct bpf_local_storage *local_storage;
 
+       /* If RCU Tasks Trace grace period implies RCU grace period, do
+        * kfree(), else do kfree_rcu().
+        */
        local_storage = container_of(rcu, struct bpf_local_storage, rcu);
-       kfree_rcu(local_storage, rcu);
+       if (rcu_trace_implies_rcu_gp())
+               kfree(local_storage);
+       else
+               kfree_rcu(local_storage, rcu);
 }
 
 static void bpf_selem_free_rcu(struct rcu_head *rcu)
@@ -97,7 +103,10 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
        struct bpf_local_storage_elem *selem;
 
        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
-       kfree_rcu(selem, rcu);
+       if (rcu_trace_implies_rcu_gp())
+               kfree(selem);
+       else
+               kfree_rcu(selem, rcu);
 }
 
 /* local_storage->lock must be held and selem->local_storage == local_storage.
@@ -2251,8 +2251,14 @@ static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
 {
        struct bpf_prog_array *progs;
 
+       /* If RCU Tasks Trace grace period implies RCU grace period, there is
+        * no need to call kfree_rcu(), just call kfree() directly.
+        */
        progs = container_of(rcu, struct bpf_prog_array, rcu);
-       kfree_rcu(progs, rcu);
+       if (rcu_trace_implies_rcu_gp())
+               kfree(progs);
+       else
+               kfree_rcu(progs, rcu);
 }
 
 void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
@@ -222,9 +222,13 @@ static void __free_rcu(struct rcu_head *head)
 
 static void __free_rcu_tasks_trace(struct rcu_head *head)
 {
-       struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
-
-       call_rcu(&c->rcu, __free_rcu);
+       /* If RCU Tasks Trace grace period implies RCU grace period,
+        * there is no need to invoke call_rcu().
+        */
+       if (rcu_trace_implies_rcu_gp())
+               __free_rcu(head);
+       else
+               call_rcu(head, __free_rcu);
 }
 
 static void enque_to_free(struct bpf_mem_cache *c, void *obj)
@@ -253,8 +257,9 @@ static void do_call_rcu(struct bpf_mem_cache *c)
         */
        __llist_add(llnode, &c->waiting_for_gp);
        /* Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
-        * Then use call_rcu() to wait for normal progs to finish
-        * and finally do free_one() on each element.
+        * If RCU Tasks Trace grace period implies RCU grace period, free
+        * these elements directly, else use call_rcu() to wait for normal
+        * progs to finish and finally do free_one() on each element.
         */
        call_rcu_tasks_trace(&c->rcu, __free_rcu_tasks_trace);
 }
@@ -1535,6 +1535,8 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
 {
        // Wait for late-stage exiting tasks to finish exiting.
        // These might have passed the call to exit_tasks_rcu_finish().
+
+       // If you remove the following line, update rcu_trace_implies_rcu_gp()!!!
        synchronize_rcu();
        // Any tasks that exit after this point will set
        // TRC_NEED_QS_CHECKED in ->trc_reader_special.b.need_qs.