Commit 75b607fa authored by Linus Torvalds

Merge tag 'sched_ext-for-6.12-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - ops.enqueue() didn't have a way to tell whether select_task_rq_scx(),
   and thus ops.select_cpu(), was skipped, and some schedulers were
   incorrectly using SCX_ENQ_WAKEUP as a proxy. Add SCX_ENQ_CPU_SELECTED
   and convert scx_qmap to use it (a sketch of the resulting pattern
   follows this list).

 - Remove a spurious WARN_ON_ONCE() in scx_cgroup_exit()

 - Fix error information clobbering during load

 - Add missing __weak markers to BPF helper declarations

 - Doc update

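To make the first item concrete, here is a minimal sketch of the resulting
ops.enqueue() pattern (compare the scx_qmap hunk at the bottom of the diff).
It is illustrative only and not part of this pull: it uses the built-in
SCX_DSQ_GLOBAL and SCX_SLICE_DFL rather than a scheduler's own queues and
slice, and a real scheduler would usually pick a better target than the
task's current CPU.

  #include <scx/common.bpf.h>

  void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
  {
          /*
           * SCX_ENQ_CPU_SELECTED is set only when select_task_rq_scx(), and
           * therefore ops.select_cpu(), actually ran for this enqueue.
           * SCX_ENQ_WAKEUP is not a reliable proxy: a wakeup can reach
           * ops.enqueue() with ->select_task_rq() skipped, e.g. for tasks
           * restricted to a single CPU.
           */
          if (!(enq_flags & SCX_ENQ_CPU_SELECTED)) {
                  /* No CPU was picked for us; dispatch to the task's current CPU. */
                  scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | scx_bpf_task_cpu(p),
                                   SCX_SLICE_DFL, enq_flags);
                  return;
          }

          /* CPU selection ran; queue on the built-in global DSQ. */
          scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
  }
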
* tag 'sched_ext-for-6.12-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Documentation: Update instructions for running example schedulers
  sched_ext, scx_qmap: Add and use SCX_ENQ_CPU_SELECTED
  sched/core: Add ENQUEUE_RQ_SELECTED to indicate whether ->select_task_rq() was called
  sched/core: Make select_task_rq() take the pointer to wake_flags instead of value
  sched_ext: scx_cgroup_exit() may be called without successful scx_cgroup_init()
  sched_ext: Improve error reporting during loading
  sched_ext: Add __weak markers to BPF helper function declarations
parents 5b7c893e e0ed5215
@@ -66,7 +66,7 @@ BPF scheduler and reverts all tasks back to CFS.
 .. code-block:: none

     # make -j16 -C tools/sched_ext
-    # tools/sched_ext/scx_simple
+    # tools/sched_ext/build/bin/scx_simple
     local=0 global=3
     local=5 global=24
     local=9 global=44
@@ -3518,14 +3518,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
 {
         lockdep_assert_held(&p->pi_lock);

-        if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
-                cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
-        else
+        if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
+                cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
+                *wake_flags |= WF_RQ_SELECTED;
+        } else {
                 cpu = cpumask_any(p->cpus_ptr);
+        }

         /*
          * In order not to call set_task_cpu() on a blocking task we need
@@ -3659,6 +3661,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
                 rq->nr_uninterruptible--;

 #ifdef CONFIG_SMP
+        if (wake_flags & WF_RQ_SELECTED)
+                en_flags |= ENQUEUE_RQ_SELECTED;
         if (wake_flags & WF_MIGRATED)
                 en_flags |= ENQUEUE_MIGRATED;
         else
@@ -4120,6 +4124,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         guard(preempt)();
         int cpu, success = 0;

+        wake_flags |= WF_TTWU;
+
         if (p == current) {
                 /*
                  * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4252,7 +4258,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          */
         smp_cond_load_acquire(&p->on_cpu, !VAL);

-        cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+        cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
         if (task_cpu(p) != cpu) {
                 if (p->in_iowait) {
                         delayacct_blkio_end(p);
@@ -4793,6 +4799,7 @@ void wake_up_new_task(struct task_struct *p)
 {
         struct rq_flags rf;
         struct rq *rq;
+        int wake_flags = WF_FORK;

         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
         WRITE_ONCE(p->__state, TASK_RUNNING);
@@ -4807,7 +4814,7 @@ void wake_up_new_task(struct task_struct *p)
          */
         p->recent_used_cpu = task_cpu(p);
         rseq_migrate(p);
-        __set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
+        __set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
 #endif
         rq = __task_rq_lock(p, &rf);
         update_rq_clock(rq);
@@ -4815,7 +4822,7 @@ void wake_up_new_task(struct task_struct *p)
         activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
         trace_sched_wakeup_new(p);
-        wakeup_preempt(rq, p, WF_FORK);
+        wakeup_preempt(rq, p, wake_flags);
 #ifdef CONFIG_SMP
         if (p->sched_class->task_woken) {
                 /*
@@ -625,6 +625,10 @@ struct sched_ext_ops {
         /**
          * exit - Clean up after the BPF scheduler
          * @info: Exit info
+         *
+         * ops.exit() is also called on ops.init() failure, which is a bit
+         * unusual. This is to allow rich reporting through @info on how
+         * ops.init() failed.
          */
         void (*exit)(struct scx_exit_info *info);

@@ -692,6 +696,7 @@ enum scx_enq_flags {
         /* expose select ENQUEUE_* flags as enums */
         SCX_ENQ_WAKEUP          = ENQUEUE_WAKEUP,
         SCX_ENQ_HEAD            = ENQUEUE_HEAD,
+        SCX_ENQ_CPU_SELECTED    = ENQUEUE_RQ_SELECTED,

         /* high 32bits are SCX specific */
@@ -4048,7 +4053,6 @@ static void scx_cgroup_exit(void)

         percpu_rwsem_assert_held(&scx_cgroup_rwsem);

-        WARN_ON_ONCE(!scx_cgroup_enabled);
         scx_cgroup_enabled = false;

         /*
@@ -4117,6 +4121,7 @@ static int scx_cgroup_init(void)
                                               css->cgroup, &args);
                 if (ret) {
                         css_put(css);
+                        scx_ops_error("ops.cgroup_init() failed (%d)", ret);
                         return ret;
                 }
                 tg->scx_flags |= SCX_TG_INITED;
@@ -5041,6 +5046,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
                 if (ret) {
                         ret = ops_sanitize_err("init", ret);
                         cpus_read_unlock();
+                        scx_ops_error("ops.init() failed (%d)", ret);
                         goto err_disable;
                 }
         }
@@ -5150,7 +5156,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
                         spin_lock_irq(&scx_tasks_lock);
                         scx_task_iter_exit(&sti);
                         spin_unlock_irq(&scx_tasks_lock);
-                        pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
-                               ret, p->comm, p->pid);
+                        scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
+                                      ret, p->comm, p->pid);
                         goto err_disable_unlock_all;
                 }
@@ -5199,14 +5205,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)

         scx_ops_bypass(false);

-        /*
-         * Returning an error code here would lose the recorded error
-         * information. Exit indicating success so that the error is notified
-         * through ops.exit() with all the details.
-         */
         if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
                 WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
-                ret = 0;
                 goto err_disable;
         }
@@ -5241,10 +5241,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
         scx_ops_bypass(false);
 err_disable:
         mutex_unlock(&scx_ops_enable_mutex);
-        /* must be fully disabled before returning */
-        scx_ops_disable(SCX_EXIT_ERROR);
+        /*
+         * Returning an error code here would not pass all the error information
+         * to userspace. Record errno using scx_ops_error() for cases
+         * scx_ops_error() wasn't already invoked and exit indicating success so
+         * that the error is notified through ops.exit() with all the details.
+         *
+         * Flush scx_ops_disable_work to ensure that error is reported before
+         * init completion.
+         */
+        scx_ops_error("scx_ops_enable() failed (%d)", ret);
         kthread_flush_work(&scx_ops_disable_work);
-        return ret;
+        return 0;
 }
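
Since ops.exit() now also runs when loading fails (see the kerneldoc added
above), a scheduler can surface the message recorded by scx_ops_error() the
same way it reports any other exit. A minimal sketch of such an exit
callback, using the UEI_* helpers that the in-tree example schedulers under
tools/sched_ext rely on (an assumption if you are building outside that tree):

  #include <scx/common.bpf.h>

  UEI_DEFINE(uei);        /* exit info buffer that userspace reads after unload */

  void BPF_STRUCT_OPS(example_exit, struct scx_exit_info *ei)
  {
          /*
           * With this pull, load-time failures (ops.init(), ops.cgroup_init(),
           * per-task init) are reported here with the full scx_ops_error()
           * message instead of surfacing only a bare errno to userspace.
           */
          UEI_RECORD(uei, ei);
  }

The example loaders in tools/sched_ext print this recorded info back once the
scheduler detaches.
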
@@ -2292,6 +2292,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 #define WF_SYNC          0x10 /* Waker goes to sleep after wakeup */
 #define WF_MIGRATED      0x20 /* Internal use, task got migrated */
 #define WF_CURRENT_CPU   0x40 /* Prefer to move the wakee to the current CPU. */
+#define WF_RQ_SELECTED   0x80 /* ->select_task_rq() was called */

 #ifdef CONFIG_SMP
 static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -2334,6 +2335,7 @@ extern const u32 sched_prio_to_wmult[40];
  * ENQUEUE_HEAD        - place at front of runqueue (tail if not specified)
  * ENQUEUE_REPLENISH   - CBS (replenish runtime and postpone deadline)
  * ENQUEUE_MIGRATED    - the task was migrated during wakeup
+ * ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
  *
  */
@@ -2360,6 +2362,7 @@ extern const u32 sched_prio_to_wmult[40];
 #define ENQUEUE_INITIAL     0x80
 #define ENQUEUE_MIGRATING   0x100
 #define ENQUEUE_DELAYED     0x200
+#define ENQUEUE_RQ_SELECTED 0x400

 #define RETRY_TASK          ((void *)-1UL)
@@ -41,8 +41,8 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt
 u32 scx_bpf_dispatch_nr_slots(void) __ksym;
 void scx_bpf_dispatch_cancel(void) __ksym;
 bool scx_bpf_consume(u64 dsq_id) __ksym;
-void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
-void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
+void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
 bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 u32 scx_bpf_reenqueue_local(void) __ksym;
@@ -71,7 +71,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
-struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
+struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;

 /*
  * Use the following as @it__iter when calling
@@ -230,8 +230,8 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
                 return;
         }

-        /* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
-        if (!(enq_flags & SCX_ENQ_WAKEUP) &&
+        /* if select_cpu() wasn't called, try direct dispatch */
+        if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
             (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
                 __sync_fetch_and_add(&nr_ddsp_from_enq, 1);
                 scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);