summaryrefslogtreecommitdiffstats
path: root/kernel/sched/ext.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/ext.c')
-rw-r--r--kernel/sched/ext.c89
1 files changed, 68 insertions, 21 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 7fff1d045477..19813b387ef9 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2747,6 +2747,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
{
struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
bool prev_on_scx = prev->sched_class == &ext_sched_class;
+ bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
int nr_loops = SCX_DSP_MAX_LOOPS;
lockdep_assert_rq_held(rq);
@@ -2779,8 +2780,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
* See scx_ops_disable_workfn() for the explanation on the
* bypassing test.
*/
- if ((prev->scx.flags & SCX_TASK_QUEUED) &&
- prev->scx.slice && !scx_rq_bypassing(rq)) {
+ if (prev_on_rq && prev->scx.slice && !scx_rq_bypassing(rq)) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
goto has_tasks;
}
@@ -2813,6 +2813,10 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
flush_dispatch_buf(rq);
+ if (prev_on_rq && prev->scx.slice) {
+ rq->scx.flags |= SCX_RQ_BAL_KEEP;
+ goto has_tasks;
+ }
if (rq->scx.local_dsq.nr)
goto has_tasks;
if (consume_global_dsq(rq))
@@ -2838,8 +2842,7 @@ no_tasks:
* Didn't find another task to run. Keep running @prev unless
* %SCX_OPS_ENQ_LAST is in effect.
*/
- if ((prev->scx.flags & SCX_TASK_QUEUED) &&
- (!static_branch_unlikely(&scx_ops_enq_last) ||
+ if (prev_on_rq && (!static_branch_unlikely(&scx_ops_enq_last) ||
scx_rq_bypassing(rq))) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
goto has_tasks;
@@ -3034,7 +3037,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
*/
if (p->scx.slice && !scx_rq_bypassing(rq)) {
dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
- return;
+ goto switch_class;
}
/*
@@ -3051,6 +3054,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
}
}
+switch_class:
if (next && next->sched_class != &ext_sched_class)
switch_class(rq, next);
}
@@ -3586,16 +3590,8 @@ static void reset_idle_masks(void)
cpumask_copy(idle_masks.smt, cpu_online_mask);
}
-void __scx_update_idle(struct rq *rq, bool idle)
+static void update_builtin_idle(int cpu, bool idle)
{
- int cpu = cpu_of(rq);
-
- if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
- SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
- if (!static_branch_unlikely(&scx_builtin_idle_enabled))
- return;
- }
-
if (idle)
cpumask_set_cpu(cpu, idle_masks.cpu);
else
@@ -3622,6 +3618,57 @@ void __scx_update_idle(struct rq *rq, bool idle)
#endif
}
+/*
+ * Update the idle state of a CPU to @idle.
+ *
+ * If @do_notify is true, ops.update_idle() is invoked to notify the scx
+ * scheduler of an actual idle state transition (idle to busy or vice
+ * versa). If @do_notify is false, only the idle state in the idle masks is
+ * refreshed without invoking ops.update_idle().
+ *
+ * This distinction is necessary, because an idle CPU can be "reserved" and
+ * awakened via scx_bpf_pick_idle_cpu() + scx_bpf_kick_cpu(), marking it as
+ * busy even if no tasks are dispatched. In this case, the CPU may return
+ * to idle without a true state transition. Refreshing the idle masks
+ * without invoking ops.update_idle() ensures accurate idle state tracking
+ * while avoiding unnecessary updates and maintaining balanced state
+ * transitions.
+ */
+void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
+{
+ int cpu = cpu_of(rq);
+
+ lockdep_assert_rq_held(rq);
+
+ /*
+ * Trigger ops.update_idle() only when transitioning from a task to
+ * the idle thread and vice versa.
+ *
+ * Idle transitions are indicated by do_notify being set to true,
+ * managed by put_prev_task_idle()/set_next_task_idle().
+ */
+ if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
+ SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
+
+ /*
+ * Update the idle masks:
+ * - for real idle transitions (do_notify == true)
+ * - for idle-to-idle transitions (indicated by the previous task
+ * being the idle thread, managed by pick_task_idle())
+ *
+ * Skip updating idle masks if the previous task is not the idle
+ * thread, since set_next_task_idle() has already handled it when
+ * transitioning from a task to the idle thread (calling this
+ * function with do_notify == true).
+ *
+ * In this way we can avoid updating the idle masks twice,
+ * unnecessarily.
+ */
+ if (static_branch_likely(&scx_builtin_idle_enabled))
+ if (do_notify || is_idle_task(rq->curr))
+ update_builtin_idle(cpu, idle);
+}
+
static void handle_hotplug(struct rq *rq, bool online)
{
int cpu = cpu_of(rq);
@@ -4744,10 +4791,9 @@ static void scx_ops_bypass(bool bypass)
*/
for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
- struct rq_flags rf;
struct task_struct *p, *n;
- rq_lock(rq, &rf);
+ raw_spin_rq_lock(rq);
if (bypass) {
WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
@@ -4763,7 +4809,7 @@ static void scx_ops_bypass(bool bypass)
* sees scx_rq_bypassing() before moving tasks to SCX.
*/
if (!scx_enabled()) {
- rq_unlock_irqrestore(rq, &rf);
+ raw_spin_rq_unlock(rq);
continue;
}
@@ -4783,10 +4829,11 @@ static void scx_ops_bypass(bool bypass)
sched_enq_and_set_task(&ctx);
}
- rq_unlock(rq, &rf);
-
/* resched to restore ticks and idle state */
- resched_cpu(cpu);
+ if (cpu_online(cpu) || cpu == smp_processor_id())
+ resched_curr(rq);
+
+ raw_spin_rq_unlock(rq);
}
atomic_dec(&scx_ops_breather_depth);
@@ -7013,7 +7060,7 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id,
return -ENOENT;
INIT_LIST_HEAD(&kit->cursor.node);
- kit->cursor.flags |= SCX_DSQ_LNODE_ITER_CURSOR | flags;
+ kit->cursor.flags = SCX_DSQ_LNODE_ITER_CURSOR | flags;
kit->cursor.priv = READ_ONCE(kit->dsq->seq);
return 0;