
[PATCH 00/12] sched: cleanup set_task_cpu() issues


Peter Zijlstra

Dec 16, 2009, 12:20:02 PM

A lot more invasive than I would have liked, but at least it seems to cover all
races I spotted.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Peter Zijlstra

Dec 16, 2009, 12:20:02 PM

foo0.patch

Peter Zijlstra

Dec 16, 2009, 12:20:03 PM

foo5.patch

Peter Zijlstra

Dec 16, 2009, 12:20:02 PM

foo6.patch

Peter Zijlstra

Dec 16, 2009, 12:20:03 PM

foo2.patch

tip-bot for Peter Zijlstra

Dec 16, 2009, 1:40:02 PM

Commit-ID: 881232b70b195768a71cd74ff4b4e8ab9502997b
Gitweb: http://git.kernel.org/tip/881232b70b195768a71cd74ff4b4e8ab9502997b
Author: Peter Zijlstra <a.p.zi...@chello.nl>
AuthorDate: Wed, 16 Dec 2009 18:04:39 +0100
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Wed, 16 Dec 2009 19:01:57 +0100

sched: Move kthread_bind() back to kthread.c

Since kthread_bind() lost its dependencies on sched.c, move it
back where it came from.

Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
LKML-Reference: <200912161705...@chello.nl>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
kernel/kthread.c | 23 +++++++++++++++++++++++
kernel/sched.c | 26 --------------------------
2 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index ab7ae57..fbb6222 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -150,6 +150,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
EXPORT_SYMBOL(kthread_create);

/**
+ * kthread_bind - bind a just-created kthread to a cpu.
+ * @p: thread created by kthread_create().
+ * @cpu: cpu (might not be online, must be possible) for @k to run on.
+ *
+ * Description: This function is equivalent to set_cpus_allowed(),
+ * except that @cpu doesn't need to be online, and the thread must be
+ * stopped (i.e., just returned from kthread_create()).
+ */
+void kthread_bind(struct task_struct *p, unsigned int cpu)
+{
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ WARN_ON(1);
+ return;
+ }
+
+ p->cpus_allowed = cpumask_of_cpu(cpu);
+ p->rt.nr_cpus_allowed = 1;
+ p->flags |= PF_THREAD_BOUND;
+}
+EXPORT_SYMBOL(kthread_bind);
+
+/**
* kthread_stop - stop a thread created by kthread_create().
* @k: thread created by kthread_create().
*
diff --git a/kernel/sched.c b/kernel/sched.c
index cc40bda..297dc44 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2004,32 +2004,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
p->sched_class->prio_changed(rq, p, oldprio, running);
}

-/**
- * kthread_bind - bind a just-created kthread to a cpu.
- * @p: thread created by kthread_create().
- * @cpu: cpu (might not be online, must be possible) for @k to run on.
- *
- * Description: This function is equivalent to set_cpus_allowed(),
- * except that @cpu doesn't need to be online, and the thread must be
- * stopped (i.e., just returned from kthread_create()).
- *
- * Function lives here instead of kthread.c because it messes with
- * scheduler internals which require locking.
- */
-void kthread_bind(struct task_struct *p, unsigned int cpu)
-{
- /* Must have done schedule() in kthread() before we set_task_cpu */
- if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
- WARN_ON(1);
- return;
- }
-
- p->cpus_allowed = cpumask_of_cpu(cpu);
- p->rt.nr_cpus_allowed = 1;
- p->flags |= PF_THREAD_BOUND;
-}
-EXPORT_SYMBOL(kthread_bind);
-
#ifdef CONFIG_SMP
/*
* Is this task likely cache-hot:
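
As a reading aid, the usual caller pattern the docstring above describes is:
create the thread, bind it while it is still stopped, then wake it. The
thread function, name format and helper below are made-up illustrations,
not anything taken from this patch.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static int example_threadfn(void *data)
{
	/* Idle loop standing in for the real per-cpu work. */
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
	return 0;
}

static struct task_struct *example_start_on_cpu(unsigned int cpu)
{
	struct task_struct *p;

	p = kthread_create(example_threadfn, NULL, "example/%u", cpu);
	if (IS_ERR(p))
		return p;

	/*
	 * Binding is only legal while the thread is still stopped,
	 * i.e. straight after kthread_create(); @cpu need not be
	 * online yet, only possible.
	 */
	kthread_bind(p, cpu);
	wake_up_process(p);

	return p;
}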

tip-bot for Peter Zijlstra

Dec 16, 2009, 1:40:03 PM

Commit-ID: 3802290628348674985d14914f9bfee7b9084548
Gitweb: http://git.kernel.org/tip/3802290628348674985d14914f9bfee7b9084548
Author: Peter Zijlstra <a.p.zi...@chello.nl>
AuthorDate: Wed, 16 Dec 2009 18:04:37 +0100
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Wed, 16 Dec 2009 19:01:56 +0100

sched: Fix sched_exec() balancing

Since we access ->cpus_allowed without holding rq->lock, we need
a retry loop to validate the result; this comes nearly for free
when we merge sched_migrate_task() into sched_exec(), since that
already does the needed check.

Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
LKML-Reference: <200912161705...@chello.nl>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---

kernel/sched.c | 45 +++++++++++++++++++++++----------------------
1 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 33d7965..63e55ac 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2322,7 +2322,7 @@ void task_oncpu_function_call(struct task_struct *p,
*
* - fork, @p is stable because it isn't on the tasklist yet
*
- * - exec, @p is unstable XXX
+ * - exec, @p is unstable, retry loop
*
* - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
* we should be good.
@@ -3132,21 +3132,36 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
}

/*
- * If dest_cpu is allowed for this process, migrate the task to it.
- * This is accomplished by forcing the cpu_allowed mask to only
- * allow dest_cpu, which will force the cpu onto dest_cpu. Then
- * the cpu_allowed mask is restored.
+ * sched_exec - execve() is a valuable balancing opportunity, because at
+ * this point the task has the smallest effective memory and cache footprint.
*/
-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+void sched_exec(void)
{
+ struct task_struct *p = current;
struct migration_req req;
+ int dest_cpu, this_cpu;
unsigned long flags;
struct rq *rq;

+again:
+ this_cpu = get_cpu();
+ dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
+ if (dest_cpu == this_cpu) {
+ put_cpu();
+ return;
+ }
+
rq = task_rq_lock(p, &flags);
+ put_cpu();
+
+ /*
+ * select_task_rq() can race against ->cpus_allowed
+ */
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
- || unlikely(!cpu_active(dest_cpu)))
- goto out;
+ || unlikely(!cpu_active(dest_cpu))) {
+ task_rq_unlock(rq, &flags);
+ goto again;
+ }

/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
@@ -3161,24 +3176,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)

return;
}
-out:
task_rq_unlock(rq, &flags);
}

/*
- * sched_exec - execve() is a valuable balancing opportunity, because at
- * this point the task has the smallest effective memory and cache footprint.
- */
-void sched_exec(void)
-{
- int new_cpu, this_cpu = get_cpu();
- new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
- put_cpu();
- if (new_cpu != this_cpu)
- sched_migrate_task(current, new_cpu);
-}
-
-/*
* pull_task - move a task from a remote runqueue to the local runqueue.
* Both runqueues must be locked.
*/
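
Seen in isolation, the pattern the changelog leans on is: read the value
optimistically without the lock, re-validate it once the lock is held,
and retry if it changed in between. A stand-alone user-space sketch of
that pattern (everything here is made up for illustration and is not
kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int allowed_mask = 0x3;	/* stands in for p->cpus_allowed */

/* Optimistic pick: reads allowed_mask without holding the lock. */
static int pick_target(void)
{
	int snap = allowed_mask;

	return (snap & 1) ? 0 : 1;
}

/* Re-validate the pick under the lock; retry if the mask changed meanwhile. */
static int pick_target_validated(void)
{
	int cpu;
again:
	cpu = pick_target();

	pthread_mutex_lock(&lock);
	if (!(allowed_mask & (1 << cpu))) {
		pthread_mutex_unlock(&lock);
		goto again;
	}
	/* cpu is known to be allowed while we hold the lock; act on it here. */
	pthread_mutex_unlock(&lock);

	return cpu;
}

int main(void)
{
	printf("picked target %d\n", pick_target_validated());
	return 0;
}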

tip-bot for Peter Zijlstra

Dec 16, 2009, 1:50:02 PM

Commit-ID: efbbd05a595343a413964ad85a2ad359b7b7efbd
Gitweb: http://git.kernel.org/tip/efbbd05a595343a413964ad85a2ad359b7b7efbd
Author: Peter Zijlstra <a.p.zi...@chello.nl>
AuthorDate: Wed, 16 Dec 2009 18:04:40 +0100
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Wed, 16 Dec 2009 19:01:58 +0100

sched: Add pre and post wakeup hooks

As will be apparent in the next patch, we need a pre wakeup hook
for sched_fair task migration; hence rename the post wakeup hook
and add a pre wakeup hook.

Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
LKML-Reference: <200912161705...@chello.nl>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---

include/linux/sched.h | 3 ++-
kernel/sched.c | 12 ++++++++----
kernel/sched_rt.c | 4 ++--
3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c858f3..2c9fa1c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1091,7 +1091,8 @@ struct sched_class {
enum cpu_idle_type idle);
void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
void (*post_schedule) (struct rq *this_rq);
- void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+ void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+ void (*task_woken) (struct rq *this_rq, struct task_struct *task);

void (*set_cpus_allowed)(struct task_struct *p,
const struct cpumask *newmask);
diff --git a/kernel/sched.c b/kernel/sched.c
index 297dc44..6c571bd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2412,6 +2412,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
p->state = TASK_WAKING;
+
+ if (p->sched_class->task_waking)
+ p->sched_class->task_waking(rq, p);
+
__task_rq_unlock(rq);

cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
@@ -2475,8 +2479,8 @@ out_running:

p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
- if (p->sched_class->task_wake_up)
- p->sched_class->task_wake_up(rq, p);
+ if (p->sched_class->task_woken)
+ p->sched_class->task_woken(rq, p);

if (unlikely(rq->idle_stamp)) {
u64 delta = rq->clock - rq->idle_stamp;
@@ -2666,8 +2670,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
trace_sched_wakeup_new(rq, p, 1);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
- if (p->sched_class->task_wake_up)
- p->sched_class->task_wake_up(rq, p);
+ if (p->sched_class->task_woken)
+ p->sched_class->task_woken(rq, p);
#endif
task_rq_unlock(rq, &flags);
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d2ea282..f48328a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq)
* If we are not running and we are not going to reschedule soon, we should
* try to push tasks away now
*/
-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = {
.rq_offline = rq_offline_rt,
.pre_schedule = pre_schedule_rt,
.post_schedule = post_schedule_rt,
- .task_wake_up = task_wake_up_rt,
+ .task_woken = task_woken_rt,
.switched_from = switched_from_rt,
#endif
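
The ordering the two hooks are meant to provide -- task_waking() fires
while the task is in TASK_WAKING, before a runqueue has been chosen, and
task_woken() fires once the task is runnable on its new runqueue -- can be
modelled in a few lines. The toy program below only demonstrates that call
order; every name carrying a _model suffix is made up for illustration.

#include <stdio.h>

struct sched_class_model {
	void (*task_waking)(void);
	void (*task_woken)(void);
};

static void waking_model(void)
{
	printf("task_waking: task is TASK_WAKING, no runqueue chosen yet\n");
}

static void woken_model(void)
{
	printf("task_woken: task is runnable on its new runqueue\n");
}

static void try_to_wake_up_model(const struct sched_class_model *class)
{
	if (class->task_waking)
		class->task_waking();
	/* ... a runqueue would be selected and the task activated here ... */
	if (class->task_woken)
		class->task_woken();
}

int main(void)
{
	const struct sched_class_model model = {
		.task_waking = waking_model,
		.task_woken = woken_model,
	};

	try_to_wake_up_model(&model);
	return 0;
}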

tip-bot for Peter Zijlstra

Dec 16, 2009, 1:50:02 PM

Commit-ID: 738d2be4301007f054541c5c4bf7fb6a361c9b3a
Gitweb: http://git.kernel.org/tip/738d2be4301007f054541c5c4bf7fb6a361c9b3a
Author: Peter Zijlstra <a.p.zi...@chello.nl>
AuthorDate: Wed, 16 Dec 2009 18:04:42 +0100
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Wed, 16 Dec 2009 19:01:59 +0100

sched: Simplify set_task_cpu()

Rearrange the code a bit now that it's a simpler function.

Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
LKML-Reference: <200912161705...@chello.nl>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---

kernel/sched.c | 13 +++++--------
1 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index f92ce63..8a2bfd3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2034,11 +2034,8 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
return delta < (s64)sysctl_sched_migration_cost;
}

-
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
- int old_cpu = task_cpu(p);
-
#ifdef CONFIG_SCHED_DEBUG
/*
* We should never call set_task_cpu() on a blocked task,
@@ -2049,11 +2046,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)

trace_sched_migrate_task(p, new_cpu);

- if (old_cpu != new_cpu) {
- p->se.nr_migrations++;
- perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
- 1, 1, NULL, 0);
- }
+ if (task_cpu(p) == new_cpu)
+ return;
+
+ p->se.nr_migrations++;
+ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);

__set_task_cpu(p, new_cpu);

Peter Zijlstra

Dec 16, 2009, 4:20:02 PM

Since you're running into this warning but not seeing any obvious ill
side effects (you'd otherwise be seeing some serious starvation issues),
please stick this patch in for now.

I can't convince myself whether it's a real problem or not, nor how it
could possibly happen, but my brain is giving up for today.

---
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -2036,14 +2036,6 @@ task_hot(struct task_struct *p, u64 now,



void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{

-#ifdef CONFIG_SCHED_DEBUG
- /*
- * We should never call set_task_cpu() on a blocked task,
- * ttwu() will sort out the placement.
- */
- WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING);
-#endif
-
trace_sched_migrate_task(p, new_cpu);

if (task_cpu(p) == new_cpu)
