10420275 170645395 94 context_switch sched.c 3043
10408421 171098521 94 context_switch sched.c 3050
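(For readers skimming the raw profiler output above: the columns are correct hits, incorrect hits, percentage of incorrect hints, then the function, file and line of the annotation, as reported by the ftrace annotated-branch profiler. A rough sketch of the per-branch record behind those columns follows; field names match include/linux/compiler.h of kernels from this era with CONFIG_PROFILE_ANNOTATED_BRANCHES enabled, but treat the exact layout as an approximation, not a verbatim copy.)

/*
 * Sketch only: roughly what the annotated-branch profiler records per
 * likely()/unlikely() site.  See include/linux/compiler.h and
 * kernel/trace/trace_branch.c for the real definitions.
 */
struct ftrace_branch_data {
	const char	*func;		/* function containing the hint */
	const char	*file;		/* source file of the hint      */
	unsigned	line;		/* line number of the hint      */
	unsigned long	correct;	/* times the hint matched reality */
	unsigned long	incorrect;	/* times the hint was wrong       */
};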
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/sched.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index c78dcfe..2a78b38 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3040,14 +3040,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
arch_start_context_switch(prev);
- if (unlikely(!mm)) {
+ if (likely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (likely(!prev->mm)) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
--
1.6.4.2
6548732 174664120 96 __switch_to process_64.c
406
6548745 174565593 96 __switch_to process_64.c
410
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
arch/x86/kernel/process_64.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c8d0ece..1e2e1fe 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -403,11 +403,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
- if (unlikely(next->es | prev->es))
+ if (next->es | prev->es)
loadsegment(es, next->es);
savesegment(ds, prev->ds);
- if (unlikely(next->ds | prev->ds))
+ if (next->ds | prev->ds)
loadsegment(ds, next->ds);
--
1.6.4.2
131609 3840434 96 queue_delayed_work_on workqueue.c 258
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/workqueue.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0a98bef..6985ab0 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -255,7 +255,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
timer->data = (unsigned long)dwork;
timer->function = delayed_work_timer_fn;
- if (unlikely(cpu >= 0))
+ if (cpu >= 0)
add_timer_on(timer, cpu);
else
add_timer(timer);
--
1.6.4.2
28459 7678524 99 __cache_alloc_node slab.c 3551
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
mm/slab.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index f70b326..4125fcd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3548,7 +3548,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
slab_irq_save(save_flags, this_cpu);
this_node = cpu_to_node(this_cpu);
- if (unlikely(nodeid == -1))
+ if (nodeid == -1)
nodeid = this_node;
if (unlikely(!cachep->nodelists[nodeid])) {
--
1.6.4.2
15728471 158903754 90 pick_next_task_fair sched_fair.c 1555
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/sched_fair.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e5945df..5a0965c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1552,7 +1552,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
- if (unlikely(!cfs_rq->nr_running))
+ if (!cfs_rq->nr_running)
return NULL;
do {
--
1.6.4.2
That sounds odd to me. Can you see where the incorrectly predicted
calls are coming from? Calling kmem_cache_alloc_node() with node set
to -1 most of the time could be a real bug somewhere.
--
> On Tue, Nov 24, 2009 at 1:20 PM, Ingo Molnar <mi...@elte.hu> wrote:
> > (Pekka Cc:-ed)
> >
> > * Tim Blechmann <t...@klingt.org> wrote:
> >
> >> branch profiling on my nehalem machine showed 99% incorrect branch hints:
> >>
> >>    28459  7678524  99 __cache_alloc_node             slab.c 3551
> >>
> >> Signed-off-by: Tim Blechmann <t...@klingt.org>
> >> ---
> >>  mm/slab.c |    2 +-
> >>  1 files changed, 1 insertions(+), 1 deletions(-)
> >>
> >> diff --git a/mm/slab.c b/mm/slab.c
> >> index f70b326..4125fcd 100644
> >> --- a/mm/slab.c
> >> +++ b/mm/slab.c
> >> @@ -3548,7 +3548,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
> >>       slab_irq_save(save_flags, this_cpu);
> >>       this_node = cpu_to_node(this_cpu);
> >> -     if (unlikely(nodeid == -1))
> >> +     if (nodeid == -1)
> >>               nodeid = this_node;
> >>       if (unlikely(!cachep->nodelists[nodeid])) {
>
> That sounds odd to me. Can you see where the incorrectly predicted
> calls are coming from? Calling kmem_cache_alloc_node() with node set
> to -1 most of the time could be a real bug somewhere.
I think it could be that the tests are too limited - the branch
prediction looks 'wrong' in certain tests while being OK in general.
Is there some easy-to-run workload you consider more or less
representative of typical SLAB patterns?
<plug> You might want to pull even with the scheduler subsystem and in
addition to 'perf bench sched', add a 'perf bench slab' set of
interesting testcases for SLAB performance testing. :-)
</plug>
Ingo
i don't know if there is any facility in the ftrace branch profiler to
get call-graph information, but i can try to manually dump backtraces
in this code path ...
could be a specific situation on my machine, though ...
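(One minimal way to do that - a sketch only, not a tested patch, using long-standing kernel helpers; the helper name is made up for illustration:)

#include <linux/kernel.h>	/* printk_ratelimit(), dump_stack() */

/* Temporarily instrument the surprising branch in __cache_alloc_node()
 * to see who keeps passing nodeid == -1. */
static inline void report_unspecified_node(int nodeid)
{
	if (nodeid == -1 && printk_ratelimit())
		dump_stack();	/* backtrace of the calling chain */
}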
tim
--
t...@klingt.org
http://tim.klingt.org
You don't have to call it music if the term shocks you.
John Cage
sched, x86: Optimize branch hint in __switch_to()
Branch hint profiling on my nehalem machine showed 96%
incorrect branch hints:
6548732 174664120 96 __switch_to process_64.c 406
6548745 174565593 96 __switch_to process_64.c 410
Signed-off-by: Tim Blechmann <t...@klingt.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
LKML-Reference: <4B0BBB93...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/process_64.c | 5 ++---
1 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ad535b6..d9db104 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -406,11 +406,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
- if (unlikely(next->es | prev->es))
+ if (next->es | prev->es)
loadsegment(es, next->es);
-
savesegment(ds, prev->ds);
- if (unlikely(next->ds | prev->ds))
+ if (next->ds | prev->ds)
loadsegment(ds, next->ds);
--
sched: Optimize branch hint in pick_next_task_fair()
Branch hint profiling on my nehalem machine showed 90%
incorrect branch hints:
15728471 158903754 90 pick_next_task_fair sched_fair.c 1555
Signed-off-by: Tim Blechmann <t...@klingt.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
LKML-Reference: <4B0BBBB1...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
kernel/sched_fair.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f28a267..24086e7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1704,7 +1704,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
- if (unlikely(!cfs_rq->nr_running))
+ if (!cfs_rq->nr_running)
return NULL;
do {
--
sched: Optimize branch hint in context_switch()
Branch hint profiling on my nehalem machine showed over 90%
incorrect branch hints:
10420275 170645395 94 context_switch sched.c 3043
10408421 171098521 94 context_switch sched.c 3050
Signed-off-by: Tim Blechmann <t...@klingt.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
LKML-Reference: <4B0BBB9F...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
kernel/sched.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 93474a7..010d5e1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2829,14 +2829,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
arch_start_context_switch(prev);
- if (unlikely(!mm)) {
+ if (likely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (likely(!prev->mm)) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
--
64-bit tasks should have %ds and %es set to null selectors. The only
time they should be different is for 32-bit tasks. It looks like some
versions of GCC just aren't using the hint. I've tested it with 4.4.2
and the generated assembly is the same with or without the hint.
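(A quick, standalone way to check that kind of claim - a hypothetical test file, not something from this thread: compile it twice with gcc -O2 -S, with and without -DUSE_HINT, and diff the assembly; identical output means the hint is ignored for this pattern.)

/* branch_hint_test.c - compare codegen with and without the hint */
#define unlikely(x)	__builtin_expect(!!(x), 0)

void do_segment_load(void);	/* stand-in for loadsegment() */

void switch_segments(unsigned short next_es, unsigned short prev_es)
{
#ifdef USE_HINT
	if (unlikely(next_es | prev_es))
#else
	if (next_es | prev_es)
#endif
		do_segment_load();
}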
--
Brian Gerst
this doesn't seem to be the case on my machine for 96% of the calls ...
i am just running 64-bit binaries on this machine, no 32-bit programs
(that i know of; ubuntu karmic amd64)
tim
Relying on the government to protect your privacy is like asking a
peeping tom to install your window blinds.
John Perry Barlow
I can think of three main classes: VFS-, SCSI-, and network-intensive
applications and benchmarks tend to bring out the worst in SLAB. There
are probably some interesting NUMA-related things that I'm not really
aware of.
Some early kernel threads inherited KERNEL_DS from the boot process.
Patch coming soon.
--
Brian Gerst
Signed-off-by: Brian Gerst <brg...@gmail.com>
---
arch/x86/kernel/head_64.S | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 780cd92..22db86a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
*/
lgdt early_gdt_descr(%rip)
- /* set up data segments. actually 0 would do too */
- movl $__KERNEL_DS,%eax
+ /* set up data segments */
+ xorl %eax,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es
--
1.6.5.2
x86, 64-bit: Set data segments to null after switching to 64-bit mode
This prevents kernel threads from inheriting non-null segment
selectors, and causing optimizations in __switch_to() to be
ineffective.
Signed-off-by: Brian Gerst <brg...@gmail.com>
Cc: Tim Blechmann <t...@klingt.org>
Cc: Linus Torvalds <torv...@linux-foundation.org>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: Jan Beulich <JBeu...@novell.com>
LKML-Reference: <1259165856-3512-1-gi...@gmail.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/head_64.S | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2640660..17ba9ec 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
*/
lgdt early_gdt_descr(%rip)
- /* set up data segments. actually 0 would do too */
- movl $__KERNEL_DS,%eax
+ /* set up data segments */
+ xorl %eax,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es
--
with this patch, the branch hint is actually correct, giving 100%
correct predictions. so please revert my earlier patch (commit
a3a1de0c34de6f5f8332cd6151c46af7813c0fcb)
thnx, tim
--
t...@klingt.org
http://tim.klingt.org
Desperation is the raw material of drastic change. Only those who can
leave behind everything they have ever believed in can hope to escape.
William S. Burroughs
31032542 270070841 89 __switch_to process_64.c 440
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
arch/x86/kernel/process_64.c | 2 +-
42017484 326957902 88 context_switch sched.c 3043
42038493 326953687 88 context_switch sched.c 3050
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/sched.c | 4 ++--
I don't think either the original or the patch is correct. Whether or
not a task has an mm is entirely workload-dependent; we shouldn't be
giving hints here.
--
error compiling committee.c: too many arguments to function
These should be documented then to avoid patches removing them:
#define slowpath(x) unlikely(x)
if (slowpath(condition))
expensive_operation();
There are reasons to still use branch hints, for example if the unlikely
branch is very expensive anyway and it pays to have the likely branch be
ever so slightly less expensive.
Now I don't think that applies here, but there are cases where such code
generation issues are the main motivator, not the actual usage patterns.
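(For reference, the hints themselves are thin wrappers around gcc's __builtin_expect(), as in include/linux/compiler.h, and the slowpath() suggested above would simply be a documented alias of unlikely() for hints kept purely for code-generation reasons - a minimal sketch:)

/* Non-profiling definitions, as in include/linux/compiler.h: */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

/*
 * Proposed alias: a separate name for hints kept for code-generation
 * reasons, so branch-profiler-driven cleanups know to leave them alone
 * (and the tracer could skip checking them by default).
 */
#define slowpath(x)	unlikely(x)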
--
it would be nice if you could commit a patch removing this hint
> These should be documented then to avoid patches removing them:
>
> #define slowpath(x) unlikely(x)
>
> if (slowpath(condition))
> expensive_operation();
this would definitely improve the expressive power ...
thnx, tim
--
t...@klingt.org
http://tim.klingt.org
Only very good and very bad programmers use goto in C
OK, so it's the generic alloc_skb() function that keeps hitting
kmem_cache_alloc_node() with "-1". Christoph, are you okay with removing
the unlikely() annotation from __cache_alloc_node()?
Pekka
Neat. If we also modify the likelihood tracer to __not__ check the
slowpath annotation by default, this really looks like a good idea.
Christian
> OK, so it's the generic alloc_skb() function that keeps hitting
> kmem_cache_alloc_node() with "-1". Christoph, are you okay with removing the
> unlikely() annotation from __cache_alloc_node()?
Yes. Let's look for other cases in the allocators too.
kmem_cache_alloc_node used to be mainly used for off-node allocations, but
the network alloc_skb() case shows that this is changing now.
I hope the users of kmem_cache_alloc_node() realize that using -1 is not
equivalent to kmem_cache_alloc(). kmem_cache_alloc follows numa policies
for memory allocations. kmem_cache_alloc_node() does not.
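(To illustrate the difference Christoph describes - a sketch using the allocator interfaces as they exist in this era, not code from the thread:)

#include <linux/slab.h>

static void *alloc_two_ways(struct kmem_cache *cachep)
{
	/* Follows the task's NUMA memory policy when picking a node: */
	void *a = kmem_cache_alloc(cachep, GFP_KERNEL);

	/*
	 * nodeid == -1 means "no node specified", but SLAB then simply
	 * falls back to the local node - NUMA policies are not consulted,
	 * so this is not equivalent to the call above:
	 */
	void *b = kmem_cache_alloc_node(cachep, GFP_KERNEL, -1);

	kmem_cache_free(cachep, a);
	return b;
}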
Tim, can you please re-send the patch to me so I can apply it?
> Tim, can you please re-send the patch to me so I can apply it?
SLUB has no issue since NUMA decisions are deferred to the page allocator.
--
28459 7678524 99 __cache_alloc_node slab.c 3551
Discussion on lkml [1] led to the conclusion that this hint should be removed.
[1] http://patchwork.kernel.org/patch/63517/
Commit 8ec6993d9f7d961014af970ded57542961fe9ad9 cleared the es and ds
selectors, so the original branch hints are correct now. Therefore
the branch hints don't need to be removed.
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
arch/x86/kernel/process_64.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
Revert "sched, x86: Optimize branch hint in __switch_to()"
This reverts commit a3a1de0c34de6f5f8332cd6151c46af7813c0fcb.
Commit 8ec6993d9f7d961014af970ded57542961fe9ad9 cleared the es
and ds selectors, so the original branch hints are correct now.
Therefore the branch hints don't need to be removed.
Signed-off-by: Tim Blechmann <t...@klingt.org>
LKML-Reference: <4B16503A...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/process_64.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 93c501d..eb62cbc 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -406,10 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
- if (next->es | prev->es)
+ if (unlikely(next->es | prev->es))
loadsegment(es, next->es);
+
savesegment(ds, prev->ds);
- if (next->ds | prev->ds)
+ if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
Applied, thanks.
Please use plain text email for sending patches in the future.