10420275 170645395 94 context_switch sched.c 3043
10408421 171098521 94 context_switch sched.c 3050
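(For readers skimming the raw profiler output above: the columns are correct hits, incorrect hits, percentage of incorrect hints, then the function, file and line of the annotation, as reported by the ftrace annotated-branch profiler. A rough sketch of the per-branch record behind those columns follows; field names match include/linux/compiler.h of kernels from this era with CONFIG_PROFILE_ANNOTATED_BRANCHES enabled, but treat the exact layout as an approximation, not a verbatim copy.)

/*
 * Sketch only: roughly what the annotated-branch profiler records per
 * likely()/unlikely() site.  See include/linux/compiler.h and
 * kernel/trace/trace_branch.c for the real definitions.
 */
struct ftrace_branch_data {
	const char	*func;		/* function containing the hint */
	const char	*file;		/* source file of the hint      */
	unsigned	line;		/* line number of the hint      */
	unsigned long	correct;	/* times the hint matched reality */
	unsigned long	incorrect;	/* times the hint was wrong       */
};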
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/sched.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index c78dcfe..2a78b38 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3040,14 +3040,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
arch_start_context_switch(prev);
- if (unlikely(!mm)) {
+ if (likely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (likely(!prev->mm)) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
--
1.6.4.2
6548732 174664120 96 __switch_to process_64.c
406
6548745 174565593 96 __switch_to process_64.c
410
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
arch/x86/kernel/process_64.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c8d0ece..1e2e1fe 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -403,11 +403,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
- if (unlikely(next->es | prev->es))
+ if (next->es | prev->es)
loadsegment(es, next->es);
savesegment(ds, prev->ds);
- if (unlikely(next->ds | prev->ds))
+ if (next->ds | prev->ds)
loadsegment(ds, next->ds);
--
1.6.4.2
131609 3840434 96 queue_delayed_work_on workqueue.c 258
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/workqueue.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0a98bef..6985ab0 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -255,7 +255,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
timer->data = (unsigned long)dwork;
timer->function = delayed_work_timer_fn;
- if (unlikely(cpu >= 0))
+ if (cpu >= 0)
add_timer_on(timer, cpu);
else
add_timer(timer);
--
1.6.4.2
28459 7678524 99 __cache_alloc_node slab.c 3551
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
mm/slab.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index f70b326..4125fcd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3548,7 +3548,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
slab_irq_save(save_flags, this_cpu);
this_node = cpu_to_node(this_cpu);
- if (unlikely(nodeid == -1))
+ if (nodeid == -1)
nodeid = this_node;
if (unlikely(!cachep->nodelists[nodeid])) {
--
1.6.4.2
15728471 158903754 90 pick_next_task_fair sched_fair.c 1555
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/sched_fair.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e5945df..5a0965c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1552,7 +1552,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
- if (unlikely(!cfs_rq->nr_running))
+ if (!cfs_rq->nr_running)
return NULL;
do {
--
1.6.4.2
That sounds odd to me. Can you see where the incorrectly predicted
calls are coming from? Calling kmem_cache_alloc_node() with node set
to -1 most of the time could be a real bug somewhere.
--
> On Tue, Nov 24, 2009 at 1:20 PM, Ingo Molnar <mi...@elte.hu> wrote:
> > (Pekka Cc:-ed)
> >
> > * Tim Blechmann <t...@klingt.org> wrote:
> >
> >> branch profiling on my nehalem machine showed 99% incorrect branch hints:
> >>
> >>    28459  7678524  99 __cache_alloc_node             slab.c 3551
> >>
> >> Signed-off-by: Tim Blechmann <t...@klingt.org>
> >> ---
> >>  mm/slab.c |    2 +-
> >>  1 files changed, 1 insertions(+), 1 deletions(-)
> >>
> >> diff --git a/mm/slab.c b/mm/slab.c
> >> index f70b326..4125fcd 100644
> >> --- a/mm/slab.c
> >> +++ b/mm/slab.c
> >> @@ -3548,7 +3548,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
> >>       slab_irq_save(save_flags, this_cpu);
> >>       this_node = cpu_to_node(this_cpu);
> >> -     if (unlikely(nodeid == -1))
> >> +     if (nodeid == -1)
> >>               nodeid = this_node;
> >>       if (unlikely(!cachep->nodelists[nodeid])) {
>
> That sounds odd to me. Can you see where the incorrectly predicted
> calls are coming from? Calling kmem_cache_alloc_node() with node set
> to -1 most of the time could be a real bug somewhere.
I think it could be that the tests are too limited - the branch
prediction looks 'wrong' in certain tests while being OK in general.
Is there some easy-to-run workload you consider more or less
representative of typical SLAB patterns?
<plug> You might want to pull even with the scheduler subsystem and in
addition to 'perf bench sched', add a 'perf bench slab' set of
interesting testcases for SLAB performance testing. :-)
</plug>
Ingo
i don't know if there is any facility in the ftrace branch profiler to
get call-graph information, but i can try to manually dump backtraces
in this code path ...
could be a specific situation on my machine, though ...
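(One minimal way to do that - a sketch only, not a tested patch, using long-standing kernel helpers; the helper name is made up for illustration:)

#include <linux/kernel.h>	/* printk_ratelimit(), dump_stack() */

/* Temporarily instrument the surprising branch in __cache_alloc_node()
 * to see who keeps passing nodeid == -1. */
static inline void report_unspecified_node(int nodeid)
{
	if (nodeid == -1 && printk_ratelimit())
		dump_stack();	/* backtrace of the calling chain */
}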
tim
--
t...@klingt.org
http://tim.klingt.org
You don't have to call it music if the term shocks you.
John Cage
sched, x86: Optimize branch hint in __switch_to()
Branch hint profiling on my nehalem machine showed 96%
incorrect branch hints:
6548732 174664120 96 __switch_to process_64.c 406
6548745 174565593 96 __switch_to process_64.c 410
Signed-off-by: Tim Blechmann <t...@klingt.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
LKML-Reference: <4B0BBB93...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/process_64.c | 5 ++---
1 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ad535b6..d9db104 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -406,11 +406,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
- if (unlikely(next->es | prev->es))
+ if (next->es | prev->es)
loadsegment(es, next->es);
-
savesegment(ds, prev->ds);
- if (unlikely(next->ds | prev->ds))
+ if (next->ds | prev->ds)
loadsegment(ds, next->ds);
--
sched: Optimize branch hint in pick_next_task_fair()
Branch hint profiling on my nehalem machine showed 90%
incorrect branch hints:
15728471 158903754 90 pick_next_task_fair sched_fair.c 1555
Signed-off-by: Tim Blechmann <t...@klingt.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
LKML-Reference: <4B0BBBB1...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
kernel/sched_fair.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f28a267..24086e7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1704,7 +1704,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
- if (unlikely(!cfs_rq->nr_running))
+ if (!cfs_rq->nr_running)
return NULL;
do {
--
sched: Optimize branch hint in context_switch()
Branch hint profiling on my nehalem machine showed over 90%
incorrect branch hints:
10420275 170645395 94 context_switch sched.c 3043
10408421 171098521 94 context_switch sched.c 3050
Signed-off-by: Tim Blechmann <t...@klingt.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
LKML-Reference: <4B0BBB9F...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
kernel/sched.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 93474a7..010d5e1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2829,14 +2829,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
arch_start_context_switch(prev);
- if (unlikely(!mm)) {
+ if (likely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (likely(!prev->mm)) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
--
64-bit tasks should have %ds and %es set to null selectors. The only
time they should be different is for 32-bit tasks. It looks like some
versions of GCC just aren't using the hint. I've tested it with 4.4.2
and the generated assembly is the same with or without the hint.
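(A quick, standalone way to check that kind of claim - a hypothetical test file, not something from this thread: compile it twice with gcc -O2 -S, with and without -DUSE_HINT, and diff the assembly; identical output means the hint is ignored for this pattern.)

/* branch_hint_test.c - compare codegen with and without the hint */
#define unlikely(x)	__builtin_expect(!!(x), 0)

void do_segment_load(void);	/* stand-in for loadsegment() */

void switch_segments(unsigned short next_es, unsigned short prev_es)
{
#ifdef USE_HINT
	if (unlikely(next_es | prev_es))
#else
	if (next_es | prev_es)
#endif
		do_segment_load();
}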
--
Brian Gerst
this doesn't seem to be the case on my machine for 96% of the calls ...
i am just running 64-bit binaries on this machine, no 32-bit programs
(that i know of; ubuntu karmic amd64)
tim
Relying on the government to protect your privacy is like asking a
peeping tom to install your window blinds.
John Perry Barlow
I can think of three main classes: VFS-, SCSI-, and network-intensive
applications and benchmarks tend to bring out the worst in SLAB. There
are probably some interesting NUMA-related things that I'm not really
aware of.
Some early kernel threads inherited KERNEL_DS from the boot process.
Patch coming soon.
--
Brian Gerst
Signed-off-by: Brian Gerst <brg...@gmail.com>
---
arch/x86/kernel/head_64.S | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 780cd92..22db86a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
*/
lgdt early_gdt_descr(%rip)
- /* set up data segments. actually 0 would do too */
- movl $__KERNEL_DS,%eax
+ /* set up data segments */
+ xorl %eax,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es
--
1.6.5.2
x86, 64-bit: Set data segments to null after switching to 64-bit mode
This prevents kernel threads from inheriting non-null segment
selectors, and causing optimizations in __switch_to() to be
ineffective.
Signed-off-by: Brian Gerst <brg...@gmail.com>
Cc: Tim Blechmann <t...@klingt.org>
Cc: Linus Torvalds <torv...@linux-foundation.org>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Jeremy Fitzhardinge <jer...@goop.org>
Cc: Jan Beulich <JBeu...@novell.com>
LKML-Reference: <1259165856-3512-1-gi...@gmail.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/head_64.S | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2640660..17ba9ec 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
*/
lgdt early_gdt_descr(%rip)
- /* set up data segments. actually 0 would do too */
- movl $__KERNEL_DS,%eax
+ /* set up data segments */
+ xorl %eax,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es
--
with this patch, the branch hint is actually correct, giving 100%
correct predictions. so please revert my earlier patch (commit
a3a1de0c34de6f5f8332cd6151c46af7813c0fcb)
thnx, tim
--
t...@klingt.org
http://tim.klingt.org
Desperation is the raw material of drastic change. Only those who can
leave behind everything they have ever believed in can hope to escape.
William S. Burroughs
31032542 270070841 89 __switch_to process_64.c 440
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
arch/x86/kernel/process_64.c | 2 +-
42017484 326957902 88 context_switch sched.c 3043
42038493 326953687 88 context_switch sched.c 3050
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
kernel/sched.c | 4 ++--
I don't think either the original or the patch is correct. Whether or
not a task has an mm is entirely workload-dependent; we shouldn't be
giving hints here.
--
error compiling committee.c: too many arguments to function
These should be documented then to avoid patches removing them:
#define slowpath(x) unlikely(x)
if (slowpath(condition))
expensive_operation();
There are reasons to still use branch hints, for example if the unlikely
branch is very expensive anyway and it pays to have the likely branch be
ever so slightly less expensive.
Now I don't think that applies here, but there are cases where such code
generation issues are the main motivator, not the actual usage patterns.
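(For reference, the hints themselves are thin wrappers around gcc's __builtin_expect(), as in include/linux/compiler.h, and the slowpath() suggested above would simply be a documented alias of unlikely() for hints kept purely for code-generation reasons - a minimal sketch:)

/* Non-profiling definitions, as in include/linux/compiler.h: */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

/*
 * Proposed alias: a separate name for hints kept for code-generation
 * reasons, so branch-profiler-driven cleanups know to leave them alone
 * (and the tracer could skip checking them by default).
 */
#define slowpath(x)	unlikely(x)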
--
it would be nice if you could commit a patch removing this hint
> These should be documented then to avoid patches removing them:
>
> #define slowpath(x) unlikely(x)
>
> if (slowpath(condition))
> expensive_operation();
this would definitely improve the expressive power ...
thnx, tim
--
t...@klingt.org
http://tim.klingt.org
Only very good and very bad programmers use goto in C
OK, so it's the generic alloc_skb() function that keeps hitting
kmem_cache_alloc_node() with "-1". Christoph, are you okay with removing
the unlikely() annotation from __cache_alloc_node()?
Pekka
Neat. If we also modify the likelihood tracer to __not__ check the
slowpath annotation by default, this really looks like a good idea.
Christian
> OK, so it's the generic alloc_skb() function that keeps hitting
> kmem_cache_alloc_node() with "-1". Christoph, are you okay with removing the
> unlikely() annotation from __cache_alloc_node()?
Yes. Let's look for other cases in the allocators too.
kmem_cache_alloc_node used to be mainly used for off-node allocations, but
the network alloc_skb() case shows that this is changing now.
I hope the users of kmem_cache_alloc_node() realize that using -1 is not
equivalent to kmem_cache_alloc(). kmem_cache_alloc follows numa policies
for memory allocations. kmem_cache_alloc_node() does not.
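(To illustrate the difference Christoph describes - a sketch using the allocator interfaces as they exist in this era, not code from the thread:)

#include <linux/slab.h>

static void *alloc_two_ways(struct kmem_cache *cachep)
{
	/* Follows the task's NUMA memory policy when picking a node: */
	void *a = kmem_cache_alloc(cachep, GFP_KERNEL);

	/*
	 * nodeid == -1 means "no node specified", but SLAB then simply
	 * falls back to the local node - NUMA policies are not consulted,
	 * so this is not equivalent to the call above:
	 */
	void *b = kmem_cache_alloc_node(cachep, GFP_KERNEL, -1);

	kmem_cache_free(cachep, a);
	return b;
}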
Tim, can you please re-send the patch to me so I can apply it?
> Tim, can you please re-send the patch to me so I can apply it?
SLUB has no issue since NUMA decisions are deferred to the page allocator.
--
28459 7678524 99 __cache_alloc_node slab.c 3551
Discussion on lkml [1] led to the conclusion that this hint should be removed.
[1] http://patchwork.kernel.org/patch/63517/
Commit 8ec6993d9f7d961014af970ded57542961fe9ad9 cleared the es and ds
selectors, so the original branch hints are correct now. Therefore
the branch hints don't need to be removed.
Signed-off-by: Tim Blechmann <t...@klingt.org>
---
arch/x86/kernel/process_64.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
Revert "sched, x86: Optimize branch hint in __switch_to()"
This reverts commit a3a1de0c34de6f5f8332cd6151c46af7813c0fcb.
Commit 8ec6993d9f7d961014af970ded57542961fe9ad9 cleared the es
and ds selectors, so the original branch hints are correct now.
Therefore the branch hints don't need to be removed.
Signed-off-by: Tim Blechmann <t...@klingt.org>
LKML-Reference: <4B16503A...@klingt.org>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/process_64.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 93c501d..eb62cbc 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -406,10 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
- if (next->es | prev->es)
+ if (unlikely(next->es | prev->es))
loadsegment(es, next->es);
+
savesegment(ds, prev->ds);
- if (next->ds | prev->ds)
+ if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
Applied, thanks.
Please use plain text email for sending patches in the future.