Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH] perf tools: Improve thread comm resolution in perf sched

0 views
Skip to first unread message

Frederic Weisbecker

unread,
Oct 8, 2009, 10:50:04 AM10/8/09
to
When we get sched traces that involve a task that was already
created before opening the event, we won't have the comm event
for it.

So if we can't find the comm event for a given thread, we look at the
traces that may contain these informations.

Before:

ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms |
kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms |
kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms |
:5124:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms |
:6244:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms |
firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms |
npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms |
:6245:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms |

After:

ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms |
kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms |
kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms |
firefox:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms |
npviewer.bin:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms |
firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms |
npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms |
npviewer.bin:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms |

Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
---
tools/perf/builtin-sched.c | 44 +++++++++++++++++++++++++++++++++++++++-----
tools/perf/util/thread.c | 32 +++++++++++++++++++++++++-------
tools/perf/util/thread.h | 3 +++
3 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e1df705..25b91e7 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1034,6 +1034,36 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->nb_atoms++;
}

+static struct thread *
+threads__findnew_from_ctx(u32 pid, struct trace_switch_event *switch_event)
+{
+ struct thread *th;
+
+ th = threads__findnew_nocomm(pid, &threads, &last_match);
+ if (th->comm)
+ return th;
+
+ if (pid == switch_event->prev_pid)
+ thread__set_comm(th, switch_event->prev_comm);
+ else
+ thread__set_comm(th, switch_event->next_comm);
+ return th;
+}
+
+static struct thread *
+threads__findnew_from_wakeup(struct trace_wakeup_event *wakeup_event)
+{
+ struct thread *th;
+
+ th = threads__findnew_nocomm(wakeup_event->pid, &threads, &last_match);
+ if (th->comm)
+ return th;
+
+ thread__set_comm(th, wakeup_event->comm);
+
+ return th;
+}
+
static void
latency_switch_event(struct trace_switch_event *switch_event,
struct event *event __used,
@@ -1059,8 +1089,10 @@ latency_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %Ld < 0 ?\n", delta);


- sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
- sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+ sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
+ switch_event);
+ sched_in = threads__findnew_from_ctx(switch_event->next_pid,
+ switch_event);

out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
if (!out_events) {
@@ -1126,7 +1158,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
if (!wakeup_event->success)
return;

- wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
+ wakee = threads__findnew_from_wakeup(wakeup_event);
atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
if (!atoms) {
thread_atoms_insert(wakee);
@@ -1386,8 +1418,10 @@ map_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %Ld < 0 ?\n", delta);


- sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
- sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+ sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
+ switch_event);
+ sched_in = threads__findnew_from_ctx(switch_event->next_pid,
+ switch_event);

curr_thread[this_cpu] = sched_in;

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 3b56aeb..8bd5ca2 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,15 +6,17 @@
#include "util.h"
#include "debug.h"

-static struct thread *thread__new(pid_t pid)
+static struct thread *thread__new(pid_t pid, int set_comm)
{
struct thread *self = calloc(1, sizeof(*self));

if (self != NULL) {
self->pid = pid;
- self->comm = malloc(32);
- if (self->comm)
- snprintf(self->comm, 32, ":%d", self->pid);
+ if (set_comm) {
+ self->comm = malloc(32);
+ if (self->comm)
+ snprintf(self->comm, 32, ":%d", self->pid);
+ }
self->maps = RB_ROOT;
INIT_LIST_HEAD(&self->removed_maps);
}
@@ -50,8 +52,10 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
return ret;
}

-struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
+static struct thread *
+__threads__findnew(pid_t pid, struct rb_root *threads,
+ struct thread **last_match,
+ int set_comm)
{
struct rb_node **p = &threads->rb_node;
struct rb_node *parent = NULL;
@@ -80,7 +84,8 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
p = &(*p)->rb_right;
}

- th = thread__new(pid);
+ th = thread__new(pid, set_comm);
+
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
rb_insert_color(&th->rb_node, threads);
@@ -91,6 +96,19 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
}

struct thread *
+threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
+{
+ return __threads__findnew(pid, threads, last_match, 1);
+}
+
+struct thread *
+threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
+ struct thread **last_match)
+{
+ return __threads__findnew(pid, threads, last_match, 0);
+}
+
+struct thread *
register_idle_thread(struct rb_root *threads, struct thread **last_match)
{
struct thread *thread = threads__findnew(0, threads, last_match);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 845d9b6..75bc843 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,6 +18,9 @@ int thread__set_comm(struct thread *self, const char *comm);
struct thread *
threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match);
struct thread *
+threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
+ struct thread **last_match);
+struct thread *
register_idle_thread(struct rb_root *threads, struct thread **last_match);
void thread__insert_map(struct thread *self, struct map *map);
int thread__fork(struct thread *self, struct thread *parent);
--
1.6.2.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

tip-bot for Frederic Weisbecker

unread,
Oct 8, 2009, 11:20:09 AM10/8/09
to
Commit-ID: 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2
Gitweb: http://git.kernel.org/tip/9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2
Author: Frederic Weisbecker <fwei...@gmail.com>
AuthorDate: Thu, 8 Oct 2009 16:37:12 +0200
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Thu, 8 Oct 2009 16:56:33 +0200

perf tools: Improve thread comm resolution in perf sched

Before:

After:

LKML-Reference: <1255012632-7882-1-gi...@gmail.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>

Peter Zijlstra

unread,
Oct 8, 2009, 12:40:10 PM10/8/09
to
On Thu, 2009-10-08 at 16:37 +0200, Frederic Weisbecker wrote:
> When we get sched traces that involve a task that was already
> created before opening the event, we won't have the comm event
> for it.

pid_synthesize_comm_event() should have taken care of that..

> So if we can't find the comm event for a given thread, we look at the
> traces that may contain these informations.

Sure, but it would be good to find out why the synthesize bits didn't
work as expected.

Frederic Weisbecker

unread,
Oct 8, 2009, 1:30:07 PM10/8/09
to
On Thu, Oct 08, 2009 at 06:33:02PM +0200, Peter Zijlstra wrote:
> On Thu, 2009-10-08 at 16:37 +0200, Frederic Weisbecker wrote:
> > When we get sched traces that involve a task that was already
> > created before opening the event, we won't have the comm event
> > for it.
>
> pid_synthesize_comm_event() should have taken care of that..
>
> > So if we can't find the comm event for a given thread, we look at the
> > traces that may contain these informations.
>
> Sure, but it would be good to find out why the synthesize bits didn't
> work as expected.


Oh you're right, I didn't notice it.

And it's weird, I've just done some tests, and I always
have the same pids that are found inside the events but
but not in /proc:

7989 -> npviewer.bin
5467 -> xchat
5124 -> firefox
7929 -> npviewer.bin

That's weird. I'm going to look further on the proc filesystem.

Frederic Weisbecker

unread,
Oct 8, 2009, 2:40:10 PM10/8/09
to
On Thu, Oct 08, 2009 at 07:18:16PM +0200, Frederic Weisbecker wrote:
> On Thu, Oct 08, 2009 at 06:33:02PM +0200, Peter Zijlstra wrote:
> > On Thu, 2009-10-08 at 16:37 +0200, Frederic Weisbecker wrote:
> > > When we get sched traces that involve a task that was already
> > > created before opening the event, we won't have the comm event
> > > for it.
> >
> > pid_synthesize_comm_event() should have taken care of that..
> >
> > > So if we can't find the comm event for a given thread, we look at the
> > > traces that may contain these informations.
> >
> > Sure, but it would be good to find out why the synthesize bits didn't
> > work as expected.
>
>
> Oh you're right, I didn't notice it.
>
> And it's weird, I've just done some tests, and I always
> have the same pids that are found inside the events but
> but not in /proc:
>
> 7989 -> npviewer.bin
> 5467 -> xchat
> 5124 -> firefox
> 7929 -> npviewer.bin
>
> That's weird. I'm going to look further on the proc filesystem.
>


Geeze...this is just because we are resolving the pid, not the tid..

The patch is actually a one liner :-(

Frederic Weisbecker

unread,
Oct 8, 2009, 3:00:14 PM10/8/09
to
We are failing to resolve thread names in perf sched because the table
of threads we are building, on top of comm events, has a per process
granularity. But perf sched, unlike the other perf tools, needs a per
thread granularity as we are profiling every tasks individually.

So fix it by building our threads table using the tid instead of the pid
as the thread identifier.

Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>

---
tools/perf/builtin-sched.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e1df705..6b00529 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -638,7 +638,7 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
struct thread *thread;

- thread = threads__findnew(event->comm.pid, &threads, &last_match);
+ thread = threads__findnew(event->comm.tid, &threads, &last_match);

dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
(void *)(offset + head),
--
1.6.2.3

Frederic Weisbecker

unread,
Oct 8, 2009, 3:20:09 PM10/8/09
to
This reverts commit 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2.
("perf tools: Improve thread comm resolution in perf sched")

The fix was wrong.

We are failing to resolve thread names in perf sched because the table
of threads we are building, on top of comm events, has a per process
granularity. But perf sched, unlike the other perf tools, needs a per
thread granularity as we are profiling every tasks individually.

So fix it by building our threads table using the tid instead of the pid
as the thread identifier.

v2: Revert the previous fix that was wrong

Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
---

tools/perf/builtin-sched.c | 46 +++++--------------------------------------
tools/perf/util/thread.c | 32 ++++++-----------------------
tools/perf/util/thread.h | 3 --
3 files changed, 13 insertions(+), 68 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 25b91e7..6b00529 100644


--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -638,7 +638,7 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
struct thread *thread;

- thread = threads__findnew(event->comm.pid, &threads, &last_match);
+ thread = threads__findnew(event->comm.tid, &threads, &last_match);

dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
(void *)(offset + head),

@@ -1034,36 +1034,6 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->nb_atoms++;
}

-static struct thread *
-threads__findnew_from_ctx(u32 pid, struct trace_switch_event *switch_event)
-{
- struct thread *th;
-
- th = threads__findnew_nocomm(pid, &threads, &last_match);
- if (th->comm)
- return th;
-
- if (pid == switch_event->prev_pid)
- thread__set_comm(th, switch_event->prev_comm);
- else
- thread__set_comm(th, switch_event->next_comm);
- return th;
-}
-
-static struct thread *
-threads__findnew_from_wakeup(struct trace_wakeup_event *wakeup_event)
-{
- struct thread *th;
-
- th = threads__findnew_nocomm(wakeup_event->pid, &threads, &last_match);
- if (th->comm)
- return th;
-
- thread__set_comm(th, wakeup_event->comm);
-
- return th;
-}
-


static void
latency_switch_event(struct trace_switch_event *switch_event,
struct event *event __used,

@@ -1089,10 +1059,8 @@ latency_switch_event(struct trace_switch_event *switch_event,


die("hm, delta: %Ld < 0 ?\n", delta);


- sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
- switch_event);
- sched_in = threads__findnew_from_ctx(switch_event->next_pid,
- switch_event);
+ sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
+ sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);



out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
if (!out_events) {

@@ -1158,7 +1126,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
if (!wakeup_event->success)
return;

- wakee = threads__findnew_from_wakeup(wakeup_event);
+ wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);


atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
if (!atoms) {
thread_atoms_insert(wakee);

@@ -1418,10 +1386,8 @@ map_switch_event(struct trace_switch_event *switch_event,


die("hm, delta: %Ld < 0 ?\n", delta);


- sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
- switch_event);
- sched_in = threads__findnew_from_ctx(switch_event->next_pid,
- switch_event);
+ sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
+ sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);



curr_thread[this_cpu] = sched_in;

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c

index 8bd5ca2..3b56aeb 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,17 +6,15 @@
#include "util.h"
#include "debug.h"

-static struct thread *thread__new(pid_t pid, int set_comm)
+static struct thread *thread__new(pid_t pid)


{
struct thread *self = calloc(1, sizeof(*self));

if (self != NULL) {
self->pid = pid;

- if (set_comm) {


- self->comm = malloc(32);
- if (self->comm)
- snprintf(self->comm, 32, ":%d", self->pid);

- }


+ self->comm = malloc(32);
+ if (self->comm)
+ snprintf(self->comm, 32, ":%d", self->pid);

self->maps = RB_ROOT;
INIT_LIST_HEAD(&self->removed_maps);
}

@@ -52,10 +50,8 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
return ret;
}

-static struct thread *
-__threads__findnew(pid_t pid, struct rb_root *threads,
- struct thread **last_match,
- int set_comm)
+struct thread *


+threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)

{
struct rb_node **p = &threads->rb_node;
struct rb_node *parent = NULL;

@@ -84,8 +80,7 @@ __threads__findnew(pid_t pid, struct rb_root *threads,
p = &(*p)->rb_right;
}

- th = thread__new(pid, set_comm);
-
+ th = thread__new(pid);


if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
rb_insert_color(&th->rb_node, threads);

@@ -96,19 +91,6 @@ __threads__findnew(pid_t pid, struct rb_root *threads,


}

struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)

-{
- return __threads__findnew(pid, threads, last_match, 1);
-}
-
-struct thread *
-threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
- struct thread **last_match)
-{
- return __threads__findnew(pid, threads, last_match, 0);
-}
-
-struct thread *


register_idle_thread(struct rb_root *threads, struct thread **last_match)
{
struct thread *thread = threads__findnew(0, threads, last_match);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h

index 75bc843..845d9b6 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,9 +18,6 @@ int thread__set_comm(struct thread *self, const char *comm);


struct thread *
threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match);
struct thread *

-threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
- struct thread **last_match);
-struct thread *


register_idle_thread(struct rb_root *threads, struct thread **last_match);
void thread__insert_map(struct thread *self, struct map *map);
int thread__fork(struct thread *self, struct thread *parent);

tip-bot for Frederic Weisbecker

unread,
Oct 8, 2009, 3:20:11 PM10/8/09
to
Commit-ID: 97ea1a7fa62af0d8d49a0fc12796b0073537c9d8
Gitweb: http://git.kernel.org/tip/97ea1a7fa62af0d8d49a0fc12796b0073537c9d8
Author: Frederic Weisbecker <fwei...@gmail.com>
AuthorDate: Thu, 8 Oct 2009 21:04:17 +0200
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Thu, 8 Oct 2009 21:10:21 +0200

perf tools: Fix thread comm resolution in perf sched

This reverts commit 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2 ("perf
tools: Improve thread comm resolution in perf sched") and fixes the
real bug.

The bug was elsewhere:

We are failing to resolve thread names in perf sched because the
table of threads we are building, on top of comm events, has a per
process granularity. But perf sched, unlike the other perf tools,
needs a per thread granularity as we are profiling every tasks
individually.

So fix it by building our threads table using the tid instead of
the pid as the thread identifier.

v2: Revert the previous fix - it is not really needed

Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>

LKML-Reference: <1255028657-11158-1-gi...@gmail.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>

0 new messages