So if we can't find the comm event for a given thread, we look at the
traces that may contain these informations.
Before:
ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms |
kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms |
kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms |
:5124:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms |
:6244:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms |
firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms |
npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms |
:6245:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms |
After:
ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms |
kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms |
kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms |
firefox:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms |
npviewer.bin:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms |
firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms |
npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms |
npviewer.bin:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms |
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
---
tools/perf/builtin-sched.c | 44 +++++++++++++++++++++++++++++++++++++++-----
tools/perf/util/thread.c | 32 +++++++++++++++++++++++++-------
tools/perf/util/thread.h | 3 +++
3 files changed, 67 insertions(+), 12 deletions(-)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e1df705..25b91e7 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1034,6 +1034,36 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->nb_atoms++;
}
+static struct thread *
+threads__findnew_from_ctx(u32 pid, struct trace_switch_event *switch_event)
+{
+ struct thread *th;
+
+ th = threads__findnew_nocomm(pid, &threads, &last_match);
+ if (th->comm)
+ return th;
+
+ if (pid == switch_event->prev_pid)
+ thread__set_comm(th, switch_event->prev_comm);
+ else
+ thread__set_comm(th, switch_event->next_comm);
+ return th;
+}
+
+static struct thread *
+threads__findnew_from_wakeup(struct trace_wakeup_event *wakeup_event)
+{
+ struct thread *th;
+
+ th = threads__findnew_nocomm(wakeup_event->pid, &threads, &last_match);
+ if (th->comm)
+ return th;
+
+ thread__set_comm(th, wakeup_event->comm);
+
+ return th;
+}
+
static void
latency_switch_event(struct trace_switch_event *switch_event,
struct event *event __used,
@@ -1059,8 +1089,10 @@ latency_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %Ld < 0 ?\n", delta);
- sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
- sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+ sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
+ switch_event);
+ sched_in = threads__findnew_from_ctx(switch_event->next_pid,
+ switch_event);
out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
if (!out_events) {
@@ -1126,7 +1158,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
if (!wakeup_event->success)
return;
- wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
+ wakee = threads__findnew_from_wakeup(wakeup_event);
atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
if (!atoms) {
thread_atoms_insert(wakee);
@@ -1386,8 +1418,10 @@ map_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %Ld < 0 ?\n", delta);
- sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
- sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+ sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
+ switch_event);
+ sched_in = threads__findnew_from_ctx(switch_event->next_pid,
+ switch_event);
curr_thread[this_cpu] = sched_in;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 3b56aeb..8bd5ca2 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,15 +6,17 @@
#include "util.h"
#include "debug.h"
-static struct thread *thread__new(pid_t pid)
+static struct thread *thread__new(pid_t pid, int set_comm)
{
struct thread *self = calloc(1, sizeof(*self));
if (self != NULL) {
self->pid = pid;
- self->comm = malloc(32);
- if (self->comm)
- snprintf(self->comm, 32, ":%d", self->pid);
+ if (set_comm) {
+ self->comm = malloc(32);
+ if (self->comm)
+ snprintf(self->comm, 32, ":%d", self->pid);
+ }
self->maps = RB_ROOT;
INIT_LIST_HEAD(&self->removed_maps);
}
@@ -50,8 +52,10 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
return ret;
}
-struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
+static struct thread *
+__threads__findnew(pid_t pid, struct rb_root *threads,
+ struct thread **last_match,
+ int set_comm)
{
struct rb_node **p = &threads->rb_node;
struct rb_node *parent = NULL;
@@ -80,7 +84,8 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
p = &(*p)->rb_right;
}
- th = thread__new(pid);
+ th = thread__new(pid, set_comm);
+
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
rb_insert_color(&th->rb_node, threads);
@@ -91,6 +96,19 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
}
struct thread *
+threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
+{
+ return __threads__findnew(pid, threads, last_match, 1);
+}
+
+struct thread *
+threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
+ struct thread **last_match)
+{
+ return __threads__findnew(pid, threads, last_match, 0);
+}
+
+struct thread *
register_idle_thread(struct rb_root *threads, struct thread **last_match)
{
struct thread *thread = threads__findnew(0, threads, last_match);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 845d9b6..75bc843 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,6 +18,9 @@ int thread__set_comm(struct thread *self, const char *comm);
struct thread *
threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match);
struct thread *
+threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
+ struct thread **last_match);
+struct thread *
register_idle_thread(struct rb_root *threads, struct thread **last_match);
void thread__insert_map(struct thread *self, struct map *map);
int thread__fork(struct thread *self, struct thread *parent);
--
1.6.2.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
perf tools: Improve thread comm resolution in perf sched
Before:
After:
LKML-Reference: <1255012632-7882-1-gi...@gmail.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
pid_synthesize_comm_event() should have taken care of that..
> So if we can't find the comm event for a given thread, we look at the
> traces that may contain these informations.
Sure, but it would be good to find out why the synthesize bits didn't
work as expected.
Oh you're right, I didn't notice it.
And it's weird, I've just done some tests, and I always
have the same pids that are found inside the events but
but not in /proc:
7989 -> npviewer.bin
5467 -> xchat
5124 -> firefox
7929 -> npviewer.bin
That's weird. I'm going to look further on the proc filesystem.
Geeze...this is just because we are resolving the pid, not the tid..
The patch is actually a one liner :-(
So fix it by building our threads table using the tid instead of the pid
as the thread identifier.
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
---
tools/perf/builtin-sched.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e1df705..6b00529 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -638,7 +638,7 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
struct thread *thread;
- thread = threads__findnew(event->comm.pid, &threads, &last_match);
+ thread = threads__findnew(event->comm.tid, &threads, &last_match);
dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
(void *)(offset + head),
--
1.6.2.3
The fix was wrong.
We are failing to resolve thread names in perf sched because the table
of threads we are building, on top of comm events, has a per process
granularity. But perf sched, unlike the other perf tools, needs a per
thread granularity as we are profiling every tasks individually.
So fix it by building our threads table using the tid instead of the pid
as the thread identifier.
v2: Revert the previous fix that was wrong
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
---
tools/perf/builtin-sched.c | 46 +++++--------------------------------------
tools/perf/util/thread.c | 32 ++++++-----------------------
tools/perf/util/thread.h | 3 --
3 files changed, 13 insertions(+), 68 deletions(-)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 25b91e7..6b00529 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -638,7 +638,7 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
struct thread *thread;
- thread = threads__findnew(event->comm.pid, &threads, &last_match);
+ thread = threads__findnew(event->comm.tid, &threads, &last_match);
dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
(void *)(offset + head),
@@ -1034,36 +1034,6 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->nb_atoms++;
}
-static struct thread *
-threads__findnew_from_ctx(u32 pid, struct trace_switch_event *switch_event)
-{
- struct thread *th;
-
- th = threads__findnew_nocomm(pid, &threads, &last_match);
- if (th->comm)
- return th;
-
- if (pid == switch_event->prev_pid)
- thread__set_comm(th, switch_event->prev_comm);
- else
- thread__set_comm(th, switch_event->next_comm);
- return th;
-}
-
-static struct thread *
-threads__findnew_from_wakeup(struct trace_wakeup_event *wakeup_event)
-{
- struct thread *th;
-
- th = threads__findnew_nocomm(wakeup_event->pid, &threads, &last_match);
- if (th->comm)
- return th;
-
- thread__set_comm(th, wakeup_event->comm);
-
- return th;
-}
-
static void
latency_switch_event(struct trace_switch_event *switch_event,
struct event *event __used,
@@ -1089,10 +1059,8 @@ latency_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %Ld < 0 ?\n", delta);
- sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
- switch_event);
- sched_in = threads__findnew_from_ctx(switch_event->next_pid,
- switch_event);
+ sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
+ sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
if (!out_events) {
@@ -1158,7 +1126,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
if (!wakeup_event->success)
return;
- wakee = threads__findnew_from_wakeup(wakeup_event);
+ wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
if (!atoms) {
thread_atoms_insert(wakee);
@@ -1418,10 +1386,8 @@ map_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %Ld < 0 ?\n", delta);
- sched_out = threads__findnew_from_ctx(switch_event->prev_pid,
- switch_event);
- sched_in = threads__findnew_from_ctx(switch_event->next_pid,
- switch_event);
+ sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
+ sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
curr_thread[this_cpu] = sched_in;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 8bd5ca2..3b56aeb 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,17 +6,15 @@
#include "util.h"
#include "debug.h"
-static struct thread *thread__new(pid_t pid, int set_comm)
+static struct thread *thread__new(pid_t pid)
{
struct thread *self = calloc(1, sizeof(*self));
if (self != NULL) {
self->pid = pid;
- if (set_comm) {
- self->comm = malloc(32);
- if (self->comm)
- snprintf(self->comm, 32, ":%d", self->pid);
- }
+ self->comm = malloc(32);
+ if (self->comm)
+ snprintf(self->comm, 32, ":%d", self->pid);
self->maps = RB_ROOT;
INIT_LIST_HEAD(&self->removed_maps);
}
@@ -52,10 +50,8 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
return ret;
}
-static struct thread *
-__threads__findnew(pid_t pid, struct rb_root *threads,
- struct thread **last_match,
- int set_comm)
+struct thread *
+threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
{
struct rb_node **p = &threads->rb_node;
struct rb_node *parent = NULL;
@@ -84,8 +80,7 @@ __threads__findnew(pid_t pid, struct rb_root *threads,
p = &(*p)->rb_right;
}
- th = thread__new(pid, set_comm);
-
+ th = thread__new(pid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
rb_insert_color(&th->rb_node, threads);
@@ -96,19 +91,6 @@ __threads__findnew(pid_t pid, struct rb_root *threads,
}
struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
-{
- return __threads__findnew(pid, threads, last_match, 1);
-}
-
-struct thread *
-threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
- struct thread **last_match)
-{
- return __threads__findnew(pid, threads, last_match, 0);
-}
-
-struct thread *
register_idle_thread(struct rb_root *threads, struct thread **last_match)
{
struct thread *thread = threads__findnew(0, threads, last_match);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 75bc843..845d9b6 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,9 +18,6 @@ int thread__set_comm(struct thread *self, const char *comm);
struct thread *
threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match);
struct thread *
-threads__findnew_nocomm(pid_t pid, struct rb_root *threads,
- struct thread **last_match);
-struct thread *
register_idle_thread(struct rb_root *threads, struct thread **last_match);
void thread__insert_map(struct thread *self, struct map *map);
int thread__fork(struct thread *self, struct thread *parent);
perf tools: Fix thread comm resolution in perf sched
This reverts commit 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2 ("perf
tools: Improve thread comm resolution in perf sched") and fixes the
real bug.
The bug was elsewhere:
We are failing to resolve thread names in perf sched because the
table of threads we are building, on top of comm events, has a per
process granularity. But perf sched, unlike the other perf tools,
needs a per thread granularity as we are profiling every tasks
individually.
So fix it by building our threads table using the tid instead of
the pid as the thread identifier.
v2: Revert the previous fix - it is not really needed
Signed-off-by: Frederic Weisbecker <fwei...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Paul Mackerras <pau...@samba.org>
LKML-Reference: <1255028657-11158-1-gi...@gmail.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>