
[PATCH 05/18] perf hists: Convert hist entry functions to use struct he_stat


Namhyung Kim

Dec 18, 2013, 12:30:01 AM
From: Namhyung Kim <namhyu...@lge.com>

hist_entry__add_cpumode_period() and hist_entry__decay() deal with
hist_entry's stat fields only. So make them take a struct he_stat
directly.
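
For illustration, a minimal standalone sketch of the 7/8 decay these
helpers apply (the struct and values below are simplified stand-ins,
not the real perf definitions):

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  /* simplified stand-in for perf's struct he_stat */
  struct he_stat {
  	uint64_t period;
  	uint64_t nr_events;
  };

  /* the same 7/8 exponential decay as he_stat__decay() */
  static void he_stat__decay(struct he_stat *stat)
  {
  	stat->period = (stat->period * 7) / 8;
  	stat->nr_events = (stat->nr_events * 7) / 8;
  }

  int main(void)
  {
  	struct he_stat stat = { .period = 8000, .nr_events = 80 };
  	int i;

  	/* 8000 -> 7000 -> 6125 -> 5359: old samples fade away */
  	for (i = 0; i < 3; i++) {
  		he_stat__decay(&stat);
  		printf("period=%" PRIu64 "\n", stat.period);
  	}
  	return 0;
  }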

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/util/hist.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 63234e37583c..1f84314546a2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -182,21 +182,21 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
}
}

-static void hist_entry__add_cpumode_period(struct hist_entry *he,
- unsigned int cpumode, u64 period)
+static void he_stat__add_cpumode_period(struct he_stat *he_stat,
+ unsigned int cpumode, u64 period)
{
switch (cpumode) {
case PERF_RECORD_MISC_KERNEL:
- he->stat.period_sys += period;
+ he_stat->period_sys += period;
break;
case PERF_RECORD_MISC_USER:
- he->stat.period_us += period;
+ he_stat->period_us += period;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
- he->stat.period_guest_sys += period;
+ he_stat->period_guest_sys += period;
break;
case PERF_RECORD_MISC_GUEST_USER:
- he->stat.period_guest_us += period;
+ he_stat->period_guest_us += period;
break;
default:
break;
@@ -223,10 +223,10 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->weight += src->weight;
}

-static void hist_entry__decay(struct hist_entry *he)
+static void he_stat__decay(struct he_stat *he_stat)
{
- he->stat.period = (he->stat.period * 7) / 8;
- he->stat.nr_events = (he->stat.nr_events * 7) / 8;
+ he_stat->period = (he_stat->period * 7) / 8;
+ he_stat->nr_events = (he_stat->nr_events * 7) / 8;
/* XXX need decay for weight too? */
}

@@ -237,7 +237,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
if (prev_period == 0)
return true;

- hist_entry__decay(he);
+ he_stat__decay(&he->stat);

if (!he->filtered)
hists->stats.total_period -= prev_period - he->stat.period;
@@ -403,7 +403,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
rb_link_node(&he->rb_node_in, parent, p);
rb_insert_color(&he->rb_node_in, hists->entries_in);
out:
- hist_entry__add_cpumode_period(he, al->cpumode, period);
+ he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
return he;
}

--
1.7.11.7


Namhyung Kim

Dec 18, 2013, 12:30:02 AM
From: Namhyung Kim <namhyu...@lge.com>

Call __hists__add_entry() for each callchain node to get an
accumulated stat for an entry. Introduce a new cumulative_iter ops
structure to process the nodes properly.
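
The underlying idea, as a standalone toy (the names and the flat
table are illustrative, not the actual perf API): a sample's period
is credited to the sampled function itself and, as accumulated
period, to every caller on its callchain.

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  struct entry {
  	const char *name;
  	uint64_t period;	/* self */
  	uint64_t period_acc;	/* self + children */
  };

  static struct entry *find_or_add(struct entry *tab, int *n, const char *name)
  {
  	int i;

  	for (i = 0; i < *n; i++)
  		if (!strcmp(tab[i].name, name))
  			return &tab[i];
  	tab[*n] = (struct entry){ .name = name };
  	return &tab[(*n)++];
  }

  int main(void)
  {
  	/* one sample in 'c', called via a -> b -> c, period 100 */
  	const char *chain[] = { "c", "b", "a" };	/* leaf first */
  	struct entry tab[8];
  	int i, n = 0;

  	for (i = 0; i < 3; i++) {
  		struct entry *e = find_or_add(tab, &n, chain[i]);

  		if (i == 0)
  			e->period += 100;	/* self sample: leaf only */
  		e->period_acc += 100;		/* accumulated: whole chain */
  	}

  	for (i = 0; i < n; i++)
  		printf("%-4s self=%3" PRIu64 " acc=%3" PRIu64 "\n",
  		       tab[i].name, tab[i].period, tab[i].period_acc);
  	return 0;
  }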

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/builtin-report.c | 136 +++++++++++++++++++++++++++++++++++++++++++-
tools/perf/ui/stdio/hist.c | 2 +-
2 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4e4572b47e04..3bc48e410d06 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -398,6 +398,130 @@ iter_finish_normal_entry(struct add_entry_iter *iter, struct addr_location *al)
return err;
}

+static int
+iter_prepare_cumulative_entry(struct add_entry_iter *iter,
+ struct machine *machine __maybe_unused,
+ struct perf_evsel *evsel,
+ struct addr_location *al __maybe_unused,
+ struct perf_sample *sample)
+{
+ callchain_cursor_commit(&callchain_cursor);
+
+ iter->evsel = evsel;
+ iter->sample = sample;
+ return 0;
+}
+
+static int
+iter_add_single_cumulative_entry(struct add_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he;
+ int err = 0;
+
+ he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+ sample->period, sample->weight,
+ sample->transaction, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ /*
+ * This is for putting parents upward during output resort iff
+ * only a child gets sampled. See hist_entry__sort_on_period().
+ */
+ he->callchain->max_depth = PERF_MAX_STACK_DEPTH + 1;
+
+ /*
+ * Only in the TUI browser we are doing integrated annotation,
+ * so we don't allocated the extra space needed because the stdio
+ * code will not use it.
+ */
+ if (sort__has_sym && he->ms.sym && use_browser == 1) {
+ struct annotation *notes = symbol__annotation(he->ms.sym);
+
+ assert(evsel != NULL);
+
+ if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
+ return -ENOMEM;
+
+ err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+ }
+
+ return err;
+}
+
+static int
+iter_next_cumulative_entry(struct add_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct callchain_cursor_node *node;
+
+ node = callchain_cursor_current(&callchain_cursor);
+ if (node == NULL)
+ return 0;
+
+ al->map = node->map;
+ al->sym = node->sym;
+ if (node->map)
+ al->addr = node->map->map_ip(node->map, node->ip);
+ else
+ al->addr = node->ip;
+
+ if (iter->rep->hide_unresolved && al->sym == NULL)
+ return 0;
+
+ callchain_cursor_advance(&callchain_cursor);
+ return 1;
+}
+
+static int
+iter_add_next_cumulative_entry(struct add_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he;
+ int err = 0;
+
+ he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+ sample->period, sample->weight,
+ sample->transaction, false);
+ if (he == NULL)
+ return -ENOMEM;
+
+ /*
+ * Only in the TUI browser we are doing integrated annotation,
+ * so we don't allocated the extra space needed because the stdio
+ * code will not use it.
+ */
+ if (sort__has_sym && he->ms.sym && use_browser == 1) {
+ struct annotation *notes = symbol__annotation(he->ms.sym);
+
+ assert(evsel != NULL);
+
+ if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
+ return -ENOMEM;
+
+ err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+ }
+ return err;
+}
+
+static int
+iter_finish_cumulative_entry(struct add_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+
+ evsel->hists.stats.total_period += sample->period;
+ hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+
+ return 0;
+}
+
static struct add_entry_iter mem_iter = {
.prepare_entry = iter_prepare_mem_entry,
.add_single_entry = iter_add_single_mem_entry,
@@ -422,6 +546,14 @@ static struct add_entry_iter normal_iter = {
.finish_entry = iter_finish_normal_entry,
};

+static struct add_entry_iter cumulative_iter = {
+ .prepare_entry = iter_prepare_cumulative_entry,
+ .add_single_entry = iter_add_single_cumulative_entry,
+ .next_entry = iter_next_cumulative_entry,
+ .add_next_entry = iter_add_next_cumulative_entry,
+ .finish_entry = iter_finish_cumulative_entry,
+};
+
static int
perf_evsel__add_entry(struct perf_evsel *evsel, struct addr_location *al,
struct perf_sample *sample, struct machine *machine,
@@ -487,7 +619,9 @@ static int process_sample_event(struct perf_tool *tool,
else if (rep->mem_mode == 1) {
iter = &mem_iter;
iter->priv = event;
- } else
+ } else if (symbol_conf.cumulate_callchain)
+ iter = &cumulative_iter;
+ else
iter = &normal_iter;

if (al.map != NULL)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index c244cb524ef2..4c4986e809d8 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -364,7 +364,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,

ret = fprintf(fp, "%s\n", bf);

- if (symbol_conf.use_callchain)
+ if (symbol_conf.use_callchain && !symbol_conf.cumulate_callchain)
ret += hist_entry__callchain_fprintf(he, hists, fp);

return ret;

Namhyung Kim

Dec 18, 2013, 12:30:02 AM
From: Namhyung Kim <namhyu...@lge.com>

If the -g cumulative option is given, entries which don't have any
self overhead still need to be shown. So apply the percent limit to
the accumulated overhead percentage in this case.
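
A minimal sketch of the filtering rule (a simplified stand-in for the
UI code below, with hypothetical totals): with --cumulate, an entry
with zero self overhead can still pass the limit through its
accumulated period.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  static bool passes_limit(uint64_t period, uint64_t period_acc,
  			 uint64_t total, bool cumulate, float min_pcnt)
  {
  	uint64_t p = cumulate ? period_acc : period;
  	float percent = p * 100.0 / total;

  	return percent >= min_pcnt;
  }

  int main(void)
  {
  	/* e.g. main(): no self samples, but 30% accumulated */
  	printf("%d\n", passes_limit(0, 300, 1000, true, 0.5));	/* 1 */
  	printf("%d\n", passes_limit(0, 300, 1000, false, 0.5));	/* 0 */
  	return 0;
  }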

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/ui/browsers/hists.c | 34 ++++++++++++++++++++++++++--------
tools/perf/ui/gtk/hists.c | 11 +++++++++--
tools/perf/ui/stdio/hist.c | 11 +++++++++--
3 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index efa78894f70d..b02e71ecc5fe 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -828,12 +828,19 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)

for (nd = browser->top; nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- float percent = h->stat.period * 100.0 /
- hb->hists->stats.total_period;
+ float percent;

if (h->filtered)
continue;

+ if (symbol_conf.cumulate_callchain) {
+ percent = h->stat_acc->period * 100.0 /
+ hb->hists->stats.total_period;
+ } else {
+ percent = h->stat.period * 100.0 /
+ hb->hists->stats.total_period;
+ }
+
if (percent < hb->min_pcnt)
continue;

@@ -851,13 +858,17 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- float percent = h->stat.period * 100.0 /
- hists->stats.total_period;
+ float percent;

- if (percent < min_pcnt)
- return NULL;
+ if (symbol_conf.cumulate_callchain) {
+ percent = h->stat_acc->period * 100.0 /
+ hists->stats.total_period;
+ } else {
+ percent = h->stat.period * 100.0 /
+ hists->stats.total_period;
+ }

- if (!h->filtered)
+ if (!h->filtered && percent >= min_pcnt)
return nd;

nd = rb_next(nd);
@@ -872,8 +883,15 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- float percent = h->stat.period * 100.0 /
+ float percent;
+
+ if (symbol_conf.cumulate_callchain) {
+ percent = h->stat_acc->period * 100.0 /
+ hists->stats.total_period;
+ } else {
+ percent = h->stat.period * 100.0 /
hists->stats.total_period;
+ }

if (!h->filtered && percent >= min_pcnt)
return nd;
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 70ed0d5e1b94..06ae3342e14f 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -296,12 +296,19 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
GtkTreeIter iter;
- float percent = h->stat.period * 100.0 /
- hists->stats.total_period;
+ float percent;

if (h->filtered)
continue;

+ if (symbol_conf.cumulate_callchain) {
+ percent = h->stat_acc->period * 100.0 /
+ hists->stats.total_period;
+ } else {
+ percent = h->stat.period * 100.0 /
+ hists->stats.total_period;
+ }
+
if (percent < min_pcnt)
continue;

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 4c4986e809d8..7ea8502192b0 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -487,12 +487,19 @@ print_entries:

for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- float percent = h->stat.period * 100.0 /
- hists->stats.total_period;
+ float percent;

if (h->filtered)
continue;

+ if (symbol_conf.cumulate_callchain) {
+ percent = h->stat_acc->period * 100.0 /
+ hists->stats.total_period;
+ } else {
+ percent = h->stat.period * 100.0 /
+ hists->stats.total_period;
+ }
+
if (percent < min_pcnt)
continue;

Namhyung Kim

Dec 18, 2013, 12:30:02 AM
From: Namhyung Kim <namhyu...@lge.com>

When callchain accumulation is requested, we need to sort the entries
by accumulated period value. When the accumulated periods of two
entries are the same (i.e. a single-path callchain), put the caller
above the callee, since accumulation tends to put callers in higher
positions for obvious reasons.
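
A condensed sketch of the resulting comparison (illustrative
stand-ins, not the real hist_entry; a positive return means "sort
first"):

  #include <stdint.h>
  #include <stdio.h>

  struct ent {
  	const char *name;
  	uint64_t period_acc;
  	unsigned int max_depth;	/* recorded at entry creation */
  };

  /* sort by accumulated period; on a tie the entry with the
   * smaller max_depth (the caller) goes above the callee */
  static int cmp(const struct ent *a, const struct ent *b)
  {
  	if (a->period_acc != b->period_acc)
  		return a->period_acc > b->period_acc ? 1 : -1;
  	if (a->max_depth != b->max_depth)
  		return a->max_depth < b->max_depth ? 1 : -1;
  	return 0;
  }

  int main(void)
  {
  	/* single-path chain main -> foo: equal accumulated periods */
  	struct ent main_e = { "main", 100, 1 };	/* caller */
  	struct ent foo_e  = { "foo",  100, 2 };	/* callee */

  	printf("%d\n", cmp(&main_e, &foo_e));	/* 1: main above foo */
  	return 0;
  }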

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/builtin-report.c | 6 ++++++
tools/perf/util/hist.c | 12 ++++++++++++
2 files changed, 18 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4ec1a090d1a3..b2bcb98a7300 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -566,6 +566,12 @@ iter_add_next_cumulative_entry(struct add_entry_iter *iter,
he_cache[iter->curr++] = he;

/*
+ * This is for putting parents upward during output resort iff
+ * only a child gets sampled. See hist_entry__sort_on_period().
+ */
+ he->callchain->max_depth = callchain_cursor.nr - callchain_cursor.pos;
+
+ /*
* Only in the TUI browser we are doing integrated annotation,
* so we don't allocated the extra space needed because the stdio
* code will not use it.
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 22b80b509c85..84fd1e6e9a37 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -626,6 +626,18 @@ static int hist_entry__sort_on_period(struct hist_entry *a,
struct hist_entry *pair;
u64 *periods_a, *periods_b;

+ if (symbol_conf.cumulate_callchain) {
+ /*
+ * Put caller above callee when they have equal period.
+ */
+ if (a->stat_acc->period != b->stat_acc->period)
+ return a->stat_acc->period > b->stat_acc->period ? 1 : -1;
+
+ if (a->callchain->max_depth != b->callchain->max_depth)
+ return a->callchain->max_depth < b->callchain->max_depth ?
+ 1 : -1;
+ }
+
ret = period_cmp(a->stat.period, b->stat.period);
if (ret || !symbol_conf.event_group)
return ret;

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

Commit 09600e0f9ebb ("perf tools: Compare dso's also when comparing
symbols") added a comparison of the dso when comparing symbols. But
if the sort key already contains dso, there is no need to do it again
since entries with a different dso have already been filtered out.
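
A toy illustration of the guard (the counters and stubs below are
hypothetical, not the real sort code): the dso comparison inside the
symbol compare runs only when dso is not already a sort key.

  #include <stdio.h>

  static int sort__has_dso;
  static int dso_cmp_count;

  static int sort__dso_cmp(void)
  {
  	dso_cmp_count++;
  	return 0;	/* pretend the dsos compare equal */
  }

  static int sym_cmp(void)
  {
  	int ret;

  	if (!sort__has_dso) {
  		ret = sort__dso_cmp();
  		if (ret != 0)
  			return ret;
  	}
  	return 0;	/* fall through to the symbol comparison */
  }

  int main(void)
  {
  	sym_cmp();		/* dso not a sort key: compared */
  	sort__has_dso = 1;
  	sym_cmp();		/* dso already a sort key: skipped */
  	printf("dso compares: %d\n", dso_cmp_count);	/* 1 */
  	return 0;
  }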

Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/util/sort.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 68a4fd2f505e..635cd8f8b22e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -13,6 +13,7 @@ int have_ignore_callees = 0;
int sort__need_collapse = 0;
int sort__has_parent = 0;
int sort__has_sym = 0;
+int sort__has_dso = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;

enum sort_type sort__first_dimension;
@@ -194,9 +195,11 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
* comparing symbol address alone is not enough since it's a
* relative address within a dso.
*/
- ret = sort__dso_cmp(left, right);
- if (ret != 0)
- return ret;
+ if (!sort__has_dso) {
+ ret = sort__dso_cmp(left, right);
+ if (ret != 0)
+ return ret;
+ }

return _sort__sym_cmp(left->ms.sym, right->ms.sym);
}
@@ -1061,6 +1064,8 @@ int sort_dimension__add(const char *tok)
sort__has_parent = 1;
} else if (sd->entry == &sort_sym) {
sort__has_sym = 1;
+ } else if (sd->entry == &sort_dso) {
+ sort__has_dso = 1;
}

__sort_dimension__add(sd, i);

Namhyung Kim

Dec 18, 2013, 12:30:02 AM
From: Namhyung Kim <namhyu...@lge.com>

To support callchain accumulation, @entry should be recognized as
accumulated or not when add_hist_entry() is called. The period of an
accumulated entry should be added to ->stat_acc but not to ->stat.
Add a @sample_self argument for that.
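
The intended bookkeeping, as a standalone sketch (simplified
stand-ins, assuming cumulate_callchain is enabled):

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  struct he_stat { uint64_t period; };
  struct entry   { struct he_stat stat, stat_acc; };

  /* self samples update both counters; accumulated-only
   * (caller) entries update stat_acc alone */
  static void add_period(struct entry *he, uint64_t period, int sample_self)
  {
  	if (sample_self)
  		he->stat.period += period;
  	he->stat_acc.period += period;
  }

  int main(void)
  {
  	struct entry he;

  	memset(&he, 0, sizeof(he));
  	add_period(&he, 100, 1);	/* leaf: self sample */
  	add_period(&he, 100, 0);	/* caller credit only */
  	printf("self=%" PRIu64 " acc=%" PRIu64 "\n",
  	       he.stat.period, he.stat_acc.period);	/* self=100 acc=200 */
  	return 0;
  }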

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/builtin-annotate.c | 3 ++-
tools/perf/builtin-diff.c | 2 +-
tools/perf/builtin-report.c | 6 +++---
tools/perf/builtin-top.c | 2 +-
tools/perf/tests/hists_link.c | 4 ++--
tools/perf/util/hist.c | 23 +++++++++++++++--------
tools/perf/util/hist.h | 3 ++-
7 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6fd52c8fa682..9c89bb2e3002 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -65,7 +65,8 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}

- he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
+ he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
+ true);
if (he == NULL)
return -ENOMEM;

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 2a85cc9a2d09..4dbc14c33ab9 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -308,7 +308,7 @@ static int hists__add_entry(struct hists *hists,
u64 weight, u64 transaction)
{
if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
- transaction) != NULL)
+ transaction, true) != NULL)
return 0;
return -ENOMEM;
}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5830bf923955..4e4572b47e04 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -154,7 +154,7 @@ iter_add_single_mem_entry(struct add_entry_iter *iter, struct addr_location *al)
* and the he_stat__add_period() function.
*/
he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi,
- cost, cost, 0);
+ cost, cost, 0, true);
if (!he)
return -ENOMEM;

@@ -286,7 +286,7 @@ iter_add_next_branch_entry(struct add_entry_iter *iter, struct addr_location *al
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
- 1, 1, 0);
+ 1, 1, 0, true);
if (he == NULL)
return -ENOMEM;

@@ -351,7 +351,7 @@ iter_add_single_normal_entry(struct add_entry_iter *iter, struct addr_location *

he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
sample->period, sample->weight,
- sample->transaction);
+ sample->transaction, true);
if (he == NULL)
return -ENOMEM;

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 03d37a76c612..ef54e9d1468f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -248,7 +248,7 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
pthread_mutex_lock(&evsel->hists.lock);
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
sample->period, sample->weight,
- sample->transaction);
+ sample->transaction, true);
pthread_mutex_unlock(&evsel->hists.lock);
if (he == NULL)
return NULL;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 173bf42cc03e..b829c2a1a598 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -223,7 +223,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;

he = __hists__add_entry(&evsel->hists, &al, NULL,
- NULL, NULL, 1, 1, 0);
+ NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;

@@ -246,7 +246,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;

he = __hists__add_entry(&evsel->hists, &al, NULL,
- NULL, NULL, 1, 1, 0);
+ NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6dfa1b48a1a9..22b80b509c85 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -279,7 +279,8 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
* histogram, sorted on item, collects periods
*/

-static struct hist_entry *hist_entry__new(struct hist_entry *template)
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+ bool sample_self)
{
size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
struct hist_entry *he = zalloc(sizeof(*he) + callchain_size);
@@ -294,6 +295,8 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
return NULL;
}
memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+ if (!sample_self)
+ memset(&he->stat, 0, sizeof(he->stat));
}

if (he->ms.map)
@@ -355,7 +358,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)

static struct hist_entry *add_hist_entry(struct hists *hists,
struct hist_entry *entry,
- struct addr_location *al)
+ struct addr_location *al,
+ bool sample_self)
{
struct rb_node **p;
struct rb_node *parent = NULL;
@@ -379,7 +383,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
cmp = hist_entry__cmp(he, entry);

if (!cmp) {
- he_stat__add_period(&he->stat, period, weight);
+ if (sample_self)
+ he_stat__add_period(&he->stat, period, weight);
if (symbol_conf.cumulate_callchain)
he_stat__add_period(he->stat_acc, period, weight);

@@ -409,7 +414,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
p = &(*p)->rb_right;
}

- he = hist_entry__new(entry);
+ he = hist_entry__new(entry, sample_self);
if (!he)
return NULL;

@@ -417,7 +422,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
rb_link_node(&he->rb_node_in, parent, p);
rb_insert_color(&he->rb_node_in, hists->entries_in);
out:
- he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+ if (sample_self)
+ he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
if (symbol_conf.cumulate_callchain)
he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
return he;
@@ -428,7 +434,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct symbol *sym_parent,
struct branch_info *bi,
struct mem_info *mi,
- u64 period, u64 weight, u64 transaction)
+ u64 period, u64 weight, u64 transaction,
+ bool sample_self)
{
struct hist_entry entry = {
.thread = al->thread,
@@ -453,7 +460,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.transaction = transaction,
};

- return add_hist_entry(hists, &entry, al);
+ return add_hist_entry(hists, &entry, al, sample_self);
}

int64_t
@@ -877,7 +884,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
p = &(*p)->rb_right;
}

- he = hist_entry__new(pair);
+ he = hist_entry__new(pair, true);
if (he) {
memset(&he->stat, 0, sizeof(he->stat));
he->hists = hists;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index b621347a1585..09e194a1e8f8 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -89,7 +89,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct symbol *parent,
struct branch_info *bi,
struct mem_info *mi, u64 period,
- u64 weight, u64 transaction);
+ u64 weight, u64 transaction,
+ bool sample_self);
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__transaction_len(void);

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

If a hist entry doesn't have symbol information, compare it by its
address. Currently it only compares the level or whether the symbol
is NULL. This can lead to undesired results like an overhead
exceeding 100%, especially when callchain accumulation is enabled by
a later patch.
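
A small standalone example of why the old comparison is too weak (the
addresses are made up): two unresolved entries at different addresses
compared equal by level alone, so their periods were merged into one
entry.

  #include <stdint.h>
  #include <stdio.h>

  static int64_t _sort__addr_cmp(uint64_t left_ip, uint64_t right_ip)
  {
  	return (int64_t)(right_ip - left_ip);
  }

  int main(void)
  {
  	/* both symbol-less, same level '.', different addresses */
  	uint64_t a = 0x401000, b = 0x402000;

  	/* old: level - level == 0, entries wrongly collapse */
  	printf("old=%d new=%lld\n", '.' - '.',
  	       (long long)_sort__addr_cmp(a, b));	/* old=0 new=4096 */
  	return 0;
  }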

Cc: Stephane Eranian <era...@google.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/util/sort.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 8b0bb1f4494a..68a4fd2f505e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -161,6 +161,11 @@ struct sort_entry sort_dso = {

/* --sort symbol */

+static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
+{
+ return (int64_t)(right_ip - left_ip);
+}
+
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
{
u64 ip_l, ip_r;
@@ -183,7 +188,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
int64_t ret;

if (!left->ms.sym && !right->ms.sym)
- return right->level - left->level;
+ return _sort__addr_cmp(left->ip, right->ip);

/*
* comparing symbol address alone is not enough since it's a
@@ -372,7 +377,7 @@ sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
struct addr_map_symbol *from_r = &right->branch_info->from;

if (!from_l->sym && !from_r->sym)
- return right->level - left->level;
+ return _sort__addr_cmp(from_l->addr, from_r->addr);

return _sort__sym_cmp(from_l->sym, from_r->sym);
}
@@ -384,7 +389,7 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
struct addr_map_symbol *to_r = &right->branch_info->to;

if (!to_l->sym && !to_r->sym)
- return right->level - left->level;
+ return _sort__addr_cmp(to_l->addr, to_r->addr);

return _sort__sym_cmp(to_l->sym, to_r->sym);

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

The cpumode and level in struct addr_location were set for a sample
but not updated as cumulative callchain entries were added. This led
to non-matching symbol and cpumode in the output.

Update them accordingly based on whether the map is a part of the
kernel or not. This is the reverse of what thread__find_addr_map()
does.
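
The mapping the patch derives, condensed into a standalone sketch
(simplified booleans instead of the real machine/map checks):

  #include <stdbool.h>
  #include <stdio.h>

  /* kernel maps get 'k' (host) or 'g' (guest); user maps get
   * '.' (host) or 'u' (guest); otherwise hypervisor 'H' */
  static char level_for(bool kernel_map, bool host_machine, bool guest)
  {
  	if (kernel_map)
  		return host_machine ? 'k' : 'g';
  	if (host_machine)
  		return '.';
  	return guest ? 'u' : 'H';
  }

  int main(void)
  {
  	printf("%c %c %c %c\n",
  	       level_for(true, true, false),	/* k: host kernel */
  	       level_for(true, false, true),	/* g: guest kernel */
  	       level_for(false, true, false),	/* .: host user */
  	       level_for(false, false, true));	/* u: guest user */
  	return 0;
  }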

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/builtin-report.c | 34 +++++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3bc48e410d06..80c774615287 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -82,6 +82,7 @@ struct add_entry_iter {
struct perf_report *rep;
struct perf_evsel *evsel;
struct perf_sample *sample;
+ struct machine *machine;
struct hist_entry *he;
struct symbol *parent;
void *priv;
@@ -400,7 +401,7 @@ iter_finish_normal_entry(struct add_entry_iter *iter, struct addr_location *al)

static int
iter_prepare_cumulative_entry(struct add_entry_iter *iter,
- struct machine *machine __maybe_unused,
+ struct machine *machine,
struct perf_evsel *evsel,
struct addr_location *al __maybe_unused,
struct perf_sample *sample)
@@ -409,6 +410,7 @@ iter_prepare_cumulative_entry(struct add_entry_iter *iter,

iter->evsel = evsel;
iter->sample = sample;
+ iter->machine = machine;
return 0;
}

@@ -469,9 +471,35 @@ iter_next_cumulative_entry(struct add_entry_iter *iter,
else
al->addr = node->ip;

- if (iter->rep->hide_unresolved && al->sym == NULL)
- return 0;
+ if (al->sym == NULL) {
+ if (iter->rep->hide_unresolved)
+ return 0;
+ if (al->map == NULL)
+ goto out;
+ }

+ if (al->map->groups == &iter->machine->kmaps) {
+ if (machine__is_host(iter->machine)) {
+ al->cpumode = PERF_RECORD_MISC_KERNEL;
+ al->level = 'k';
+ } else {
+ al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+ al->level = 'g';
+ }
+ } else {
+ if (machine__is_host(iter->machine)) {
+ al->cpumode = PERF_RECORD_MISC_USER;
+ al->level = '.';
+ } else if (perf_guest) {
+ al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+ al->level = 'u';
+ } else {
+ al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ al->level = 'H';
+ }
+ }
+
+out:
callchain_cursor_advance(&callchain_cursor);
return 1;

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

Print accumulated stat of a hist entry if requested.

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/ui/browsers/hists.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index a440e03cd8c2..efa78894f70d 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -693,11 +693,26 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
return __hpp__color_fmt(hpp, he, __hpp_get_##_field, _cb); \
}

+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field, _cb) \
+static u64 __hpp_get_acc_##_field(struct hist_entry *he) \
+{ \
+ return he->stat_acc->_field; \
+} \
+ \
+static int \
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
+ struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ return __hpp__color_fmt(hpp, he, __hpp_get_acc_##_field, _cb); \
+}
+
__HPP_COLOR_PERCENT_FN(overhead, period, __hpp__color_callchain)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, NULL)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us, NULL)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys, NULL)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period, NULL)

#undef __HPP_COLOR_PERCENT_FN

@@ -715,6 +730,8 @@ void hist_browser__init_hpp(void)
hist_browser__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
hist_browser__hpp_color_overhead_guest_us;
+ perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+ hist_browser__hpp_color_overhead_acc;
}

static int hist_browser__show_entry(struct hist_browser *browser,

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

Print accumulated stat of a hist entry if requested.

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/ui/gtk/hists.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 2ca66cc1160f..70ed0d5e1b94 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -98,11 +98,25 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,
return __hpp__color_fmt(hpp, he, he_get_##_field); \
}

+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
+static u64 he_get_acc_##_field(struct hist_entry *he) \
+{ \
+ return he->stat_acc->_field; \
+} \
+ \
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ return __hpp__color_fmt(hpp, he, he_get_acc_##_field); \
+}
+
__HPP_COLOR_PERCENT_FN(overhead, period)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)

#undef __HPP_COLOR_PERCENT_FN

@@ -121,6 +135,8 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
perf_gtk__hpp_color_overhead_guest_us;
+ perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+ perf_gtk__hpp_color_overhead_acc;
}

static void callchain_list__sym_name(struct callchain_list *cl,

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

The --cumulate option is for showing the accumulated overhead
(period) value as well as the self overhead.
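
A typical usage sketch (the workload name is a placeholder;
callchains must have been recorded with -g):

  $ perf record -g -- ./workload
  $ perf report --cumulate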

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/Documentation/perf-report.txt | 5 +++++
tools/perf/builtin-report.c | 12 +++++++++++-
2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8eab8a4bdeb8..44e53ea45098 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -141,6 +141,11 @@ OPTIONS

Default: fractal,0.5,callee,function.

+--cumulate::
+ Accumulate callchains to the parent entry so that they can show up in the
+ output. The output will have a new "Overhead/acc." column and will
+ be sorted on that data. It requires callchains to be recorded.
+
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b2bcb98a7300..206947a52fa8 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -770,6 +770,14 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
}
}

+ if (symbol_conf.cumulate_callchain) {
+ /* Silently ignore if callchain is not used */
+ if (!symbol_conf.use_callchain) {
+ symbol_conf.cumulate_callchain = false;
+ perf_hpp__cancel_cumulate();
+ }
+ }
+
if (sort__mode == SORT_MODE__BRANCH) {
if (!is_pipe &&
!(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
@@ -1197,8 +1205,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
- "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
+ "Display callchains using output_type (graph, flat, fractal or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
+ OPT_BOOLEAN(0, "cumulate", &symbol_conf.cumulate_callchain,
+ "Accumulate callchain and show cumulative overhead as well"),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

It is possible that a callchain has cycles or recursive calls. In
that case it'll end up having entries with more than 100% overhead in
the output. In order to prevent such entries, cache each callchain
node and skip it if the same entry was already cumulated.
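
A standalone toy of the dedup (the cache and chain below are
illustrative, not the real he_cache code): for a recursive chain
f -> f -> f, the accumulated period of "f" is credited once per
sample, not three times.

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
  	const char *chain[] = { "f", "f", "f" };
  	const char *cache[3];
  	unsigned long acc = 0;
  	int i, j, n = 0;

  	for (i = 0; i < 3; i++) {
  		int dup = 0;

  		for (j = 0; j < n; j++)
  			if (!strcmp(cache[j], chain[i]))
  				dup = 1;
  		if (dup)
  			continue;	/* already cumulated for this sample */
  		cache[n++] = chain[i];
  		acc += 100;		/* sample period */
  	}
  	printf("acc=%lu\n", acc);	/* 100, not 300 */
  	return 0;
  }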

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/builtin-report.c | 48 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 80c774615287..4ec1a090d1a3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -406,8 +406,27 @@ iter_prepare_cumulative_entry(struct add_entry_iter *iter,
struct addr_location *al __maybe_unused,
struct perf_sample *sample)
{
+ struct callchain_cursor_node *node;
+ struct hist_entry **he_cache;
+
callchain_cursor_commit(&callchain_cursor);

+ /*
+ * This is for detecting cycles or recursions so that they're
+ * cumulated only one time to prevent entries more than 100%
+ * overhead.
+ */
+ he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1));
+ if (he_cache == NULL)
+ return -ENOMEM;
+
+ iter->priv = he_cache;
+ iter->curr = 0;
+
+ node = callchain_cursor_current(&callchain_cursor);
+ if (node == NULL)
+ return 0;
+
iter->evsel = evsel;
iter->sample = sample;
iter->machine = machine;
@@ -420,6 +439,7 @@ iter_add_single_cumulative_entry(struct add_entry_iter *iter,
{
struct perf_evsel *evsel = iter->evsel;
struct perf_sample *sample = iter->sample;
+ struct hist_entry **he_cache = iter->priv;
struct hist_entry *he;
int err = 0;

@@ -429,6 +449,8 @@ iter_add_single_cumulative_entry(struct add_entry_iter *iter,
if (he == NULL)
return -ENOMEM;

+ he_cache[iter->curr++] = he;
+
/*
* This is for putting parents upward during output resort iff
* only a child gets sampled. See hist_entry__sort_on_period().
@@ -510,8 +532,30 @@ iter_add_next_cumulative_entry(struct add_entry_iter *iter,
{
struct perf_evsel *evsel = iter->evsel;
struct perf_sample *sample = iter->sample;
+ struct hist_entry **he_cache = iter->priv;
struct hist_entry *he;
+ struct hist_entry he_tmp = {
+ .cpu = al->cpu,
+ .thread = al->thread,
+ .comm = thread__comm(al->thread),
+ .ip = al->addr,
+ .ms = {
+ .map = al->map,
+ .sym = al->sym,
+ },
+ .parent = iter->parent,
+ };
int err = 0;
+ int i;
+
+ /*
+ * Check if there's duplicate entries in the callchain.
+ * It's possible that it has cycles or recursive calls.
+ */
+ for (i = 0; i < iter->curr; i++) {
+ if (hist_entry__cmp(he_cache[i], &he_tmp) == 0)
+ return 0;
+ }

he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
sample->period, sample->weight,
@@ -519,6 +563,8 @@ iter_add_next_cumulative_entry(struct add_entry_iter *iter,
if (he == NULL)
return -ENOMEM;

+ he_cache[iter->curr++] = he;
+
/*
* Only in the TUI browser we are doing integrated annotation,
* so we don't allocated the extra space needed because the stdio
@@ -547,6 +593,8 @@ iter_finish_cumulative_entry(struct add_entry_iter *iter,
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);

+ free(iter->priv);
+ iter->priv = NULL;
return 0;

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

Add a report.cumulate config option for setting the default value of
callchain accumulation. It affects the report output only if
perf.data contains callchain info.

A user can write .perfconfig file like below to enable accumulation
by default:

$ cat ~/.perfconfig
[report]
cumulate = true

And it can be disabled through command line:

$ perf report --no-cumulate

Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/builtin-report.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 206947a52fa8..923ed2752209 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -71,6 +71,10 @@ static int perf_report_config(const char *var, const char *value, void *cb)
rep->min_percent = strtof(value, NULL);
return 0;
}
+ if (!strcmp(var, "report.cumulate")) {
+ symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+ return 0;
+ }

return perf_default_config(var, value, cb);

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

Sometimes some columns need to be disabled at runtime. Add helper
functions to support that.

Cc: Jiri Olsa <jo...@redhat.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/ui/hist.c | 17 +++++++++++++++++
tools/perf/util/hist.h | 3 +++
2 files changed, 20 insertions(+)

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index b365260645d3..2a076dd86518 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -275,12 +275,29 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format)
list_add_tail(&format->list, &perf_hpp__list);
}

+void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+{
+ list_del(&format->list);
+}
+
void perf_hpp__column_enable(unsigned col)
{
BUG_ON(col >= PERF_HPP__MAX_INDEX);
perf_hpp__column_register(&perf_hpp__format[col]);
}

+void perf_hpp__column_disable(unsigned col)
+{
+ BUG_ON(col >= PERF_HPP__MAX_INDEX);
+ perf_hpp__column_unregister(&perf_hpp__format[col]);
+}
+
+void perf_hpp__cancel_cumulate(void)
+{
+ perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
+ perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
+}
+
int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size,
struct hists *hists)
{
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 60134e79103d..5a3bccb5413e 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -169,7 +169,10 @@ enum {

void perf_hpp__init(void);
void perf_hpp__column_register(struct perf_hpp_fmt *format);
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
void perf_hpp__column_enable(unsigned col);
+void perf_hpp__column_disable(unsigned col);
+void perf_hpp__cancel_cumulate(void);

static inline size_t perf_hpp__use_color(void)
{

Namhyung Kim

unread,
Dec 18, 2013, 12:30:03 AM12/18/13
to
From: Namhyung Kim <namhyu...@lge.com>

There's some duplicated code when adding hist entries. The cases
differ in that some have branch info or mem info, but they generally
do the same thing. So introduce a new struct add_entry_iter with
callbacks to customize each case in a general way.

The new perf_evsel__add_entry() function will look like:

iter->prepare_entry();
iter->add_single_entry();

while (iter->next_entry())
iter->add_next_entry();

iter->finish_entry();

This will help further work like the cumulative callchain patchset.
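
The pseudocode above leaves out error handling; below is a condensed
standalone sketch of the same control flow, including the
finish_entry() error merging used in the diff (toy callbacks, not
perf's actual types):

  #include <stdio.h>

  struct iter {
  	int (*prepare)(struct iter *);
  	int (*add_single)(struct iter *);
  	int (*next)(struct iter *);
  	int (*add_next)(struct iter *);
  	int (*finish)(struct iter *);
  	int curr, total;
  };

  static int run(struct iter *it)
  {
  	int err, err2;

  	err = it->prepare(it);
  	if (err)
  		goto out;
  	err = it->add_single(it);
  	if (err)
  		goto out;
  	while (it->next(it)) {
  		err = it->add_next(it);
  		if (err)
  			break;
  	}
  out:
  	err2 = it->finish(it);	/* cleanup always runs */
  	if (!err)
  		err = err2;
  	return err;
  }

  static int ok(struct iter *it)   { (void)it; return 0; }
  static int step(struct iter *it) { return it->curr++ < it->total; }

  int main(void)
  {
  	struct iter it = { ok, ok, step, ok, ok, 0, 3 };

  	printf("ret=%d\n", run(&it));	/* 0 */
  	return 0;
  }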

Cc: Jiri Olsa <jo...@redhat.com>
Cc: Stephane Eranian <era...@google.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/builtin-report.c | 453 +++++++++++++++++++++++++++++---------------
1 file changed, 300 insertions(+), 153 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3a14dbed387c..5830bf923955 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -75,38 +75,74 @@ static int perf_report_config(const char *var, const char *value, void *cb)
return perf_default_config(var, value, cb);
}

-static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
- struct addr_location *al,
- struct perf_sample *sample,
- struct perf_evsel *evsel,
- struct machine *machine,
- union perf_event *event)
-{
- struct perf_report *rep = container_of(tool, struct perf_report, tool);
- struct symbol *parent = NULL;
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- int err = 0;
+struct add_entry_iter {
+ int total;
+ int curr;
+
+ struct perf_report *rep;
+ struct perf_evsel *evsel;
+ struct perf_sample *sample;
struct hist_entry *he;
- struct mem_info *mi, *mx;
- uint64_t cost;
+ struct symbol *parent;
+ void *priv;
+
+ int (*prepare_entry)(struct add_entry_iter *, struct machine *,
+ struct perf_evsel *, struct addr_location *,
+ struct perf_sample *);
+ int (*add_single_entry)(struct add_entry_iter *, struct addr_location *);
+ int (*next_entry)(struct add_entry_iter *, struct addr_location *);
+ int (*add_next_entry)(struct add_entry_iter *, struct addr_location *);
+ int (*finish_entry)(struct add_entry_iter *, struct addr_location *);
+};

- if ((sort__has_parent || symbol_conf.use_callchain) &&
- sample->callchain) {
- err = machine__resolve_callchain(machine, evsel, al->thread,
- sample, &parent, al,
- rep->max_stack);
- if (err)
- return err;
- }
+static int
+iter_next_nop_entry(struct add_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_add_next_nop_entry(struct add_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_prepare_mem_entry(struct add_entry_iter *iter, struct machine *machine,
+ struct perf_evsel *evsel, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ union perf_event *event = iter->priv;
+ struct mem_info *mi;
+ u8 cpumode;
+
+ BUG_ON(event == NULL);
+
+ cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

mi = machine__resolve_mem(machine, al->thread, sample, cpumode);
- if (!mi)
+ if (mi == NULL)
return -ENOMEM;

- if (rep->hide_unresolved && !al->sym)
+ iter->evsel = evsel;
+ iter->sample = sample;
+ iter->priv = mi;
+ return 0;
+}
+
+static int
+iter_add_single_mem_entry(struct add_entry_iter *iter, struct addr_location *al)
+{
+ u64 cost;
+ struct mem_info *mi = iter->priv;
+ struct hist_entry *he;
+
+ if (iter->rep->hide_unresolved && !al->sym)
return 0;

- cost = sample->weight;
+ cost = iter->sample->weight;
if (!cost)
cost = 1;

@@ -117,17 +153,33 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
* and this is indirectly achieved by passing period=weight here
* and the he_stat__add_period() function.
*/
- he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
+ he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi,
cost, cost, 0);
if (!he)
return -ENOMEM;

+ iter->he = he;
+ return 0;
+}
+
+static int
+iter_finish_mem_entry(struct add_entry_iter *iter, struct addr_location *al)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct hist_entry *he = iter->he;
+ struct mem_info *mi = iter->priv;
+ int err = -ENOMEM;
+ u64 cost;
+
+ if (he == NULL)
+ return 0;
+
/*
* In the TUI browser, we are doing integrated annotation,
* so we don't allocate the extra space needed because the stdio
* code will not use it.
*/
- if (sort__has_sym && he->ms.sym && use_browser > 0) {
+ if (sort__has_sym && he->ms.sym && use_browser == 1) {
struct annotation *notes = symbol__annotation(he->ms.sym);

assert(evsel != NULL);
@@ -140,175 +192,272 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
goto out;
}

- if (sort__has_sym && he->mem_info->daddr.sym && use_browser > 0) {
+ if (sort__has_sym && mi->daddr.sym && use_browser == 1) {
struct annotation *notes;

- mx = he->mem_info;
-
- notes = symbol__annotation(mx->daddr.sym);
- if (notes->src == NULL && symbol__alloc_hist(mx->daddr.sym) < 0)
+ notes = symbol__annotation(mi->daddr.sym);
+ if (notes->src == NULL && symbol__alloc_hist(mi->daddr.sym) < 0)
goto out;

- err = symbol__inc_addr_samples(mx->daddr.sym,
- mx->daddr.map,
- evsel->idx,
- mx->daddr.al_addr);
+ err = symbol__inc_addr_samples(mi->daddr.sym, mi->daddr.map,
+ evsel->idx, mi->daddr.al_addr);
if (err)
goto out;
}

+ cost = iter->sample->weight;
+ if (!cost)
+ cost = 1;
+
evsel->hists.stats.total_period += cost;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
err = 0;

if (symbol_conf.use_callchain) {
- err = callchain_append(he->callchain,
- &callchain_cursor,
- sample->period);
+ err = callchain_append(he->callchain, &callchain_cursor,
+ iter->sample->period);
}
+
out:
+ iter->he = NULL;
return err;
}

-static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
- struct addr_location *al,
- struct perf_sample *sample,
- struct perf_evsel *evsel,
- struct machine *machine)
+static int
+iter_prepare_branch_entry(struct add_entry_iter *iter, struct machine *machine,
+ struct perf_evsel *evsel, struct addr_location *al,
+ struct perf_sample *sample)
{
- struct perf_report *rep = container_of(tool, struct perf_report, tool);
- struct symbol *parent = NULL;
- int err = 0;
- unsigned i;
- struct hist_entry *he;
- struct branch_info *bi, *bx;
-
- if ((sort__has_parent || symbol_conf.use_callchain)
- && sample->callchain) {
- err = machine__resolve_callchain(machine, evsel, al->thread,
- sample, &parent, al,
- rep->max_stack);
- if (err)
- return err;
- }
+ struct branch_info *bi;

bi = machine__resolve_bstack(machine, al->thread,
sample->branch_stack);
if (!bi)
return -ENOMEM;

- for (i = 0; i < sample->branch_stack->nr; i++) {
- if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
- continue;
+ iter->curr = 0;
+ iter->total = sample->branch_stack->nr;

- err = -ENOMEM;
+ iter->evsel = evsel;
+ iter->sample = sample;
+ iter->priv = bi;
+ return 0;
+}

- /* overwrite the 'al' to branch-to info */
- al->map = bi[i].to.map;
- al->sym = bi[i].to.sym;
- al->addr = bi[i].to.addr;
- /*
- * The report shows the percentage of total branches captured
- * and not events sampled. Thus we use a pseudo period of 1.
- */
- he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
- 1, 1, 0);
- if (he) {
- struct annotation *notes;
- bx = he->branch_info;
- if (bx->from.sym && use_browser == 1 && sort__has_sym) {
- notes = symbol__annotation(bx->from.sym);
- if (!notes->src
- && symbol__alloc_hist(bx->from.sym) < 0)
- goto out;
-
- err = symbol__inc_addr_samples(bx->from.sym,
- bx->from.map,
- evsel->idx,
- bx->from.al_addr);
- if (err)
- goto out;
- }
+static int
+iter_add_single_branch_entry(struct add_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}

- if (bx->to.sym && use_browser == 1 && sort__has_sym) {
- notes = symbol__annotation(bx->to.sym);
- if (!notes->src
- && symbol__alloc_hist(bx->to.sym) < 0)
- goto out;
-
- err = symbol__inc_addr_samples(bx->to.sym,
- bx->to.map,
- evsel->idx,
- bx->to.al_addr);
- if (err)
- goto out;
- }
- evsel->hists.stats.total_period += 1;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
- } else
- goto out;
- }
- err = 0;
-out:
- free(bi);
- return err;
+static int
+iter_next_branch_entry(struct add_entry_iter *iter, struct addr_location *al)
+{
+ struct branch_info *bi = iter->priv;
+ int i = iter->curr;
+
+ if (iter->curr >= iter->total)
+ return 0;
+
+ al->map = bi[i].to.map;
+ al->sym = bi[i].to.sym;
+ al->addr = bi[i].to.addr;
+ return 1;
}

-static int perf_evsel__add_hist_entry(struct perf_tool *tool,
- struct perf_evsel *evsel,
- struct addr_location *al,
- struct perf_sample *sample,
- struct machine *machine)
+static int
+iter_add_next_branch_entry(struct add_entry_iter *iter, struct addr_location *al)
{
- struct perf_report *rep = container_of(tool, struct perf_report, tool);
- struct symbol *parent = NULL;
- int err = 0;
+ struct branch_info *bi, *bx;
+ struct annotation *notes;
+ struct perf_evsel *evsel = iter->evsel;
struct hist_entry *he;
+ int i = iter->curr;
+ int err;

- if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
- err = machine__resolve_callchain(machine, evsel, al->thread,
- sample, &parent, al,
- rep->max_stack);
+ bi = iter->priv;
+
+ if (iter->rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+ goto out;
+
+ /*
+ * The report shows the percentage of total branches captured
+ * and not events sampled. Thus we use a pseudo period of 1.
+ */
+ he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
+ 1, 1, 0);
+ if (he == NULL)
+ return -ENOMEM;
+
+ bx = he->branch_info;
+ if (sort__has_sym && bx->from.sym && use_browser == 1) {
+ notes = symbol__annotation(bx->from.sym);
+ if (!notes->src && symbol__alloc_hist(bx->from.sym) < 0)
+ return -ENOMEM;
+
+ err = symbol__inc_addr_samples(bx->from.sym, bx->from.map,
+ evsel->idx, bx->from.al_addr);
if (err)
return err;
}

- he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
+ if (sort__has_sym && bx->to.sym && use_browser == 1) {
+ notes = symbol__annotation(bx->to.sym);
+ if (!notes->src && symbol__alloc_hist(bx->to.sym) < 0)
+ return -ENOMEM;
+
+ err = symbol__inc_addr_samples(bx->to.sym, bx->to.map,
+ evsel->idx, bx->to.al_addr);
+ if (err)
+ return err;
+ }
+ evsel->hists.stats.total_period += 1;
+ hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+
+out:
+ iter->curr++;
+ return 0;
+}
+
+static int
+iter_finish_branch_entry(struct add_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ free(iter->priv);
+ iter->priv = NULL;
+
+ return iter->curr >= iter->total ? 0 : -1;
+}
+
+static int
+iter_prepare_normal_entry(struct add_entry_iter *iter,
+ struct machine *machine __maybe_unused,
+ struct perf_evsel *evsel,
+ struct addr_location *al __maybe_unused,
+ struct perf_sample *sample)
+{
+ iter->evsel = evsel;
+ iter->sample = sample;
+ return 0;
+}
+
+static int
+iter_add_single_normal_entry(struct add_entry_iter *iter, struct addr_location *al)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he;
+
+ he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
if (he == NULL)
return -ENOMEM;

- if (symbol_conf.use_callchain) {
- err = callchain_append(he->callchain,
- &callchain_cursor,
- sample->period);
- if (err)
- return err;
- }
+ iter->he = he;
+ return 0;
+}
+
+static int
+iter_finish_normal_entry(struct add_entry_iter *iter, struct addr_location *al)
+{
+ int err = 0;
+ struct hist_entry *he = iter->he;
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+
+ if (he == NULL)
+ return 0;
+
+ iter->he = NULL;
+
/*
* Only in the TUI browser we are doing integrated annotation,
* so we don't allocated the extra space needed because the stdio
* code will not use it.
*/
- if (he->ms.sym != NULL && use_browser == 1 && sort__has_sym) {
+ if (sort__has_sym && he->ms.sym && use_browser == 1) {
struct annotation *notes = symbol__annotation(he->ms.sym);

assert(evsel != NULL);

- err = -ENOMEM;
if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
- goto out;
+ return -ENOMEM;

err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
}

evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
-out:
+
+ if (symbol_conf.use_callchain) {
+ err = callchain_append(he->callchain, &callchain_cursor,
+ sample->period);
+ }
return err;
}

+static struct add_entry_iter mem_iter = {
+ .prepare_entry = iter_prepare_mem_entry,
+ .add_single_entry = iter_add_single_mem_entry,
+ .next_entry = iter_next_nop_entry,
+ .add_next_entry = iter_add_next_nop_entry,
+ .finish_entry = iter_finish_mem_entry,
+};
+
+static struct add_entry_iter branch_iter = {
+ .prepare_entry = iter_prepare_branch_entry,
+ .add_single_entry = iter_add_single_branch_entry,
+ .next_entry = iter_next_branch_entry,
+ .add_next_entry = iter_add_next_branch_entry,
+ .finish_entry = iter_finish_branch_entry,
+};
+
+static struct add_entry_iter normal_iter = {
+ .prepare_entry = iter_prepare_normal_entry,
+ .add_single_entry = iter_add_single_normal_entry,
+ .next_entry = iter_next_nop_entry,
+ .add_next_entry = iter_add_next_nop_entry,
+ .finish_entry = iter_finish_normal_entry,
+};
+
+static int
+perf_evsel__add_entry(struct perf_evsel *evsel, struct addr_location *al,
+ struct perf_sample *sample, struct machine *machine,
+ struct add_entry_iter *iter)
+{
+ int err, err2;
+
+ if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
+ err = machine__resolve_callchain(machine, evsel, al->thread,
+ sample, &iter->parent, al,
+ iter->rep->max_stack);
+ if (err)
+ return err;
+ }
+
+ err = iter->prepare_entry(iter, machine, evsel, al, sample);
+ if (err)
+ goto out;
+
+ err = iter->add_single_entry(iter, al);
+ if (err)
+ goto out;
+
+ while (iter->next_entry(iter, al)) {
+ err = iter->add_next_entry(iter, al);
+ if (err)
+ break;
+ }
+
+out:
+ err2 = iter->finish_entry(iter, al);
+ if (!err)
+ err = err2;
+
+ return err;
+}

static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
@@ -318,6 +467,7 @@ static int process_sample_event(struct perf_tool *tool,
{
struct perf_report *rep = container_of(tool, struct perf_report, tool);
struct addr_location al;
+ struct add_entry_iter *iter;
int ret;

if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
@@ -332,25 +482,22 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;

- if (sort__mode == SORT_MODE__BRANCH) {
- ret = perf_report__add_branch_hist_entry(tool, &al, sample,
- evsel, machine);
- if (ret < 0)
- pr_debug("problem adding lbr entry, skipping event\n");
- } else if (rep->mem_mode == 1) {
- ret = perf_report__add_mem_hist_entry(tool, &al, sample,
- evsel, machine, event);
- if (ret < 0)
- pr_debug("problem adding mem entry, skipping event\n");
- } else {
- if (al.map != NULL)
- al.map->dso->hit = 1;
-
- ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample,
- machine);
- if (ret < 0)
- pr_debug("problem incrementing symbol period, skipping event\n");
- }
+ if (sort__mode == SORT_MODE__BRANCH)
+ iter = &branch_iter;
+ else if (rep->mem_mode == 1) {
+ iter = &mem_iter;
+ iter->priv = event;
+ } else
+ iter = &normal_iter;
+
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
+
+ iter->rep = rep;
+ ret = perf_evsel__add_entry(evsel, &al, sample, machine, iter);
+ if (ret < 0)
+ pr_debug("problem adding hist entry, skipping event\n");
+
return ret;

Namhyung Kim

Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

Print accumulated stat of a hist entry if requested.

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/ui/hist.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/hist.h | 1 +
2 files changed, 46 insertions(+)

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 78f4c92e9b73..b365260645d3 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -129,6 +129,28 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \
scnprintf, true); \
}

+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
+static u64 he_get_acc_##_field(struct hist_entry *he) \
+{ \
+ return he->stat_acc->_field; \
+} \
+ \
+static int hpp__color_acc_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return __hpp__fmt(hpp, he, he_get_acc_##_field, " %6.2f%%", \
+ (hpp_snprint_fn)percent_color_snprintf, true); \
+}
+
+#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
+static int hpp__entry_acc_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \
+ return __hpp__fmt(hpp, he, he_get_acc_##_field, fmt, \
+ scnprintf, true); \
+}
+
#define __HPP_ENTRY_RAW_FN(_type, _field) \
static u64 he_get_raw_##_field(struct hist_entry *he) \
{ \
@@ -148,17 +170,25 @@ __HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_COLOR_PERCENT_FN(_type, _field) \
__HPP_ENTRY_PERCENT_FN(_type, _field)

+#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
+__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
+__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
+__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
+__HPP_ENTRY_ACC_PERCENT_FN(_type, _field)
+
#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \
__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_ENTRY_RAW_FN(_type, _field)

+__HPP_HEADER_FN(overhead_self, "Self", 8, 8)

HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
+HPP_PERCENT_ACC_FNS(overhead_acc, "Total", period, 8, 8)

HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
HPP_RAW_FNS(period, "Period", period, 12, 12)
@@ -171,6 +201,14 @@ HPP_RAW_FNS(period, "Period", period, 12, 12)
.entry = hpp__entry_ ## _name \
}

+#define HPP__COLOR_ACC_PRINT_FNS(_name) \
+ { \
+ .header = hpp__header_ ## _name, \
+ .width = hpp__width_ ## _name, \
+ .color = hpp__color_acc_ ## _name, \
+ .entry = hpp__entry_acc_ ## _name \
+ }
+
#define HPP__PRINT_FNS(_name) \
{ \
.header = hpp__header_ ## _name, \
@@ -184,6 +222,7 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS(overhead_us),
HPP__COLOR_PRINT_FNS(overhead_guest_sys),
HPP__COLOR_PRINT_FNS(overhead_guest_us),
+ HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
HPP__PRINT_FNS(samples),
HPP__PRINT_FNS(period)
};
@@ -208,6 +247,12 @@ void perf_hpp__init(void)
{
perf_hpp__column_enable(PERF_HPP__OVERHEAD);

+ if (symbol_conf.cumulate_callchain) {
+ perf_hpp__format[PERF_HPP__OVERHEAD].header =
+ hpp__header_overhead_self;
+ perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
+ }
+
if (symbol_conf.show_cpu_utilization) {
perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS);
perf_hpp__column_enable(PERF_HPP__OVERHEAD_US);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 09e194a1e8f8..60134e79103d 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -160,6 +160,7 @@ enum {
PERF_HPP__OVERHEAD_US,
PERF_HPP__OVERHEAD_GUEST_SYS,
PERF_HPP__OVERHEAD_GUEST_US,
+ PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
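
To see through the macro layer: given HPP_PERCENT_ACC_FNS(overhead_acc,
"Total", period, 8, 8) above, the new color callback expands to roughly
the following (a hand-unwound sketch, not part of the patch):

static u64 he_get_acc_period(struct hist_entry *he)
{
	return he->stat_acc->period;
}

static int hpp__color_acc_overhead_acc(struct perf_hpp_fmt *fmt __maybe_unused,
				       struct perf_hpp *hpp,
				       struct hist_entry *he)
{
	return __hpp__fmt(hpp, he, he_get_acc_period, " %6.2f%%",
			  (hpp_snprint_fn)percent_color_snprintf, true);
}

So the "Total" column reads from the accumulated stats in ->stat_acc,
while the existing "Self"/Overhead column keeps reading from ->stat.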

Namhyung Kim
Dec 18, 2013, 12:30:04 AM
From: Namhyung Kim <namhyu...@lge.com>

The @entry argument already has this info, so there is no need to pass
it separately.

Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/util/hist.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 822903eaa201..63234e37583c 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -342,15 +342,15 @@ static u8 symbol__parent_filter(const struct symbol *parent)
}

static struct hist_entry *add_hist_entry(struct hists *hists,
- struct hist_entry *entry,
- struct addr_location *al,
- u64 period,
- u64 weight)
+ struct hist_entry *entry,
+ struct addr_location *al)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
int64_t cmp;
+ u64 period = entry->stat.period;
+ u64 weight = entry->stat.weight;

p = &hists->entries_in->rb_node;

@@ -437,7 +437,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.transaction = transaction,
};

- return add_hist_entry(hists, &entry, al, period, weight);
+ return add_hist_entry(hists, &entry, al);
}

int64_t

Namhyung Kim
Dec 18, 2013, 12:30:03 AM
Hello,

This is my third attempt to implement a cumulative hist period report.
This work started from Arun's SORT_INCLUSIVE patch [1], but I
completely rewrote it from scratch.

Please see patch 04/18. I refactored the functions that add hist
entries to use struct add_entry_iter. While I converted all of the
functions carefully, it'd be better if someone could test and confirm
that I didn't mess anything up - especially the branch stack and mem
code.

This patchset basically adds the period of a sample to every node in
its callchain. A hist_entry now has an additional field to keep the
cumulative period when the --cumulate option is given to perf report.
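
As a toy standalone demo of that bookkeeping (not perf code; all names
here are made up), each sample bumps "Self" only for the leaf but
"Total" for every frame on its chain:

#include <stdio.h>

struct entry { const char *name; unsigned long self, total; };

int main(void)
{
	/* callchain of one sample, leaf first: a <- b <- c <- main */
	struct entry chain[] = { { "a" }, { "b" }, { "c" }, { "main" } };
	unsigned long period = 1000;
	int i;

	chain[0].self += period;		/* "Self": leaf entry only */
	for (i = 0; i < 4; i++)
		chain[i].total += period;	/* "Total": every entry */

	for (i = 0; i < 4; i++)
		printf("%-5s self=%lu total=%lu\n",
		       chain[i].name, chain[i].self, chain[i].total);
	return 0;
}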

I changed the option to a separate --cumulate and added a new "Total"
column (and renamed the default "Overhead" column to "Self"). The
output will be sorted by total (cumulative) overhead for now. The
reason I changed it to --cumulate is that I still think it's much
different from the other --callchain options, and I plan to add
support for showing (remaining) callchains for cumulative entries too.
The --callchain option will take care of that even with the --cumulate
option.

I know that the UI should also be changed to be more flexible, as Ingo
requested, but I'd like to do this first and then move on to that
work. I also added a new config option to enable it by default.

* changes in v3:
- change to --cumulate option
- fix a couple of bugs (Jiri, Rodrigo)
- rename some helper functions (Arnaldo)
- cache previous hist entries rather than just symbol and dso
- add some preparatory cleanups
- add report.cumulate config option


Let me show you an example:

$ cat abc.c
#define barrier() asm volatile("" ::: "memory")

void a(void)
{
int i;
for (i = 0; i < 1000000; i++)
barrier();
}
void b(void)
{
a();
}
void c(void)
{
b();
}
int main(void)
{
c();
return 0;
}
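
The program can be built with something like the following command;
the exact flags are an assumption, but avoiding optimization keeps the
a -> b -> c -> main frames from being inlined away:

$ gcc -O0 -fno-omit-frame-pointer -o abc abc.c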

With this simple program I ran perf record and report:

$ perf record -g -e cycles:u ./abc

$ perf report --stdio
88.29% abc abc [.] a
|
--- a
b
c
main
__libc_start_main

9.43% abc ld-2.17.so [.] _dl_relocate_object
|
--- _dl_relocate_object
dl_main
_dl_sysdep_start

2.27% abc [kernel.kallsyms] [k] page_fault
|
--- page_fault
|
|--95.94%-- _dl_sysdep_start
| _dl_start_user
|
--4.06%-- _start

0.00% abc ld-2.17.so [.] _start
|
--- _start


When the --cumulate option is given, it'll be shown like this:

$ perf report --cumulate --stdio

# Self Total Command Shared Object Symbol
# ........ ........ ....... ................. .......................
#
0.00% 88.29% abc libc-2.17.so [.] __libc_start_main
0.00% 88.29% abc abc [.] main
0.00% 88.29% abc abc [.] c
0.00% 88.29% abc abc [.] b
88.29% 88.29% abc abc [.] a
0.00% 11.61% abc ld-2.17.so [.] _dl_sysdep_start
0.00% 9.43% abc ld-2.17.so [.] dl_main
9.43% 9.43% abc ld-2.17.so [.] _dl_relocate_object
2.27% 2.27% abc [kernel.kallsyms] [k] page_fault
0.00% 2.18% abc ld-2.17.so [.] _dl_start_user
0.00% 0.10% abc ld-2.17.so [.] _start

As you can see, the __libc_start_main -> main -> c -> b -> a callchain
shows up in the output.

I know it has some rough edges or even bugs, but I really want to
release it and get reviews. It does not handle event groups or
annotations yet.

You can also get this series on 'perf/cumulate-v3' branch in my tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git


Any comments are welcome, thanks.
Namhyung


Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>

[1] https://lkml.org/lkml/2012/3/31/6


Namhyung Kim (18):
perf sort: Compare addresses if no symbol info
perf sort: Do not compare dso again
perf tools: Do not pass period and weight to add_hist_entry()
perf tools: Introduce struct add_entry_iter
perf hists: Convert hist entry functions to use struct he_stat
perf hists: Add support for accumulated stat of hist entry
perf hists: Check if accumulated when adding a hist entry
perf hists: Accumulate hist entry stat based on the callchain
perf tools: Update cpumode for each cumulative entry
perf report: Cache cumulative callchains
perf hists: Sort hist entries by accumulated period
perf ui/hist: Add support to accumulated hist stat
perf ui/browser: Add support to accumulated hist stat
perf ui/gtk: Add support to accumulated hist stat
perf tools: Apply percent-limit to cumulative percentage
perf tools: Add more hpp helper functions
perf report: Add --cumulate option
perf report: Add report.cumulate config option

tools/perf/Documentation/perf-report.txt | 5 +
tools/perf/builtin-annotate.c | 3 +-
tools/perf/builtin-diff.c | 2 +-
tools/perf/builtin-report.c | 679 ++++++++++++++++++++++++-------
tools/perf/builtin-top.c | 2 +-
tools/perf/tests/hists_link.c | 4 +-
tools/perf/ui/browsers/hists.c | 51 ++-
tools/perf/ui/gtk/hists.c | 27 +-
tools/perf/ui/hist.c | 62 +++
tools/perf/ui/stdio/hist.c | 13 +-
tools/perf/util/hist.c | 79 +++-
tools/perf/util/hist.h | 7 +-
tools/perf/util/sort.c | 22 +-
tools/perf/util/sort.h | 1 +
tools/perf/util/symbol.h | 1 +
15 files changed, 761 insertions(+), 197 deletions(-)

Namhyung Kim
Dec 18, 2013, 12:30:03 AM
From: Namhyung Kim <namhyu...@lge.com>

Maintain accumulated stat information in hist_entry->stat_acc if
symbol_conf.cumulate_callchain is set. Fields in ->stat_acc have the
same values initially, and will be updated as the callchain is
processed later.

Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Signed-off-by: Namhyung Kim <namh...@kernel.org>
---
tools/perf/util/hist.c | 18 ++++++++++++++++++
tools/perf/util/sort.h | 1 +
tools/perf/util/symbol.h | 1 +
3 files changed, 20 insertions(+)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1f84314546a2..6dfa1b48a1a9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -238,6 +238,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
return true;

he_stat__decay(&he->stat);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__decay(he->stat_acc);

if (!he->filtered)
hists->stats.total_period -= prev_period - he->stat.period;
@@ -285,6 +287,15 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
if (he != NULL) {
*he = *template;

+ if (symbol_conf.cumulate_callchain) {
+ he->stat_acc = malloc(sizeof(he->stat));
+ if (he->stat_acc == NULL) {
+ free(he);
+ return NULL;
+ }
+ memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+ }
+
if (he->ms.map)
he->ms.map->referenced = true;

@@ -296,6 +307,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
*/
he->branch_info = malloc(sizeof(*he->branch_info));
if (he->branch_info == NULL) {
+ free(he->stat_acc);
free(he);
return NULL;
}
@@ -368,6 +380,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,

if (!cmp) {
he_stat__add_period(&he->stat, period, weight);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_period(he->stat_acc, period, weight);

/*
* This mem info was allocated from machine__resolve_mem
@@ -404,6 +418,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
rb_insert_color(&he->rb_node_in, hists->entries_in);
out:
he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
return he;
}

@@ -503,6 +519,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,

if (!cmp) {
he_stat__add_stat(&iter->stat, &he->stat);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_stat(iter->stat_acc, he->stat_acc);

if (symbol_conf.use_callchain) {
callchain_cursor_reset(&callchain_cursor);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 43e5ff42a609..309f2838a1b4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -81,6 +81,7 @@ struct hist_entry {
struct list_head head;
} pairs;
struct he_stat stat;
+ struct he_stat *stat_acc;
struct map_symbol ms;
struct thread *thread;
struct comm *comm;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 8a9d910c5345..66f429633804 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -92,6 +92,7 @@ struct symbol_conf {
show_nr_samples,
show_total_period,
use_callchain,
+ cumulate_callchain,
exclude_other,
show_cpu_utilization,
initialized,

Ingo Molnar
Dec 18, 2013, 4:50:02 AM

* Namhyung Kim <namh...@kernel.org> wrote:

> I changed the option to a separate --cumulate and added a new
> "Total" column (and renamed the default "Overhead" column to
> "Self"). The output will be sorted by total (cumulative) overhead
> for now. The reason I changed it to --cumulate is that I still
> think it's much different from the other --callchain options, and I
> plan to add support for showing (remaining) callchains for
> cumulative entries too. The --callchain option will take care of
> that even with the --cumulate option.

So I still think this is a fantastic feature, and while this variant
of the user interface is an improvement over the previous version, we
are not there yet ;-)

My main complaint is that any variation of 'cumulative' or 'cumulate' is
a tongue-twister to users. I certainly won't be able to remember it
and will have to call up the manpage every time I use it - which will
be very annoying. I'd probably not use the feature much.

So let's approach this from the casual user's angle. Casual users don't
really remember twisted names for command line options, they remember
the big picture, big concepts, and they remember bits of the output:

> When the --cumulate option is given, it'll be shown like this:
>
> $ perf report --cumulate --stdio
>
> # Self Total Command Shared Object Symbol
> # ........ ........ ....... ................. .......................
> #
> 0.00% 88.29% abc libc-2.17.so [.] __libc_start_main
> 0.00% 88.29% abc abc [.] main
> 0.00% 88.29% abc abc [.] c
> 0.00% 88.29% abc abc [.] b
> 88.29% 88.29% abc abc [.] a
> 0.00% 11.61% abc ld-2.17.so [.] _dl_sysdep_start

So the natural way to get this would be something like:

perf report --total

Or, if '--total' does not feel good, maybe we should change 'Total' to
'Siblings' or 'Children', and propagate that naming through the UI:

$ perf report --children --stdio

# Self Children Command Shared Object Symbol
# ........ ........ ....... ................. .......................
#
0.00% 88.29% abc libc-2.17.so [.] __libc_start_main
0.00% 88.29% abc abc [.] main
0.00% 88.29% abc abc [.] c
0.00% 88.29% abc abc [.] b
88.29% 88.29% abc abc [.] a
0.00% 11.61% abc ld-2.17.so [.] _dl_sysdep_start

Which shows the sum of overhead of all child functions as well, not
just ourselves.

Anything but 'cumulative'. That word, beyond being a strange, hard to
remember Latin word, also does not tell the user (or the developer)
anything about _what_ is being accumulated. It could cover anything.

I also think this option should be enabled by default - let's see how
many people complain about that.

Okay?

Thanks,

Ingo

Arun Sharma
Dec 18, 2013, 5:40:02 AM
On 12/18/13 3:16 PM, Ingo Molnar wrote:

> My main complaint is that any variation of 'cumulative' or 'cumulate' is
> a tongue-twister to users. I certainly won't be able to remember it
> and will have to call up the manpage every time I use it - which will
> be very annoying. I'd probably not use the feature much.

I can remember it, mainly because it's such an unusual word :)
Agree that we could use something simpler/easier to remember.

My other user space projects have been keeping me busy. But still very
interested in this feature. Will test this patch series over the holidays.

-Arun

Namhyung Kim
Dec 18, 2013, 9:40:02 AM
Hi Arun,

2013-12-18 (Wed), 16:08 +0530, Arun Sharma:
> On 12/18/13 3:16 PM, Ingo Molnar wrote:
>
> > My main complaint is that any variation of 'cumulative' or 'cumulate' is
> > a tongue-twister to users. I certainly won't be able to remember it
> > and will have to call up the manpage every time I use it - which will
> > be very annoying. I'd probably not use the feature much.
>
> I can remember it, mainly because it's such an unusual word :)
> Agree that we could use something simpler/easier to remember.
>
> My other user space projects have been keeping me busy. But still very
> interested in this feature. Will test this patch series over the holidays.

Thanks in advance for your test and feedback! :)
Namhyung

Namhyung Kim
Dec 18, 2013, 9:40:02 AM
2013-12-18 (Wed), 10:46 +0100, Ingo Molnar:
> * Namhyung Kim <namh...@kernel.org> wrote:
>
> > I changed the option to a separate --cumulate and added a new
> > "Total" column (and renamed the default "Overhead" column to
> > "Self"). The output will be sorted by total (cumulative) overhead
> > for now. The reason I changed it to --cumulate is that I still
> > think it's much different from the other --callchain options, and I
> > plan to add support for showing (remaining) callchains for
> > cumulative entries too. The --callchain option will take care of
> > that even with the --cumulate option.
>
> So I still think this is a fantastic feature, and while this variant
> of the user interface is an improvement over the previous version, we
> are not there yet ;-)

Right. But I want to finish the core part of this patchset first. And
then I will work on the UI part since it'll be quite a large piece of
work by itself, I guess.

>
> My main complaint is that any variation of 'cumulative' or 'cumulate' is
> a tongue-twister to users. I certainly won't be able to remember it
> and will have to call up the manpage every time I use it - which will
> be very annoying. I'd probably not use the feature much.
>
> So let's approach this from the casual user's angle. Casual users don't
> really remember twisted names for command line options, they remember
> the big picture, big concepts, and they remember bits of the output:
>
> > When the --cumulate option is given, it'll be shown like this:
> >
> > $ perf report --cumulate --stdio
> >
> > # Self Total Command Shared Object Symbol
> > # ........ ........ ....... ................. .......................
> > #
> > 0.00% 88.29% abc libc-2.17.so [.] __libc_start_main
> > 0.00% 88.29% abc abc [.] main
> > 0.00% 88.29% abc abc [.] c
> > 0.00% 88.29% abc abc [.] b
> > 88.29% 88.29% abc abc [.] a
> > 0.00% 11.61% abc ld-2.17.so [.] _dl_sysdep_start
>
> So the natural way to get this would be something like:
>
> perf report --total
>
> Or, if '--total' does not feel good, maybe we should change 'Total' to
> 'Siblings' or 'Children', and propagate that naming through the UI:

Children looks better to me.

>
> $ perf report --children --stdio
>
> # Self Children Command Shared Object Symbol
> # ........ ........ ....... ................. .......................
> #
> 0.00% 88.29% abc libc-2.17.so [.] __libc_start_main
> 0.00% 88.29% abc abc [.] main
> 0.00% 88.29% abc abc [.] c
> 0.00% 88.29% abc abc [.] b
> 88.29% 88.29% abc abc [.] a
> 0.00% 11.61% abc ld-2.17.so [.] _dl_sysdep_start
>
> Which shows the sum of overhead of all child functions as well, not
> just ourselves.
>
> Anything but 'cumulative'. That word, beyond being a strange, hard to
> remember Latin word, also does not tell the user (or the developer)
> anything about _what_ is being accumulated. It could cover anything.
>
> I also think this option should be enabled by default - let's see how
> many people complain about that.
>
> Okay?

Okay. I'll make it default in the next spin.

Thanks,
Namhyung

Jiri Olsa
Dec 18, 2013, 10:40:03 AM
On Wed, Dec 18, 2013 at 02:21:09PM +0900, Namhyung Kim wrote:
> From: Namhyung Kim <namhyu...@lge.com>
>
> If a hist entry doesn't have symbol information, compare it with its
> address. Currently it only compares its level or whether it's NULL.
> This can lead to an undesired result like an overhead exceeding 100%,
> especially when callchain accumulation is enabled by a later patch.
>
> Cc: Stephane Eranian <era...@google.com>
> Signed-off-by: Namhyung Kim <namh...@kernel.org>
> ---
> tools/perf/util/sort.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 8b0bb1f4494a..68a4fd2f505e 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -161,6 +161,11 @@ struct sort_entry sort_dso = {
>
> /* --sort symbol */
>
> +static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
> +{
> + return (int64_t)(right_ip - left_ip);
> +}
> +

what's the reason for the leading '_' in the name?

otherwise:

Acked-by: Jiri Olsa <jo...@redhat.com>

jirka

Jiri Olsa
Dec 18, 2013, 10:50:02 AM
On Wed, Dec 18, 2013 at 02:21:11PM +0900, Namhyung Kim wrote:
> From: Namhyung Kim <namhyu...@lge.com>
>
> The @entry argument already has this info, so there is no need to
> pass it separately.
>
> Signed-off-by: Namhyung Kim <namh...@kernel.org>

Acked-by: Jiri Olsa <jo...@redhat.com>

jirka

Jiri Olsa
Dec 18, 2013, 10:50:02 AM
On Wed, Dec 18, 2013 at 02:21:10PM +0900, Namhyung Kim wrote:
> From: Namhyung Kim <namhyu...@lge.com>
>
> The commit 09600e0f9ebb ("perf tools: Compare dso's also when
> comparing symbols") added a comparison of dso when comparing symbols.
> But if the sort key already has dso, it doesn't need to do it again
> since entries with a different dso have already been filtered out.
>
> Signed-off-by: Namhyung Kim <namh...@kernel.org>

Acked-by: Jiri Olsa <jo...@redhat.com>

jirka

Jiri Olsa
Dec 18, 2013, 11:00:03 AM
On Wed, Dec 18, 2013 at 02:21:12PM +0900, Namhyung Kim wrote:
> From: Namhyung Kim <namhyu...@lge.com>
>
> There's some duplicated code when adding hist entries. The cases are
> different in that some have branch info or mem info, but they
> generally do the same thing. So introduce a new struct add_entry_iter
> and add callbacks to customize each case in a general way.
>
> The new perf_evsel__add_entry() function will look like:
>
> iter->prepare_entry();
> iter->add_single_entry();
>
> while (iter->next_entry())
> iter->add_next_entry();
>
> iter->finish_entry();
>
> This will help further work like the cumulative callchain patchset.

cool, I was waiting for this one! I have another user
for it, I'll test by rebasing my code ;-)

thanks,
jirka

Arnaldo Carvalho de Melo
Dec 18, 2013, 12:40:01 PM
On Wed, Dec 18, 2013 at 04:38:49PM +0100, Jiri Olsa wrote:
> On Wed, Dec 18, 2013 at 02:21:09PM +0900, Namhyung Kim wrote:
> > From: Namhyung Kim <namhyu...@lge.com>
> >
> > If a hist entry doesn't have symbol information, compare it with its
> > address. Currently it only compares its level or whether it's NULL.
> > This can lead to an undesired result like an overhead exceeding 100%,
> > especially when callchain accumulation is enabled by a later patch.
> >
> > Cc: Stephane Eranian <era...@google.com>
> > Signed-off-by: Namhyung Kim <namh...@kernel.org>
> > ---
> > tools/perf/util/sort.c | 11 ++++++++---
> > 1 file changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> > index 8b0bb1f4494a..68a4fd2f505e 100644
> > --- a/tools/perf/util/sort.c
> > +++ b/tools/perf/util/sort.c
> > @@ -161,6 +161,11 @@ struct sort_entry sort_dso = {
> >
> > /* --sort symbol */
> >
> > +static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
> > +{
> > + return (int64_t)(right_ip - left_ip);
> > +}
> > +
>
> what's the reason for the leading '_' in the name?

Yeah, I'm curious as well, the convention is to only use double _ in
front of functions when it does a little less than a function of the
same name without such prefix, like locking, etc.

- Arnaldo

Arnaldo Carvalho de Melo
Dec 18, 2013, 12:40:02 PM
On Wed, Dec 18, 2013 at 02:35:28PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Wed, Dec 18, 2013 at 04:38:49PM +0100, Jiri Olsa escreveu:
> > > +static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
> > > +{
> > > + return (int64_t)(right_ip - left_ip);
> > > +}
> > > +
> >
> > what's the reason for the leading '_' in the name?
>
> Yeah, I'm curious as well, the convention is to only use double _ in
> front of functions when it does a little less than a function of the
> same name without such prefix, like locking, etc.
>
> - Arnaldo
>
> > otherwise:
> >
> > Acked-by: Jiri Olsa <jo...@redhat.com>

Yeah, I'll apply it, it just keeps whatever convention is already
there. I'll take a stab at fixing it all up after merging this
--whatever-the-cumulate-option-becomes new code :-)

- Arnaldo

Arnaldo Carvalho de Melo
Dec 18, 2013, 12:50:02 PM
On Wed, Dec 18, 2013 at 11:37:49PM +0900, Namhyung Kim wrote:
> 2013-12-18 (Wed), 10:46 +0100, Ingo Molnar:
> > * Namhyung Kim <namh...@kernel.org> wrote:

> > > I changed the option to a separate --cumulate and added a new
> > > "Total" column (and renamed the default "Overhead" column to
> > > "Self"). The output will be sorted by total (cumulative) overhead
> > > for now. The reason I changed it to --cumulate is that I still
> > > think it's much different from the other --callchain options, and
> > > I plan to add support for showing (remaining) callchains for
> > > cumulative entries too. The --callchain option will take care of
> > > that even with the --cumulate option.

> > So I still think this is a fantastic feature, and while this variant
> > of the user interface is an improvement over the previous version, we
> > are not there yet ;-)
>
> Right. But I want to finish the core part of this patchset first. And
> then I will work on the UI part since it'll be quite a large piece of
> work by itself, I guess.

I got the first few ones, which are not really related to this, to
reduce the patchkit size; I will go through the others next.

> > So the natural way to get this would be something like:
> >
> > perf report --total
> >
> > Or, if '--total' does not feel good, maybe we should change 'Total' to
> > 'Siblings' or 'Children', and propagate that naming through the UI:
>
> Children looks better to me.

--children, ack

Namhyung Kim
Dec 19, 2013, 2:20:01 AM
Hi Jiri,

On Wed, 18 Dec 2013 16:50:40 +0100, Jiri Olsa wrote:
> On Wed, Dec 18, 2013 at 02:21:12PM +0900, Namhyung Kim wrote:
>> From: Namhyung Kim <namhyu...@lge.com>
>>
>> There's some duplicated code when adding hist entries. The cases are
>> different in that some have branch info or mem info, but they
>> generally do the same thing. So introduce a new struct add_entry_iter
>> and add callbacks to customize each case in a general way.
>>
>> The new perf_evsel__add_entry() function will look like:
>>
>> iter->prepare_entry();
>> iter->add_single_entry();
>>
>> while (iter->next_entry())
>> iter->add_next_entry();
>>
>> iter->finish_entry();
>>
>> This will help further work like the cumulative callchain patchset.
>
> cool, I was waiting for this one! I have another user
> for it, I'll test by rebasing my code ;-)

Great! I look forward to your feedback! :)

Thanks,
Namhyung

Namhyung Kim
Dec 19, 2013, 2:20:01 AM
Hi Arnaldo,

On Wed, 18 Dec 2013 14:39:07 -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, Dec 18, 2013 at 02:35:28PM -0300, Arnaldo Carvalho de Melo wrote:
>> On Wed, Dec 18, 2013 at 04:38:49PM +0100, Jiri Olsa wrote:
>> > > +static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
>> > > +{
>> > > + return (int64_t)(right_ip - left_ip);
>> > > +}
>> > > +
>> >
>> > what's the reason for the leading '_' in the name?
>>
>> Yeah, I'm curious as well, the convention is to only use double _ in
>> front of functions when it does a little less than a function of the
>> same name without such prefix, like locking, etc.
>>
>> - Arnaldo
>>
>> > otherwise:
>> >
>> > Acked-by: Jiri Olsa <jo...@redhat.com>
>
> Yeah, I'll apply it, it just keeps whatever convention is already
> there.

Exactly. It's not a top-level sort/compare function, hence the leading
'_', I guess.


> I'll take a stab at fixing it all up after merging this
> --whatever-the-cumulate-option-becomes new code :-)

Thank you!
Namhyung

Namhyung Kim
Dec 19, 2013, 2:30:01 AM
On Wed, 18 Dec 2013 14:47:15 -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, Dec 18, 2013 at 11:37:49PM +0900, Namhyung Kim wrote:
>> 2013-12-18 (Wed), 10:46 +0100, Ingo Molnar:
>> > * Namhyung Kim <namh...@kernel.org> wrote:
>
>> > > I changed the option as a separate --cumulate and added a new
>> > > "Total" column (and renamed the default "Overhead" column into
>> > > "Self"). The output will be sorted by total (cumulative) overhead
>> > > for now. The reason I changed to the --cumulate is that I still
>> > > think it's much different from other --callchain options and I
>> > > plan to add support for showing (remaining) callchains to
>> > > cumulative entries too. The --callchain option will take care of
>> > > it even with --cumulate option.
>
>> > So I still think this is a fantastic feature, and while this variant
>> > of the user interface is an improvement over the previous version, we
>> > are not there yet ;-)
>>
>> Right. But I want to finish the core part of this patchset first. And
>> then I will work on the UI part since it'll be quite a large piece of
>> work by itself, I guess.
>
> I got the first few ones, which are not really related to this, to
> reduce the patchkit size; I will go through the others next.

Right. I forgot to mention that they're independent cleanups, thanks.

>
>> > So the natural way to get this would be something like:
>> >
>> > perf report --total
>> >
>> > Or, if '--total' does not feel good, maybe we should change 'Total' to
>> > 'Siblings' or 'Children', and propagate that naming through the UI:
>>
>> Children looks better to me.
>
> --children, ack

Okay.

Arun Sharma
Dec 23, 2013, 4:20:02 AM
On 12/18/13 8:09 PM, Namhyung Kim wrote:
> Hi Arun,
>
> 2013-12-18 (Wed), 16:08 +0530, Arun Sharma:
>> On 12/18/13 3:16 PM, Ingo Molnar wrote:
>>
>>> My main complaint is that any variation of 'cumulative' or 'cumulate' is
>>> a tongue-twister to users. I certainly won't be able to remember it
>>> and will have to call up the manpage every time I use it - which will
>>> be very annoying. I'd probably not use the feature much.
>>
>> I can remember it, mainly because it's such an unusual word :)
>> Agree that we could use something simpler/easier to remember.
>>
>> My other user space projects have been keeping me busy. But still very
>> interested in this feature. Will test this patch series over the holidays.
>
> Thanks in advance for your test and feedback! :)

Looks great! One of the features I'm missing compared to my earlier
--sort inclusive series of patches is that, in the --children mode, I'd
still like to see callchains (at least in --tui). Looks like --children
and -G -s pid can't be used together in this implementation.

-Arun

Namhyung Kim
Dec 24, 2013, 3:00:01 AM
Hi Arun,

On Mon, 23 Dec 2013 14:46:17 +0530, Arun Sharma wrote:
> On 12/18/13 8:09 PM, Namhyung Kim wrote:
>> Hi Arun,
>>
>> 2013-12-18 (Wed), 16:08 +0530, Arun Sharma:
>>> On 12/18/13 3:16 PM, Ingo Molnar wrote:
>>>
>>>> My main complaint is that any variation of 'cumulative' or 'cumulate' is
>>>> a tongue-twister to users. I certainly won't be able to remember it
>>>> and will have to call up the manpage every time I use it - which will
>>>> be very annoying. I'd probably not use the feature much.
>>>
>>> I can remember it, mainly because it's such an unusual word :)
>>> Agree that we could use something simpler/easier to remember.
>>>
>>> My other user space projects have been keeping me busy. But still very
>>> interested in this feature. Will test this patch series over the holidays.
>>
>> Thanks in advance for your test and feedback! :)
>
> Looks great! One of the features I'm missing compared to my earlier
> --sort inclusive series of patches is that, in the --children mode,
> I'd still like to see callchains (at least in --tui). Looks like
> --children and -G -s pid can't be used together in this
> implementation.

Right, it wasn't supported in this version. But I plan to add it in
the near future. :)

Thanks,
Namhyung

tip-bot for Namhyung Kim
Jan 12, 2014, 1:40:03 PM
Commit-ID: 2037be53b2bceac3c2e648b8ff3fd62e21af2d35
Gitweb: http://git.kernel.org/tip/2037be53b2bceac3c2e648b8ff3fd62e21af2d35
Author: Namhyung Kim <namhyu...@lge.com>
AuthorDate: Wed, 18 Dec 2013 14:21:09 +0900
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 18 Dec 2013 14:42:30 -0300

perf sort: Compare addresses if no symbol info

If a hist entry doesn't have symbol information, compare it with its
address. Currently it only compares its level or whether it's NULL.

This can lead to an undesired result like an overhead exceeding 100%,
especially when callchain accumulation is enabled by a later patch.

Signed-off-by: Namhyung Kim <namh...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Rodrigo Campos <rod...@sdfg.com.ar>
Cc: Stephane Eranian <era...@google.com>
Link: http://lkml.kernel.org/r/1387344086-12744-2-gi...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/sort.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 8b0bb1f..68a4fd2 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -161,6 +161,11 @@ struct sort_entry sort_dso = {

/* --sort symbol */

+static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
+{
+ return (int64_t)(right_ip - left_ip);
+}
+
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
{
u64 ip_l, ip_r;
@@ -183,7 +188,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
int64_t ret;

if (!left->ms.sym && !right->ms.sym)
- return right->level - left->level;
+ return _sort__addr_cmp(left->ip, right->ip);

/*
* comparing symbol address alone is not enough since it's a
@@ -372,7 +377,7 @@ sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
struct addr_map_symbol *from_r = &right->branch_info->from;

if (!from_l->sym && !from_r->sym)
- return right->level - left->level;
+ return _sort__addr_cmp(from_l->addr, from_r->addr);

return _sort__sym_cmp(from_l->sym, from_r->sym);
}
@@ -384,7 +389,7 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
struct addr_map_symbol *to_r = &right->branch_info->to;

if (!to_l->sym && !to_r->sym)
- return right->level - left->level;
+ return _sort__addr_cmp(to_l->addr, to_r->addr);

return _sort__sym_cmp(to_l->sym, to_r->sym);

tip-bot for Namhyung Kim
Jan 12, 2014, 1:40:03 PM
Commit-ID: f1cbf78d175e6202a29f53a7f915520e40a37baf
Gitweb: http://git.kernel.org/tip/f1cbf78d175e6202a29f53a7f915520e40a37baf
Author: Namhyung Kim <namhyu...@lge.com>
AuthorDate: Wed, 18 Dec 2013 14:21:11 +0900
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 18 Dec 2013 14:44:05 -0300

perf hists: Do not pass period and weight to add_hist_entry()

The @entry argument already has this info, so there is no need to pass
it separately.

Signed-off-by: Namhyung Kim <namh...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Rodrigo Campos <rod...@sdfg.com.ar>
Link: http://lkml.kernel.org/r/1387344086-12744-4-gi...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/hist.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 822903e..63234e3 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -342,15 +342,15 @@ static u8 symbol__parent_filter(const struct symbol *parent)
}

static struct hist_entry *add_hist_entry(struct hists *hists,
- struct hist_entry *entry,
- struct addr_location *al,
- u64 period,
- u64 weight)
+ struct hist_entry *entry,
+ struct addr_location *al)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
int64_t cmp;
+ u64 period = entry->stat.period;
+ u64 weight = entry->stat.weight;

p = &hists->entries_in->rb_node;

@@ -437,7 +437,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.transaction = transaction,
};

- return add_hist_entry(hists, &entry, al, period, weight);
+ return add_hist_entry(hists, &entry, al);
}

int64_t
--

tip-bot for Namhyung Kim
Jan 12, 2014, 1:40:04 PM
Commit-ID: 68f6d0224b2a19a4da4a12a5081f01776e5150df
Gitweb: http://git.kernel.org/tip/68f6d0224b2a19a4da4a12a5081f01776e5150df
Author: Namhyung Kim <namhyu...@lge.com>
AuthorDate: Wed, 18 Dec 2013 14:21:10 +0900
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 18 Dec 2013 14:43:04 -0300

perf sort: Do not compare dso again

The commit 09600e0f9ebb ("perf tools: Compare dso's also when comparing
symbols") added a comparison of dso when comparing symbols.

But if the sort key already has dso, it doesn't need to do it again
since entries with a different dso have already been filtered out.

Signed-off-by: Namhyung Kim <namh...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Arun Sharma <ash...@fb.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Cc: Rodrigo Campos <rod...@sdfg.com.ar>
Link: http://lkml.kernel.org/r/1387344086-12744-3-gi...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/sort.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 68a4fd2..635cd8f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -13,6 +13,7 @@ int have_ignore_callees = 0;
int sort__need_collapse = 0;
int sort__has_parent = 0;
int sort__has_sym = 0;
+int sort__has_dso = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;

enum sort_type sort__first_dimension;
@@ -194,9 +195,11 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
* comparing symbol address alone is not enough since it's a
* relative address within a dso.
*/
- ret = sort__dso_cmp(left, right);
- if (ret != 0)
- return ret;
+ if (!sort__has_dso) {
+ ret = sort__dso_cmp(left, right);
+ if (ret != 0)
+ return ret;
+ }

return _sort__sym_cmp(left->ms.sym, right->ms.sym);
}
@@ -1061,6 +1064,8 @@ int sort_dimension__add(const char *tok)
sort__has_parent = 1;
} else if (sd->entry == &sort_sym) {
sort__has_sym = 1;
+ } else if (sd->entry == &sort_dso) {
+ sort__has_dso = 1;
}

__sort_dimension__add(sd, i);