Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH 20/61] perf c2c report: Add cacheline hists processing

88 views
Skip to first unread message

Jiri Olsa

unread,
Sep 19, 2016, 9:20:06 AM9/19/16
to
Store cacheline related entries in nested hist
object for each cacheline data. Nested entries
are sorted by 'offset' within related cacheline.

We will allow specific sort keys to be configured
for nested cacheline data entries in following
patches.

Link: http://lkml.kernel.org/n/tip-37f751rgqa...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 90 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 84 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 29fb9573e292..cd0406ab8b5d 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -61,6 +61,32 @@ static struct hist_entry_ops c2c_entry_ops = {
.free = c2c_he_free,
};

+static int c2c_hists__init(struct c2c_hists *hists,
+ const char *sort);
+
+static struct hists*
+he__get_hists(struct hist_entry *he,
+ const char *sort)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *hists;
+ int ret;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists)
+ return &c2c_he->hists->hists;
+
+ hists = c2c_he->hists = zalloc(sizeof(*hists));
+ if (!hists)
+ return NULL;
+
+ ret = c2c_hists__init(hists, sort);
+ if (ret)
+ free(hists);
+
+ return &hists->hists;
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -70,7 +96,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct hists *hists = &c2c.hists.hists;
struct hist_entry *he;
struct addr_location al;
- struct mem_info *mi;
+ struct mem_info *mi, *mi_dup;
int ret;

if (machine__resolve(machine, &al, sample) < 0) {
@@ -83,19 +109,50 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (mi == NULL)
return -ENOMEM;

+ mi_dup = memdup(mi, sizeof(*mi));
+ if (!mi_dup)
+ goto free_mi;
+
he = hists__add_entry_ops(hists, &c2c_entry_ops,
&al, NULL, NULL, mi,
sample, true);
- if (he == NULL) {
- free(mi);
- return -ENOMEM;
- }
+ if (he == NULL)
+ goto free_mi_dup;

hists__inc_nr_samples(hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);

+ if (!ret) {
+ mi = mi_dup;
+
+ mi_dup = memdup(mi, sizeof(*mi));
+ if (!mi_dup)
+ goto free_mi;
+
+ hists = he__get_hists(he, "offset");
+ if (!hists)
+ goto free_mi_dup;
+
+ he = hists__add_entry_ops(hists, &c2c_entry_ops,
+ &al, NULL, NULL, mi,
+ sample, true);
+ if (he == NULL)
+ goto free_mi_dup;
+
+ hists__inc_nr_samples(hists, he->filtered);
+ ret = hist_entry__append_callchain(he, sample);
+ }
+
+out:
addr_location__put(&al);
return ret;
+
+free_mi_dup:
+ free(mi_dup);
+free_mi:
+ free(mi);
+ ret = -ENOMEM;
+ goto out;
}

static struct perf_c2c c2c = {
@@ -400,6 +457,27 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
return hpp_list__parse(&c2c_hists->list, output, sort);
}

+static int filter_cb(struct hist_entry *he __maybe_unused)
+{
+ return 0;
+}
+
+static int resort_cl_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ if (c2c_hists) {
+ hists__collapse_resort(&c2c_hists->hists, NULL);
+ hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
+ }
+
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -458,7 +536,7 @@ static int perf_c2c__report(int argc, const char **argv)
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");

hists__collapse_resort(&c2c.hists.hists, NULL);
- hists__output_resort(&c2c.hists.hists, &prog);
+ hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb);

ui_progress__finish();

--
2.7.4

Jiri Olsa

unread,
Sep 19, 2016, 9:20:06 AM9/19/16
to
hi,
sending a new version of the c2c patches (v3), originally posted here:
http://lwn.net/Articles/588866/

I took the old set and reworked it to fit into current upstream code.
It follows the same logic as original patch and provides (almost) the
same stdio interface. In addition new TUI interface was added.

The perf c2c tool provides means for Shared Data C2C/HITM analysis.
It allows you to track down the cacheline contentions. The tool is
based on x86's load latency and precise store facility events provided
by Intel CPUs.

The tool was tested by Joe Mario and has proven to be useful, finding
several cases of cacheline contention. Joe also wrote a blog post about
the c2c tool, with examples, located here:

https://joemario.github.io/blog/2016/09/01/c2c-blog/

Code is also available in:
git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
perf/c2c

Testing:
$ perf c2c record -a [workload]
$ perf c2c report [--stdio]
$ man perf-c2c

It's most likely you won't generate any remote HITMs on common
laptops, so to get results for local HITMs please use:

$ perf c2c report -d lcl [--stdio]


thanks,
jirka


Cc: "Michael Trapp" <michae...@sap.com>
Cc: "Long, Wai Man" <waima...@hpe.com>
---
Jiri Olsa (61):
perf symbols: Do not open device files again
perf tools: Remove superfluous initialization of weight
perf tools: Make hist_entry__snprintf work over struct perf_hpp_list
perf tools: Use bigger buffer for stdio headers
perf tools: Introduce c2c_decode_stats function
perf tools: Introduce c2c_add_stats function
perf tools: Make reset_dimensions global
perf tools: Make output_field_add and sort_dimension__add global
perf tools: Make several sorting functions global
perf tools: Make several display functions global
perf tools: Make hist_entry__snprintf function global
perf tools: Make hists__fprintf_headers function global
perf c2c: Add c2c command
perf c2c: Add record subcommand
perf c2c: Add report subcommand
perf c2c report: Add dimension support
perf c2c report: Add sort_entry dimension support
perf c2c report: Fallback to standard dimensions
perf c2c report: Add sample processing
perf c2c report: Add cacheline hists processing
perf c2c report: Decode c2c_stats for hist entries
perf c2c report: Add header macros
perf c2c report: Add dcacheline dimension key
perf c2c report: Add offset dimension key
perf c2c report: Add iaddr dimension key
perf c2c report: Add hitm related dimension keys
perf c2c report: Add stores related dimension keys
perf c2c report: Add loads related dimension keys
perf c2c report: Add llc and remote loads related dimension keys
perf c2c report: Add llc load miss dimension key
perf c2c report: Add total record sort key
perf c2c report: Add total loads sort key
perf c2c report: Add hitm percent sort key
perf c2c report: Add hitm/store percent related sort keys
perf c2c report: Add dram related sort keys
perf c2c report: Add pid sort key
perf c2c report: Add tid sort key
perf c2c report: Add symbol and dso sort keys
perf c2c report: Add node sort key
perf c2c report: Add stats related sort keys
perf c2c report: Add cpu cnt sort key
perf c2c report: Add src line sort key
perf c2c report: Setup number of header lines for hists
perf c2c report: Set final resort fields
perf c2c report: Add stdio output support
perf c2c report: Add main browser
perf c2c report: Add cacheline browser
perf c2c report: Add global stats stdio output
perf c2c report: Add shared cachelines stats stdio output
perf c2c report: Add c2c related stats stdio output
perf c2c report: Allow to report callchains
perf c2c report: Limit the cachelines table entries
perf c2c report: Add support to choose local HITMs
perf c2c report: Allow to set cacheline sort fields
perf c2c report: Recalc width of global sort entries
perf c2c report: Add cacheline index entry
perf c2c report: Add support to manage symbol name length
perf c2c report: Iterate node display in browser
perf c2c report: Add help windows
perf c2c: Add man page and credits
perf tools: Fix width computation for srcline sort entry

tools/perf/Build | 1 +
tools/perf/Documentation/perf-c2c.txt | 276 ++++
tools/perf/builtin-c2c.c | 2732 +++++++++++++++++++++++++++++++++
tools/perf/builtin.h | 1 +
tools/perf/perf.c | 1 +
tools/perf/ui/browsers/hists.c | 4 +-
tools/perf/ui/browsers/hists.h | 1 +
tools/perf/ui/hist.c | 2 +-
tools/perf/ui/stdio/hist.c | 11 +-
tools/perf/util/dso.c | 8 +-
tools/perf/util/evsel.c | 2 -
tools/perf/util/hist.c | 7 +-
tools/perf/util/hist.h | 6 +
tools/perf/util/mem-events.c | 128 ++
tools/perf/util/mem-events.h | 37 +
tools/perf/util/sort.c | 18 +-
tools/perf/util/sort.h | 13 +
tools/perf/util/symbol.c | 3 -
18 files changed, 3222 insertions(+), 29 deletions(-)
create mode 100644 tools/perf/Documentation/perf-c2c.txt
create mode 100644 tools/perf/builtin-c2c.c

Jiri Olsa

unread,
Sep 19, 2016, 9:20:11 AM9/19/16
to
Will be used from external places in following patches.

Link: http://lkml.kernel.org/n/tip-uip4x9u74t...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/ui/stdio/hist.c | 4 ++--
tools/perf/util/hist.h | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 0a32b48eda80..3434d571ddd1 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,8 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
return 0;
}

-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
- struct perf_hpp_list *hpp_list)
+int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ef9985cba1de..aa5ddfa1fa22 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -486,5 +486,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list);

#endif /* __PERF_HIST_H */
--
2.7.4

Jiri Olsa

unread,
Sep 19, 2016, 9:20:15 AM9/19/16
to
Set resort/display fields for both cachelines and
single cacheline displays.

Cachelines are sorted on:
rmt_hitm

will be made configurable in following patches.

The following fields are displayed for cachelines:
dcacheline
tot_recs
percent_hitm
tot_hitm,lcl_hitm,rmt_hitm
stores,stores_l1hit,stores_l1miss
dram_lcl,dram_rmt
ld_llcmiss
tot_loads
ld_fbhit,ld_l1hit,ld_l2hit
ld_lclhit,ld_rmthit

The single cacheline is sorted by:
offset,rmt_hitm,lcl_hitm

will be made configurable in following patches.

The following fields are displayed for each cacheline:
percent_rmt_hitm
percent_lcl_hitm
percent_stores_l1hit
percent_stores_l1miss
offset
pid
tid
mean_rmt
mean_lcl
mean_load
cpucnt
symbol
dso
node

Link: http://lkml.kernel.org/n/tip-0rclftliyw...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index f0983d2b26e3..d7b47c69aa07 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1642,6 +1642,23 @@ static int resort_cl_cb(struct hist_entry *he)
c2c_hists = c2c_he->hists;

if (c2c_hists) {
+ c2c_hists__reinit(c2c_hists,
+ "percent_rmt_hitm,"
+ "percent_lcl_hitm,"
+ "percent_stores_l1hit,"
+ "percent_stores_l1miss,"
+ "offset,"
+ "pid,"
+ "tid,"
+ "mean_rmt,"
+ "mean_lcl,"
+ "mean_load,"
+ "cpucnt,"
+ "symbol,"
+ "dso,"
+ "node",
+ "offset,rmt_hitm,lcl_hitm");
+
hists__collapse_resort(&c2c_hists->hists, NULL);
hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
}
@@ -1774,6 +1791,20 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_session;
}

+ c2c_hists__reinit(&c2c.hists,
+ "dcacheline,"
+ "tot_recs,"
+ "percent_hitm,"
+ "tot_hitm,lcl_hitm,rmt_hitm,"
+ "stores,stores_l1hit,stores_l1miss,"
+ "dram_lcl,dram_rmt,"
+ "ld_llcmiss,"
+ "tot_loads,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,ld_rmthit",
+ "rmt_hitm"
+ );
+
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");

hists__collapse_resort(&c2c.hists.hists, NULL);
--
2.7.4

Jiri Olsa

unread,
Sep 19, 2016, 9:20:18 AM9/19/16
to
Use the resort callbacks to compute the columns' widths.

Only the widths of the global sort entries are computed; c2c entries
have a fixed width.

Link: http://lkml.kernel.org/n/tip-zyayvq2u3d...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index ff8a66ee7092..c93a766190b1 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1726,11 +1726,21 @@ static inline int valid_hitm_or_store(struct hist_entry *he)
return has_hitm || c2c_he->stats.store;
}

+static void calc_width(struct hist_entry *he)
+{
+ struct c2c_hists *c2c_hists;
+
+ c2c_hists = container_of(he->hists, struct c2c_hists, hists);
+ hists__calc_col_len(&c2c_hists->hists, he);
+}
+
static int filter_cb(struct hist_entry *he)
{
if (c2c.show_src && !he->srcline)
he->srcline = hist_entry__get_srcline(he);

+ calc_width(he);
+
if (!valid_hitm_or_store(he))
he->filtered = HIST_FILTER__C2C;

@@ -1746,6 +1756,8 @@ static int resort_cl_cb(struct hist_entry *he)
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_hists = c2c_he->hists;

+ calc_width(he);
+
if (display && c2c_hists) {
c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);

--
2.7.4

Jiri Olsa

unread,
Sep 19, 2016, 9:30:08 AM9/19/16
to
Adding dcacheline dimension key support. It
displays cacheline address as hex number.

Using a c2c wrapper around the standard 'dcacheline' object
to define its own header and a simple (just address)
cacheline output.

Link: http://lkml.kernel.org/n/tip-j5enppr8e7...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c21124e6bb63..060ee1050da9 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1,5 +1,6 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/stringify.h>
#include "util.h"
#include "debug.h"
#include "builtin.h"
@@ -7,6 +8,7 @@
#include "mem-events.h"
#include "session.h"
#include "hist.h"
+#include "sort.h"
#include "tool.h"
#include "data.h"
#include "sort.h"
@@ -271,6 +273,33 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
}

+static char *hex_str(u64 val)
+{
+ static char buf[20];
+
+ snprintf(buf, 20, "0x%" PRIx64, val);
+ return buf;
+}
+
+static int64_t
+dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__dcacheline_cmp(left, right);
+}
+
+static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ uint64_t addr = 0;
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ if (he->mem_info)
+ addr = cl_address(he->mem_info->daddr.addr);
+
+ return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr));
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -306,12 +335,21 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
}, \
}

+static struct c2c_dimension dim_dcacheline = {
+ .header = HEADER_LOW("Cacheline"),
+ .name = "dcacheline",
+ .cmp = dcacheline_cmp,
+ .entry = dcacheline_entry,
+ .width = 18,
+};
+
#undef HEADER_LOW
#undef HEADER_BOTH
#undef HEADER_SPAN
#undef HEADER_SPAN_LOW

static struct c2c_dimension *dimensions[] = {
+ &dim_dcacheline,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 19, 2016, 9:30:12 AM9/19/16
to
Adding c2c command base wirings. Its implementation
is going to be added gradually in following patches.

Link: http://lkml.kernel.org/n/tip-svq2kccqja...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/Build | 1 +
tools/perf/builtin-c2c.c | 23 +++++++++++++++++++++++
tools/perf/builtin.h | 1 +
tools/perf/perf.c | 1 +
4 files changed, 26 insertions(+)
create mode 100644 tools/perf/builtin-c2c.c

diff --git a/tools/perf/Build b/tools/perf/Build
index a43fae7f439a..b12d5d1666e3 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -21,6 +21,7 @@ perf-y += builtin-inject.o
perf-y += builtin-mem.o
perf-y += builtin-data.o
perf-y += builtin-version.o
+perf-y += builtin-c2c.o

perf-$(CONFIG_AUDIT) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
new file mode 100644
index 000000000000..8252ed0ba5d0
--- /dev/null
+++ b/tools/perf/builtin-c2c.c
@@ -0,0 +1,23 @@
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "util.h"
+#include "debug.h"
+#include "builtin.h"
+#include <subcmd/parse-options.h>
+
+static const char * const c2c_usage[] = {
+ "perf c2c",
+ NULL
+};
+
+int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ const struct option c2c_options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_END()
+ };
+
+ argc = parse_options(argc, argv, c2c_options, c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 41c24010ab43..0bcf68e98ccc 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -18,6 +18,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix);
int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
int cmd_buildid_list(int argc, const char **argv, const char *prefix);
int cmd_config(int argc, const char **argv, const char *prefix);
+int cmd_c2c(int argc, const char **argv, const char *prefix);
int cmd_diff(int argc, const char **argv, const char *prefix);
int cmd_evlist(int argc, const char **argv, const char *prefix);
int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 64c06961bfe4..aa23b3347d6b 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -43,6 +43,7 @@ static struct cmd_struct commands[] = {
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
{ "config", cmd_config, 0 },
+ { "c2c", cmd_c2c, 0 },
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:40:06 AM9/22/16
to
Introducing c2c_decode_stats function, which decodes
data_src data into new struct c2c_stats.

Original-patch-by: Dick Fowles <rfo...@redhat.com>
Original-patch-by: Don Zickus <dzi...@redhat.com>
Link: http://lkml.kernel.org/n/tip-7garqfmx5i...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/util/mem-events.c | 98 ++++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/mem-events.h | 36 ++++++++++++++++
2 files changed, 134 insertions(+)

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bbc368e7d1e4..502fcee91973 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -9,6 +9,7 @@
#include "mem-events.h"
#include "debug.h"
#include "symbol.h"
+#include "sort.h"

unsigned int perf_mem_events__loads_ldlat = 30;

@@ -268,3 +269,100 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in

return i;
}
+
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
+{
+ union perf_mem_data_src *data_src = &mi->data_src;
+ u64 daddr = mi->daddr.addr;
+ u64 op = data_src->mem_op;
+ u64 lvl = data_src->mem_lvl;
+ u64 snoop = data_src->mem_snoop;
+ u64 lock = data_src->mem_lock;
+ int err = 0;
+
+#define P(a, b) PERF_MEM_##a##_##b
+
+ stats->nr_entries++;
+
+ if (lock & P(LOCK, LOCKED)) stats->locks++;
+
+ if (op & P(OP, LOAD)) {
+ /* load */
+ stats->load++;
+
+ if (!daddr) {
+ stats->ld_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->ld_uncache++;
+ if (lvl & P(LVL, IO)) stats->ld_io++;
+ if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
+ if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
+ if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+ if (lvl & P(LVL, L3 )) {
+ if (snoop & P(SNOOP, HITM))
+ stats->lcl_hitm++;
+ else
+ stats->ld_llchit++;
+ }
+
+ if (lvl & P(LVL, LOC_RAM)) {
+ stats->lcl_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+
+ if ((lvl & P(LVL, REM_RAM1)) ||
+ (lvl & P(LVL, REM_RAM2))) {
+ stats->rmt_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+ }
+
+ if ((lvl & P(LVL, REM_CCE1)) ||
+ (lvl & P(LVL, REM_CCE2))) {
+ if (snoop & P(SNOOP, HIT))
+ stats->rmt_hit++;
+ else if (snoop & P(SNOOP, HITM))
+ stats->rmt_hitm++;
+ }
+
+ if ((lvl & P(LVL, MISS)))
+ stats->ld_miss++;
+
+ } else if (op & P(OP, STORE)) {
+ /* store */
+ stats->store++;
+
+ if (!daddr) {
+ stats->st_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->st_uncache++;
+ if (lvl & P(LVL, L1 )) stats->st_l1hit++;
+ }
+ if (lvl & P(LVL, MISS))
+ if (lvl & P(LVL, L1)) stats->st_l1miss++;
+ } else {
+ /* unparsable data_src? */
+ stats->noparse++;
+ return -1;
+ }
+
+ if (!mi->daddr.map || !mi->iaddr.map) {
+ stats->nomap++;
+ return -1;
+ }
+
+#undef P
+ return err;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 7f69bf9d789d..e111a2a2b18f 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -2,6 +2,10 @@
#define __PERF_MEM_EVENTS_H

#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include "stat.h"

struct perf_mem_event {
bool record;
@@ -33,4 +37,36 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);

int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);

+struct c2c_stats {
+ u32 nr_entries;
+
+ u32 locks; /* count of 'lock' transactions */
+ u32 store; /* count of all stores in trace */
+ u32 st_uncache; /* stores to uncacheable address */
+ u32 st_noadrs; /* cacheable store with no address */
+ u32 st_l1hit; /* count of stores that hit L1D */
+ u32 st_l1miss; /* count of stores that miss L1D */
+ u32 load; /* count of all loads in trace */
+ u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+ u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
+ u32 ld_uncache; /* loads to uncacheable address */
+ u32 ld_io; /* loads to io address */
+ u32 ld_miss; /* loads miss */
+ u32 ld_noadrs; /* cacheable load with no address */
+ u32 ld_fbhit; /* count of loads hitting Fill Buffer */
+ u32 ld_l1hit; /* count of loads that hit L1D */
+ u32 ld_l2hit; /* count of loads that hit L2D */
+ u32 ld_llchit; /* count of loads that hit LLC */
+ u32 lcl_hitm; /* count of loads with local HITM */
+ u32 rmt_hitm; /* count of loads with remote HITM */
+ u32 rmt_hit; /* count of loads with remote hit clean; */
+ u32 lcl_dram; /* count of loads miss to local DRAM */
+ u32 rmt_dram; /* count of loads miss to remote DRAM */
+ u32 nomap; /* count of load/stores with no phys adrs */
+ u32 noparse; /* count of unparsable data sources */
+};
+
+struct hist_entry;
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+
#endif /* __PERF_MEM_EVENTS_H */
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:40:06 AM9/22/16
to
Add --call-graph option to properly setup callchain
code. Adding default settings to display callchains
whenever they are stored in the perf.data.

Link: http://lkml.kernel.org/n/tip-inykbom2f1...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 71b6850809a2..31e311959480 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -17,6 +17,7 @@
#include "evsel.h"
#include <asm/bug.h>
#include "ui/browsers/hists.h"
+#include "evlist.h"

struct c2c_hists {
struct hists hists;
@@ -181,6 +182,11 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}

+ ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
+ evsel, &al, sysctl_perf_event_max_stack);
+ if (ret)
+ goto out;
+
mi = sample__resolve_mem(sample, &al);
if (mi == NULL)
return -ENOMEM;
@@ -2114,6 +2120,58 @@ static void ui_quirks(void)
}
}

+#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
+
+const char callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+ CALLCHAIN_REPORT_HELP
+ "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
+
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+ struct callchain_param *callchain = opt->value;
+
+ callchain->enabled = !unset;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ symbol_conf.use_callchain = false;
+ callchain->mode = CHAIN_NONE;
+ return 0;
+ }
+
+ return parse_callchain_report_opt(arg);
+}
+
+static int setup_callchain(struct perf_evlist *evlist)
+{
+ u64 sample_type = perf_evlist__combined_sample_type(evlist);
+ enum perf_call_graph_mode mode = CALLCHAIN_NONE;
+
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER))
+ mode = CALLCHAIN_DWARF;
+ else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ mode = CALLCHAIN_LBR;
+ else if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ mode = CALLCHAIN_FP;
+
+ if (!callchain_param.enabled &&
+ callchain_param.mode != CHAIN_NONE &&
+ mode != CALLCHAIN_NONE) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ ui__error("Can't register callchain params.\n");
+ return -EINVAL;
+ }
+ }
+
+ callchain_param.record_mode = mode;
+ callchain_param.min_percent = 0;
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -2121,6 +2179,7 @@ static int perf_c2c__report(int argc, const char **argv)
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
+ char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
const struct option c2c_options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -2135,6 +2194,10 @@ static int perf_c2c__report(int argc, const char **argv)
#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Use the stdio interface"),
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
+ "print_type,threshold[,print_limit],order,sort_key[,branch],value",
+ callchain_help, &parse_callchain_opt,
+ callchain_default_opt),
OPT_END()
};
int err = 0;
@@ -2176,6 +2239,10 @@ static int perf_c2c__report(int argc, const char **argv)
goto out;
}

+ err = setup_callchain(session->evlist);
+ if (err)
+ goto out_session;
+
if (symbol__init(&session->header.env) < 0)
goto out_session;

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:40:07 AM9/22/16
to
Allow to setup number of header lines for c2c hists objects.

Link: http://lkml.kernel.org/n/tip-4ilsf0ulub...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 8fb798c8a790..e281930e57da 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -100,11 +100,13 @@ static struct hist_entry_ops c2c_entry_ops = {
};

static int c2c_hists__init(struct c2c_hists *hists,
- const char *sort);
+ const char *sort,
+ int nr_header_lines);

static struct c2c_hists*
he__get_c2c_hists(struct hist_entry *he,
- const char *sort)
+ const char *sort,
+ int nr_header_lines)
{
struct c2c_hist_entry *c2c_he;
struct c2c_hists *hists;
@@ -118,7 +120,7 @@ he__get_c2c_hists(struct hist_entry *he,
if (!hists)
return NULL;

- ret = c2c_hists__init(hists, sort);
+ ret = c2c_hists__init(hists, sort, nr_header_lines);
if (ret)
free(hists);

@@ -212,7 +214,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (!mi_dup)
goto free_mi;

- c2c_hists = he__get_c2c_hists(he, "offset");
+ c2c_hists = he__get_c2c_hists(he, "offset", 2);
if (!c2c_hists)
goto free_mi_dup;

@@ -1588,7 +1590,8 @@ static int hpp_list__parse(struct perf_hpp_list *hpp_list,
}

static int c2c_hists__init(struct c2c_hists *hists,
- const char *sort)
+ const char *sort,
+ int nr_header_lines)
{
__hists__init(&hists->hists, &hists->list);

@@ -1599,6 +1602,9 @@ static int c2c_hists__init(struct c2c_hists *hists,
*/
perf_hpp_list__init(&hists->list);

+ /* Overload number of header lines.*/
+ hists->list.nr_header_lines = nr_header_lines;
+
return hpp_list__parse(&hists->list, NULL, sort);
}

@@ -1727,7 +1733,7 @@ static int perf_c2c__report(int argc, const char **argv)

file.path = input_name;

- err = c2c_hists__init(&c2c.hists, "dcacheline");
+ err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
goto out;
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:40:07 AM9/22/16
to
Display the global shared cachelines related stats table as part
of the stdio output or when the --stats option is specified:

$ perf c2c report --stats
...
=================================================
Global Shared Cache Line Event Information
=================================================
Total Shared Cache Lines : 1384
Load HITs on shared lines : 5995
Fill Buffer Hits on shared lines : 1726
L1D hits on shared lines : 1943
L2D hits on shared lines : 0
LLC hits on shared lines : 1360
Locked Access on shared lines : 1993
Store HITs on shared lines : 1504
Store L1D hits on shared lines : 1446
Total Merged records : 3527

Original-patch-by: Dick Fowles <rfo...@redhat.com>
Original-patch-by: Don Zickus <dzi...@redhat.com>
Link: http://lkml.kernel.org/n/tip-p0gty8ctbd...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 49a3af556fa4..e85343e4ffe7 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -56,6 +56,10 @@ struct perf_c2c {
bool show_src;
bool use_stdio;
bool stats_only;
+
+ /* HITM shared clines stats */
+ struct c2c_stats hitm_stats;
+ int shared_clines;
};

static struct perf_c2c c2c;
@@ -1729,6 +1733,39 @@ static int setup_nodes(struct perf_session *session)
return 0;
}

+#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
+
+static int resort_hitm_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ if (HAS_HITMS(c2c_he)) {
+ c2c.shared_clines++;
+ c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats);
+ }
+
+ return 0;
+}
+
+static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb)
+{
+ struct rb_node *next = rb_first(&hists->entries);
+ int ret = 0;
+
+ while (next) {
+ struct hist_entry *he;
+
+ he = rb_entry(next, struct hist_entry, rb_node);
+ ret = cb(he);
+ if (ret)
+ break;
+ next = rb_next(&he->rb_node);
+ }
+
+ return ret;
+}
+
static void print_c2c__display_stats(FILE *out)
{
int llc_misses;
@@ -1774,6 +1811,26 @@ static void print_c2c__display_stats(FILE *out)
fprintf(out, " Unable to parse data source : %10d\n", stats->noparse);
}

+static void print_shared_cacheline_info(FILE *out)
+{
+ struct c2c_stats *stats = &c2c.hitm_stats;
+ int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm;
+
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Global Shared Cache Line Event Information \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Total Shared Cache Lines : %10d\n", c2c.shared_clines);
+ fprintf(out, " Load HITs on shared lines : %10d\n", stats->load);
+ fprintf(out, " Fill Buffer Hits on shared lines : %10d\n", stats->ld_fbhit);
+ fprintf(out, " L1D hits on shared lines : %10d\n", stats->ld_l1hit);
+ fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit);
+ fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm);
+ fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks);
+ fprintf(out, " Store HITs on shared lines : %10d\n", stats->store);
+ fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit);
+ fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store);
+}
+
static void print_cacheline(struct c2c_hists *c2c_hists,
struct hist_entry *he_cl,
struct perf_hpp_list *hpp_list,
@@ -1838,6 +1895,8 @@ static void perf_c2c__hists_fprintf(FILE *out)
setup_pager();

print_c2c__display_stats(out);
+ fprintf(out, "\n");
+ print_shared_cacheline_info(out);

if (c2c.stats_only)
return;
@@ -2128,7 +2187,8 @@ static int perf_c2c__report(int argc, const char **argv)
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");

hists__collapse_resort(&c2c.hists.hists, NULL);
- hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb);
+ hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb);
+ hists__iterate_cb(&c2c.hists.hists, resort_cl_cb);

ui_progress__finish();

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:40:08 AM9/22/16
to
Adding dcacheline dimension key support. It
displays cacheline address as hex number.

Using a c2c wrapper around the standard 'dcacheline' object
to define its own header and a simple (just address)
cacheline output.

Link: http://lkml.kernel.org/n/tip-j5enppr8e7...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index cfa12930b77b..335c0fd30757 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -306,7 +335,16 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
}, \
}

+static struct c2c_dimension dim_dcacheline = {
+ .header = HEADER_LOW("Cacheline"),
+ .name = "dcacheline",
+ .cmp = dcacheline_cmp,
+ .entry = dcacheline_entry,
+ .width = 18,
+};
+

Jiri Olsa

unread,
Sep 22, 2016, 11:40:08 AM9/22/16
to
Adding basic sample processing specific hist_entry
allocation callbacks (via hists__add_entry_ops).

Overloading 'struct hist_entry' object with new
'struct c2c_hist_entry'. The new hist entry object
will carry specific stats and nested hists objects.

Link: http://lkml.kernel.org/n/tip-ksr9smz4o1...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index a3481f86e2ae..29fb9573e292 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -16,6 +16,15 @@ struct c2c_hists {
struct perf_hpp_list list;
};

+struct c2c_hist_entry {
+ struct c2c_hists *hists;
+ /*
+ * must be at the end,
+ * because of its callchain dynamic entry
+ */
+ struct hist_entry he;
+};
+
struct perf_c2c {
struct perf_tool tool;
struct c2c_hists hists;
@@ -23,6 +32,86 @@ struct perf_c2c {

static struct perf_c2c c2c;

+static void *c2c_he_zalloc(size_t size)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = zalloc(size + sizeof(*c2c_he));
+ if (!c2c_he)
+ return NULL;
+
+ return &c2c_he->he;
+}
+
+static void c2c_he_free(void *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists) {
+ hists__delete_entries(&c2c_he->hists->hists);
+ free(c2c_he->hists);
+ }
+
+ free(c2c_he);
+}
+
+static struct hist_entry_ops c2c_entry_ops = {
+ .new = c2c_he_zalloc,
+ .free = c2c_he_free,
+};
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct hists *hists = &c2c.hists.hists;
+ struct hist_entry *he;
+ struct addr_location al;
+ struct mem_info *mi;
+ int ret;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ mi = sample__resolve_mem(sample, &al);
+ if (mi == NULL)
+ return -ENOMEM;
+
+ he = hists__add_entry_ops(hists, &c2c_entry_ops,
+ &al, NULL, NULL, mi,
+ sample, true);
+ if (he == NULL) {
+ free(mi);
+ return -ENOMEM;
+ }
+
+ hists__inc_nr_samples(hists, he->filtered);
+ ret = hist_entry__append_callchain(he, sample);
+
+ addr_location__put(&al);
+ return ret;
+}
+
+static struct perf_c2c c2c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+};
+
static const char * const c2c_usage[] = {
"perf c2c {record|report}",
NULL
@@ -314,6 +403,7 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
+ struct ui_progress prog;
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
@@ -330,9 +420,12 @@ static int perf_c2c__report(int argc, const char **argv)

argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc)
+ if (argc)
usage_with_options(report_c2c_usage, c2c_options);

+ if (!input_name || !strlen(input_name))
+ input_name = "perf.data";
+
file.path = input_name;

err = c2c_hists__init(&c2c.hists, "dcacheline");
@@ -356,6 +449,19 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_session;
}

+ err = perf_session__process_events(session);
+ if (err) {
+ pr_err("failed to process sample\n");
+ goto out_session;
+ }
+
+ ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
+
+ hists__collapse_resort(&c2c.hists.hists, NULL);
+ hists__output_resort(&c2c.hists.hists, &prog);
+
+ ui_progress__finish();
+
out_session:
perf_session__delete(session);
out:
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:40:08 AM9/22/16
to
Add __hist_entry__snprintf to take perf_hpp_list as an argument
instead of using he->hists->hpp_list. This way we can display
an arbitrary list of entries regardless of the hists setup, which
will be useful in the following patches.

Link: http://lkml.kernel.org/n/tip-j2sizkygla...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/ui/stdio/hist.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index c8dca34585ea..189665c315cc 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,7 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
return 0;
}

-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
@@ -384,7 +385,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
if (symbol_conf.exclude_other && !he->parent)
return 0;

- hists__for_each_format(he->hists, fmt) {
+ perf_hpp_list__for_each_format(hpp_list, fmt) {
if (perf_hpp__should_skip(fmt, he->hists))
continue;

@@ -410,6 +411,11 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
return hpp->buf - start;
}

+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+{
+ return __hist_entry__snprintf(he, hpp, he->hists->hpp_list);
+}
+
static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
struct perf_hpp *hpp,
struct hists *hists,
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:40:41 AM9/22/16
to
hi,
sending new version of c2c patches (v3) originally posted in here:
http://lwn.net/Articles/588866/

I took the old set and reworked it to fit into current upstream code.
It follows the same logic as original patch and provides (almost) the
same stdio interface. In addition new TUI interface was added.

The perf c2c tool provides means for Shared Data C2C/HITM analysis.
It allows you to track down the cacheline contentions. The tool is
based on x86's load latency and precise store facility events provided
by Intel CPUs.

The tool was tested by Joe Mario and has proven to be useful: it found
some cacheline contentions. Joe also wrote a blog post about the c2c tool,
with examples, located here:

https://joemario.github.io/blog/2016/09/01/c2c-blog/

v4 changes:
- 4 patches already queued
- used u32 for c2c_stats instead of int [Stanislav]
- fixed NO_SLANG=1 compilation [Kim]
- add __hist_entry__snprintf helper [Arnaldo]
perf/c2c_v4

Testing:
$ perf c2c record -a [workload]
$ perf c2c report [--stdio]
$ man perf-c2c

It's most likely you won't generate any remote HITMs on common
laptops, so to get results for local HITMs please use:

$ perf c2c report -d lcl [--stdio]

thanks,
jirka


Cc: "Michael Trapp" <michae...@sap.com>
Cc: "Long, Wai Man" <waima...@hpe.com>
Cc: Stanislav Ievlev <stanisla...@gmail.com>
Cc: Kim Phillips <kim.ph...@arm.com>
---
Jiri Olsa (57):
perf tools: Add __hist_entry__snprintf function
perf tools: Introduce c2c_decode_stats function
perf tools: Introduce c2c_add_stats function
perf tools: Make reset_dimensions global
perf tools: Make output_field_add and sort_dimension__add global
perf tools: Make several sorting functions global
perf tools: Make several display functions global
perf tools: Make __hist_entry__snprintf function global
tools/perf/Build | 1 +
tools/perf/Documentation/perf-c2c.txt | 276 ++++
tools/perf/builtin-c2c.c | 2742 +++++++++++++++++++++++++++++++++
tools/perf/builtin.h | 1 +
tools/perf/perf.c | 1 +
tools/perf/ui/browsers/hists.c | 4 +-
tools/perf/ui/browsers/hists.h | 1 +
tools/perf/ui/hist.c | 2 +-
tools/perf/ui/stdio/hist.c | 12 +-
tools/perf/util/hist.c | 1 +
tools/perf/util/hist.h | 6 +
tools/perf/util/mem-events.c | 128 ++
tools/perf/util/mem-events.h | 37 +
tools/perf/util/sort.c | 18 +-
tools/perf/util/sort.h | 12 +
15 files changed, 3227 insertions(+), 15 deletions(-)

Jiri Olsa

unread,
Sep 22, 2016, 11:50:05 AM9/22/16
to
Adding HITM percent dimension key wrapper.

It is to be displayed in the main cachelines
overall output:

percent_hitm

It displays HITMs percentage for cacheline.

It counts remote HITMs at the moment, but it
is changed later to support local as well,
based on the sort configuration.

Link: http://lkml.kernel.org/n/tip-czd17qsh5u...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 87 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 11b3db91fce9..e24472f100c6 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -562,6 +562,83 @@ tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return tot_recs_left - tot_recs_right;
}

+typedef double (get_percent_cb)(struct c2c_hist_entry *);
+
+static int
+percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, get_percent_cb get_percent)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ per = get_percent(c2c_he);
+
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per);
+}
+
+static double percent_hitm(struct c2c_hist_entry *c2c_he)
+{
+ struct c2c_hists *hists;
+ struct c2c_stats *stats;
+ struct c2c_stats *total;
+ int tot, st;
+ double p;
+
+ hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
+ stats = &c2c_he->stats;
+ total = &hists->stats;
+
+ st = stats->rmt_hitm;
+ tot = total->rmt_hitm;
+
+ p = tot ? (double) st / tot : 0;
+
+ return 100 * p;
+}
+
+static int
+percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+ double per;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ per = percent_hitm(c2c_he);
+
+ snprintf(buf, 10, "%.2F%%", per);
+ return snprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_hitm);
+}
+
+static int64_t
+percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ double per_left;
+ double per_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ per_left = percent_hitm(c2c_left);
+ per_right = percent_hitm(c2c_right);
+
+ return per_left - per_right;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -765,6 +842,15 @@ static struct c2c_dimension dim_tot_loads = {
.width = 7,
};

+static struct c2c_dimension dim_percent_hitm = {
+ .header = HEADER_LOW("%hitm"),
+ .name = "percent_hitm",
+ .cmp = percent_hitm_cmp,
+ .entry = percent_hitm_entry,
+ .color = percent_hitm_color,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -787,6 +873,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_llcmiss,
&dim_tot_recs,
&dim_tot_loads,
+ &dim_percent_hitm,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:05 AM9/22/16
to
It's convenient to have an index for each cacheline to
help discussions about results over the phone.

Add new 'Index' and 'Num' fields in main and single
cacheline tables.

$ perf c2c report
=================================================
Shared Data Cache Line Table
=================================================
#
# Total Lcl ----- LLC Load Hitm -----
# Index Cacheline records Hitm Total Lcl Rmt ...
# ..... .................. ....... ....... ....... ....... .......
#
0 0xffff880036233b40 1 11.11% 1 1 0
1 0xffff88009ccb2900 1 11.11% 1 1 0
2 0xffff8800b5b3bc40 7 11.11% 1 1 0
...

=================================================
Shared Cache Line Distribution Pareto
=================================================
#
# ----- HITM ----- -- Store Refs -- Data address
# Num Rmt Lcl L1 Hit L1 Miss Offset Pid ...
# ..... ....... ....... ....... ....... .................. .......
#
-------------------------------------------------------------
0 0 1 0 0 0xffff880036233b40
-------------------------------------------------------------
0.00% 100.00% 0.00% 0.00% 0x30 0

-------------------------------------------------------------
1 0 1 0 0 0xffff88009ccb2900
-------------------------------------------------------------
0.00% 100.00% 0.00% 0.00% 0x28 549
...

Link: http://lkml.kernel.org/n/tip-4dhfagaz57...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 64 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c7fe81d49016..f39947d29765 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -36,6 +36,7 @@ struct c2c_hist_entry {
struct c2c_stats stats;
unsigned long *cpuset;
struct c2c_stats *node_stats;
+ unsigned int cacheline_idx;

struct compute_stats cstats;

@@ -1085,6 +1086,29 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
return snprintf(hpp->buf, hpp->size, "%*s", width, buf);
}

+static int
+cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ snprintf(buf, 10, "%u", c2c_he->cacheline_idx);
+ return snprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ return snprintf(hpp->buf, hpp->size, "%*s", width, "");
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1430,6 +1454,30 @@ static struct c2c_dimension dim_srcline = {
.se = &sort_srcline,
};

+static struct c2c_dimension dim_dcacheline_idx = {
+ .header = HEADER_LOW("Index"),
+ .name = "cl_idx",
+ .cmp = empty_cmp,
+ .entry = cl_idx_entry,
+ .width = 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num = {
+ .header = HEADER_LOW("Num"),
+ .name = "cl_num",
+ .cmp = empty_cmp,
+ .entry = cl_idx_entry,
+ .width = 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num_empty = {
+ .header = HEADER_LOW("Num"),
+ .name = "cl_num_empty",
+ .cmp = empty_cmp,
+ .entry = cl_idx_empty_entry,
+ .width = 5,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1469,6 +1517,9 @@ static struct c2c_dimension *dimensions[] = {
&dim_mean_load,
&dim_cpucnt,
&dim_srcline,
+ &dim_dcacheline_idx,
+ &dim_dcacheline_num,
+ &dim_dcacheline_num_empty,
NULL,
};

@@ -1755,6 +1806,10 @@ static int resort_cl_cb(struct hist_entry *he)
calc_width(he);

if (display && c2c_hists) {
+ static unsigned int idx;
+
+ c2c_he->cacheline_idx = idx++;
+
c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);

hists__collapse_resort(&c2c_hists->hists, NULL);
@@ -1942,10 +1997,10 @@ static void print_cacheline(struct c2c_hists *c2c_hists,
fprintf(out, "\n");
}

- fprintf(out, " ------------------------------------------------------\n");
+ fprintf(out, " -------------------------------------------------------------\n");
__hist_entry__snprintf(he_cl, &hpp, hpp_list);
fprintf(out, "%s\n", bf);
- fprintf(out, " ------------------------------------------------------\n");
+ fprintf(out, " -------------------------------------------------------------\n");

hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true);
}
@@ -1958,6 +2013,7 @@ static void print_pareto(FILE *out)

perf_hpp_list__init(&hpp_list);
ret = hpp_list__parse(&hpp_list,
+ "cl_num,"
"cl_rmt_hitm,"
"cl_lcl_hitm,"
"cl_stores_l1hit,"
@@ -2315,7 +2371,8 @@ static int build_cl_output(char *cl_sort)
}

if (asprintf(&c2c.cl_output,
- "%s%s%s%s%s%s%s%s%s",
+ "%s%s%s%s%s%s%s%s%s%s",
+ c2c.use_stdio ? "cl_num_empty," : "",
"percent_rmt_hitm,"
"percent_lcl_hitm,"
"percent_stores_l1hit,"
@@ -2464,6 +2521,7 @@ static int perf_c2c__report(int argc, const char **argv)
}

c2c_hists__reinit(&c2c.hists,
+ "cl_idx,"
"dcacheline,"
"tot_recs,"
"percent_hitm,"
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:05 AM9/22/16
to
Adding single cacheline TUI browser. It triggers when
you press 'd' in the main browser on the specific cacheline.

It allows navigating through the cacheline's offsets and displaying
callchains (implemented in following patches).

Link: http://lkml.kernel.org/n/tip-fovjwgyusv...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 48f926768a59..a89aa8408c45 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1826,6 +1826,84 @@ static void c2c_browser__update_nr_entries(struct hist_browser *hb)
hb->nr_non_filtered_entries = nr_entries;
}

+struct c2c_cacheline_browser {
+ struct hist_browser hb;
+ struct hist_entry *he;
+};
+
+static int
+perf_c2c_cacheline_browser__title(struct hist_browser *browser,
+ char *bf, size_t size)
+{
+ struct c2c_cacheline_browser *cl_browser;
+ struct hist_entry *he;
+ uint64_t addr = 0;
+
+ cl_browser = container_of(browser, struct c2c_cacheline_browser, hb);
+ he = cl_browser->he;
+
+ if (he->mem_info)
+ addr = cl_address(he->mem_info->daddr.addr);
+
+ scnprintf(bf, size, "Cacheline 0x%lx", addr);
+ return 0;
+}
+
+static struct c2c_cacheline_browser*
+c2c_cacheline_browser__new(struct hists *hists, struct hist_entry *he)
+{
+ struct c2c_cacheline_browser *browser;
+
+ browser = zalloc(sizeof(*browser));
+ if (browser) {
+ hist_browser__init(&browser->hb, hists);
+ browser->hb.c2c_filter = true;
+ browser->hb.title = perf_c2c_cacheline_browser__title;
+ browser->he = he;
+ }
+
+ return browser;
+}
+
+static int perf_c2c__browse_cacheline(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+ struct c2c_cacheline_browser *cl_browser;
+ struct hist_browser *browser;
+ int key = -1;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ cl_browser = c2c_cacheline_browser__new(&c2c_hists->hists, he);
+ if (cl_browser == NULL)
+ return -1;
+
+ browser = &cl_browser->hb;
+
+ /* reset abort key so that it can get Ctrl-C as a key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ c2c_browser__update_nr_entries(browser);
+
+ while (1) {
+ key = hist_browser__run(browser, "help");
+
+ switch (key) {
+ case 'q':
+ goto out;
+ default:
+ break;
+ }
+ }
+
+out:
+ free(cl_browser);
+ return 0;
+}
+
static int perf_c2c_browser__title(struct hist_browser *browser,
char *bf, size_t size)
{
@@ -1869,6 +1947,9 @@ static int perf_c2c__hists_browse(struct hists *hists)
switch (key) {
case 'q':
goto out;
+ case 'd':
+ perf_c2c__browse_cacheline(browser->he_selection);
+ break;
default:
break;
}
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:05 AM9/22/16
to
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index e281930e57da..b7404387faf7 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1634,6 +1634,23 @@ static int resort_cl_cb(struct hist_entry *he)
c2c_hists = c2c_he->hists;

if (c2c_hists) {
+ c2c_hists__reinit(c2c_hists,
+ "percent_rmt_hitm,"
+ "percent_lcl_hitm,"
+ "percent_stores_l1hit,"
+ "percent_stores_l1miss,"
+ "offset,"
+ "pid,"
+ "tid,"
+ "mean_rmt,"
+ "mean_lcl,"
+ "mean_load,"
+ "cpucnt,"
+ "symbol,"
+ "dso,"
+ "node",
+ "offset,rmt_hitm,lcl_hitm");
+
hists__collapse_resort(&c2c_hists->hists, NULL);
hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
}
@@ -1765,6 +1782,20 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_session;
}

+ c2c_hists__reinit(&c2c.hists,
+ "dcacheline,"
+ "tot_recs,"
+ "percent_hitm,"
+ "tot_hitm,lcl_hitm,rmt_hitm,"
+ "stores,stores_l1hit,stores_l1miss,"
+ "dram_lcl,dram_rmt,"
+ "ld_llcmiss,"
+ "tot_loads,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,ld_rmthit",
+ "rmt_hitm"
+ );
+
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");

hists__collapse_resort(&c2c.hists.hists, NULL);
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:05 AM9/22/16
to
Display c2c related configuration options/setup.
So far it's output of monitored events:

$ perf c2c report --stats
...

=================================================
c2c details
=================================================
Events : cpu/mem-loads,ldlat=50/pp
: cpu/mem-stores/pp

Link: http://lkml.kernel.org/n/tip-ypz84f3a9f...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 33 +++++++++++++++++++++++++++------
1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index e85343e4ffe7..71b6850809a2 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -13,6 +13,8 @@
#include "tool.h"
#include "data.h"
#include "sort.h"
+#include "evlist.h"
+#include "evsel.h"
#include <asm/bug.h>
#include "ui/browsers/hists.h"

@@ -1890,13 +1892,32 @@ static void print_pareto(FILE *out)
}
}

-static void perf_c2c__hists_fprintf(FILE *out)
+static void print_c2c_info(FILE *out, struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ bool first = true;
+
+ fprintf(out, "=================================================\n");
+ fprintf(out, " c2c details \n");
+ fprintf(out, "=================================================\n");
+
+ evlist__for_each_entry(evlist, evsel) {
+ fprintf(out, "%-36s: %s\n", first ? " Events" : "",
+ perf_evsel__name(evsel));
+ first = false;
+ }
+}
+
+static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
{
setup_pager();

print_c2c__display_stats(out);
fprintf(out, "\n");
print_shared_cacheline_info(out);
+ fprintf(out, "\n");
+ print_c2c_info(out, session);

if (c2c.stats_only)
return;
@@ -2070,18 +2091,18 @@ out:
return 0;
}

-static void perf_c2c_display(void)
+static void perf_c2c_display(struct perf_session *session)
{
if (c2c.use_stdio)
- perf_c2c__hists_fprintf(stdout);
+ perf_c2c__hists_fprintf(stdout, session);
else
perf_c2c__hists_browse(&c2c.hists.hists);
}
#else
-static void perf_c2c_display(void)
+static void perf_c2c_display(struct perf_session *session)
{
use_browser = 0;
- perf_c2c__hists_fprintf(stdout);
+ perf_c2c__hists_fprintf(stdout, session);
}
#endif /* HAVE_SLANG_SUPPORT */

@@ -2194,7 +2215,7 @@ static int perf_c2c__report(int argc, const char **argv)

ui_quirks();

- perf_c2c_display();
+ perf_c2c_display(session);

out_session:
perf_session__delete(session);
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:05 AM9/22/16
to
Adding source line dimension key wrapper.

It is to be displayed in the single cacheline output:

cl_srcline

It displays source line related to the code address that
accessed cacheline. It's a wrapper to global srcline sort
entry.

Link: http://lkml.kernel.org/n/tip-cmnzgm37mj...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 11 +++++++++++
tools/perf/util/sort.c | 2 +-
tools/perf/util/sort.h | 1 +
3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 2318249362f8..8fb798c8a790 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -50,6 +50,8 @@ struct perf_c2c {
int cpus_cnt;
int *cpu2node;
int node_info;
+
+ bool show_src;
};

static struct perf_c2c c2c;
@@ -1360,6 +1362,11 @@ static struct c2c_dimension dim_cpucnt = {
.width = 8,
};

+static struct c2c_dimension dim_srcline = {
+ .name = "cl_srcline",
+ .se = &sort_srcline,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1398,6 +1405,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_mean_lcl,
&dim_mean_load,
&dim_cpucnt,
+ &dim_srcline,
NULL,
};

@@ -1605,6 +1613,9 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,

static int filter_cb(struct hist_entry *he __maybe_unused)
{
+ if (c2c.show_src && !he->srcline)
+ he->srcline = hist_entry__get_srcline(he);
+
return 0;
}

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 452e15a10dd2..df622f4e301e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -315,7 +315,7 @@ struct sort_entry sort_sym = {

/* --sort srcline */

-static char *hist_entry__get_srcline(struct hist_entry *he)
+char *hist_entry__get_srcline(struct hist_entry *he)
{
struct map *map = he->ms.map;

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 099c97557d33..7aff317fc7c4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -280,4 +280,5 @@ int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+char *hist_entry__get_srcline(struct hist_entry *he);
#endif /* __PERF_SORT_H */
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:05 AM9/22/16
to
Allowing user to configure the way the single cacheline
data are sorted after being sorted by offset.

Adding 'c' option to specify sorting fields for single cacheline:

-c, --coalesce <coalesce fields>
coalesce fields: pid,tid,iaddr,dso

Any combination of the following fields is allowed:
pid - process pid
tid - process tid
iaddr - code address
dso - shared object

Link: http://lkml.kernel.org/n/tip-aka8z31umx...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 119 ++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 102 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 771991cf6946..ee64537493ce 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -46,6 +46,8 @@ struct c2c_hist_entry {
struct hist_entry he;
};

+static char const *coalesce_default = "pid,tid,iaddr";
+
struct perf_c2c {
struct perf_tool tool;
struct c2c_hists hists;
@@ -65,6 +67,11 @@ struct perf_c2c {
int shared_clines;

int display;
+
+ const char *coalesce;
+ char *cl_sort;
+ char *cl_resort;
+ char *cl_output;
};

enum {
@@ -237,7 +244,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (!mi_dup)
goto free_mi;

- c2c_hists = he__get_c2c_hists(he, "offset", 2);
+ c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
if (!c2c_hists)
goto free_mi_dup;

@@ -1736,22 +1743,7 @@ static int resort_cl_cb(struct hist_entry *he)
c2c_hists = c2c_he->hists;

if (display && c2c_hists) {
- c2c_hists__reinit(c2c_hists,
- "percent_rmt_hitm,"
- "percent_lcl_hitm,"
- "percent_stores_l1hit,"
- "percent_stores_l1miss,"
- "offset,"
- "pid,"
- "tid,"
- "mean_rmt,"
- "mean_lcl,"
- "mean_load,"
- "cpucnt,"
- "symbol,"
- "dso,"
- "node",
- "offset,rmt_hitm,lcl_hitm");
+ c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);

hists__collapse_resort(&c2c_hists->hists, NULL);
hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
@@ -1995,6 +1987,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
}
fprintf(out, " Cachelines sort on : %s HITMs\n",
c2c.display == DISPLAY_LCL ? "Local" : "Remote");
+ fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
}

static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
@@ -2274,6 +2267,89 @@ static int setup_display(const char *str)
return 0;
}

+#define for_each_token(__tok, __buf, __sep, __tmp) \
+ for (__tok = strtok_r(__buf, __sep, &__tmp); __tok; \
+ __tok = strtok_r(NULL, __sep, &__tmp))
+
+static int build_cl_output(char *cl_sort)
+{
+ char *tok, *tmp, *buf = strdup(cl_sort);
+ bool add_pid = false;
+ bool add_tid = false;
+ bool add_iaddr = false;
+ bool add_sym = false;
+ bool add_dso = false;
+ bool add_src = false;
+
+ if (!buf)
+ return -ENOMEM;
+
+ for_each_token(tok, buf, ",", tmp) {
+ if (!strcmp(tok, "tid")) {
+ add_tid = true;
+ } else if (!strcmp(tok, "pid")) {
+ add_pid = true;
+ } else if (!strcmp(tok, "iaddr")) {
+ add_iaddr = true;
+ add_sym = true;
+ add_dso = true;
+ add_src = true;
+ } else if (!strcmp(tok, "dso")) {
+ add_dso = true;
+ } else if (strcmp(tok, "offset")) {
+ pr_err("unrecognized sort token: %s\n", tok);
+ return -EINVAL;
+ }
+ }
+
+ if (asprintf(&c2c.cl_output,
+ "%s%s%s%s%s%s%s%s%s",
+ "percent_rmt_hitm,"
+ "percent_lcl_hitm,"
+ "percent_stores_l1hit,"
+ "percent_stores_l1miss,"
+ "offset,",
+ add_pid ? "pid," : "",
+ add_tid ? "tid," : "",
+ add_iaddr ? "iaddr," : "",
+ "mean_rmt,"
+ "mean_lcl,"
+ "mean_load,"
+ "cpucnt,",
+ add_sym ? "symbol," : "",
+ add_dso ? "dso," : "",
+ add_src ? "cl_srcline," : "",
+ "node") < 0)
+ return -ENOMEM;
+
+ c2c.show_src = add_src;
+
+ free(buf);
+ return 0;
+}
+
+static int setup_coalesce(const char *coalesce)
+{
+ const char *c = coalesce ?: coalesce_default;
+
+ if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0)
+ return -ENOMEM;
+
+ if (build_cl_output(c2c.cl_sort))
+ return -1;
+
+ if (asprintf(&c2c.cl_resort, "offset,%s",
+ c2c.display == DISPLAY_RMT ?
+ "rmt_hitm,lcl_hitm" :
+ "lcl_hitm,rmt_hitm") < 0)
+ return -ENOMEM;
+
+ pr_debug("coalesce sort fields: %s\n", c2c.cl_sort);
+ pr_debug("coalesce resort fields: %s\n", c2c.cl_resort);
+ pr_debug("coalesce output fields: %s\n", c2c.cl_output);
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -2283,6 +2359,7 @@ static int perf_c2c__report(int argc, const char **argv)
};
char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
const char *display = NULL;
+ const char *coalesce = NULL;
const struct option c2c_options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -2302,6 +2379,8 @@ static int perf_c2c__report(int argc, const char **argv)
callchain_help, &parse_callchain_opt,
callchain_default_opt),
OPT_STRING('d', "display", &display, NULL, "lcl,rmt"),
+ OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
+ "coalesce fields: pid,tid,iaddr,dso"),
OPT_END()
};
int err = 0;
@@ -2330,6 +2409,12 @@ static int perf_c2c__report(int argc, const char **argv)
if (err)
goto out;

+ err = setup_coalesce(coalesce);
+ if (err) {
+ pr_debug("Failed to initialize hists\n");
+ goto out;
+ }
+
err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:06 AM9/22/16
to
Add a limit on the number of entries shown in the cachelines
table. By default a cacheline must account for at least 0.0005
(i.e. 0.05%) of all remote HITMs to be displayed.

Also display only cachelines with remote hitm or store data.

Link: http://lkml.kernel.org/n/tip-inykbom2f1...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 36 ++++++++++++++++++++++++++++++++++--
tools/perf/util/hist.c | 1 +
tools/perf/util/hist.h | 1 +
3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 31e311959480..ff89c0b86c44 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1639,11 +1639,42 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
return hpp_list__parse(&c2c_hists->list, output, sort);
}

-static int filter_cb(struct hist_entry *he __maybe_unused)
+#define DISPLAY_LINE_LIMIT 0.0005
+
+static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
+{
+ struct c2c_hist_entry *c2c_he;
+ double ld_dist;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ if (stats->rmt_hitm) {
+ ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm);
+ if (ld_dist < DISPLAY_LINE_LIMIT)
+ he->filtered = HIST_FILTER__C2C;
+ } else {
+ he->filtered = HIST_FILTER__C2C;
+ }
+
+ return he->filtered == 0;
+}
+
+static inline int valid_hitm_or_store(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ return c2c_he->stats.rmt_hitm || c2c_he->stats.store;
+}
+
+static int filter_cb(struct hist_entry *he)
{
if (c2c.show_src && !he->srcline)
he->srcline = hist_entry__get_srcline(he);

+ if (!valid_hitm_or_store(he))
+ he->filtered = HIST_FILTER__C2C;
+
return 0;
}

@@ -1651,11 +1682,12 @@ static int resort_cl_cb(struct hist_entry *he)
{
struct c2c_hist_entry *c2c_he;
struct c2c_hists *c2c_hists;
+ bool display = he__display(he, &c2c.hitm_stats);

c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_hists = c2c_he->hists;

- if (c2c_hists) {
+ if (display && c2c_hists) {
c2c_hists__reinit(c2c_hists,
"percent_rmt_hitm,"
"percent_lcl_hitm,"
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b02992efb513..e1be4132054d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1195,6 +1195,7 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
case HIST_FILTER__GUEST:
case HIST_FILTER__HOST:
case HIST_FILTER__SOCKET:
+ case HIST_FILTER__C2C:
default:
return;
}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 9928fed8bc59..d4b6514eeef5 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -22,6 +22,7 @@ enum hist_filter {
HIST_FILTER__GUEST,
HIST_FILTER__HOST,
HIST_FILTER__SOCKET,
+ HIST_FILTER__C2C,
};

enum hist_column {
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:06 AM9/22/16
to
Adding 5 hitm related dimension key wrappers.

First 3 are to be displayed in the main cachelines
overall output:

tot_hitm, lcl_hitm, rmt_hitm

The latter 2 are to be displayed within single
cacheline output:

cl_rmt_hitm, cl_lcl_hitm

They all display bare numbers of remote/local/total
HITMs for cacheline or its related offsets.

Link: http://lkml.kernel.org/n/tip-iju5239xa5...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 109 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index a2881677c315..06c4d3e92ed2 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -346,6 +346,70 @@ iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return sort__iaddr_cmp(left, right);
}

+static int
+tot_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ unsigned int tot_hitm;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ tot_hitm = c2c_he->stats.lcl_hitm + c2c_he->stats.rmt_hitm;
+
+ return snprintf(hpp->buf, hpp->size, "%*u", width, tot_hitm);
+}
+
+/*
+ * Sort callback comparing two entries by their total (local + remote)
+ * HITM count. Returns a negative, zero or positive value when left has
+ * fewer, the same or more HITMs than right.
+ */
+static int64_t
+tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ unsigned int tot_hitm_left;
+ unsigned int tot_hitm_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ tot_hitm_left = c2c_left->stats.lcl_hitm + c2c_left->stats.rmt_hitm;
+ tot_hitm_right = c2c_right->stats.lcl_hitm + c2c_right->stats.rmt_hitm;
+
+ /*
+ * Widen before subtracting: an unsigned int difference wraps to a
+ * large positive value when left < right, which would then be
+ * returned as a positive int64_t and sort the entries wrongly.
+ */
+ return (int64_t) tot_hitm_left - (int64_t) tot_hitm_right;
+}
+
+#define STAT_FN_ENTRY(__f) \
+static int \
+__f ## _entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ struct c2c_hist_entry *c2c_he; \
+ int width = c2c_width(fmt, hpp, he->hists); \
+ \
+ c2c_he = container_of(he, struct c2c_hist_entry, he); \
+ return snprintf(hpp->buf, hpp->size, "%*u", width, \
+ c2c_he->stats.__f); \
+}
+
+/*
+ * Generate <field>_cmp(), a sort callback comparing two entries by their
+ * stats.<field> counter. The operands are widened to int64_t before the
+ * subtraction so that a difference of unsigned counters cannot wrap into
+ * a bogus positive result when left < right.
+ */
+#define STAT_FN_CMP(__f) \
+static int64_t \
+__f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct hist_entry *left, struct hist_entry *right) \
+{ \
+ struct c2c_hist_entry *c2c_left, *c2c_right; \
+ \
+ c2c_left = container_of(left, struct c2c_hist_entry, he); \
+ c2c_right = container_of(right, struct c2c_hist_entry, he); \
+ return (int64_t) c2c_left->stats.__f - \
+ (int64_t) c2c_right->stats.__f; \
+}
+
+#define STAT_FN(__f) \
+ STAT_FN_ENTRY(__f) \
+ STAT_FN_CMP(__f)
+
+STAT_FN(rmt_hitm)
+STAT_FN(lcl_hitm)
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -405,10 +469,55 @@ static struct c2c_dimension dim_iaddr = {
.width = 18,
};

+static struct c2c_dimension dim_tot_hitm = {
+ .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2),
+ .name = "tot_hitm",
+ .cmp = tot_hitm_cmp,
+ .entry = tot_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_lcl_hitm = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "lcl_hitm",
+ .cmp = lcl_hitm_cmp,
+ .entry = lcl_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_rmt_hitm = {
+ .header = HEADER_SPAN_LOW("Rmt"),
+ .name = "rmt_hitm",
+ .cmp = rmt_hitm_cmp,
+ .entry = rmt_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_rmt_hitm = {
+ .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
+ .name = "cl_rmt_hitm",
+ .cmp = rmt_hitm_cmp,
+ .entry = rmt_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_lcl_hitm = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "cl_lcl_hitm",
+ .cmp = lcl_hitm_cmp,
+ .entry = lcl_hitm_entry,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
&dim_iaddr,
+ &dim_tot_hitm,
+ &dim_lcl_hitm,
+ &dim_rmt_hitm,
+ &dim_cl_lcl_hitm,
+ &dim_cl_rmt_hitm,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:06 AM9/22/16
to
Adding help windows to display key/action mappings
for both browsers.

Link: http://lkml.kernel.org/n/tip-zni4apopx6...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 3728e6c7d0cd..dce3e32cef75 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2175,6 +2175,11 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
struct c2c_cacheline_browser *cl_browser;
struct hist_browser *browser;
int key = -1;
+ const char help[] =
+ " ENTER Togle callchains (if present) \n"
+ " n Togle Node details info \n"
+ " s Togle full lenght of symbol and source line columns \n"
+ " q Return back to cacheline list \n";

/* Display compact version first. */
c2c.symbol_full = false;
@@ -2195,7 +2200,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
c2c_browser__update_nr_entries(browser);

while (1) {
- key = hist_browser__run(browser, "help");
+ key = hist_browser__run(browser, "? - help");

switch (key) {
case 's':
@@ -2207,6 +2212,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
break;
case 'q':
goto out;
+ case '?':
+ ui_browser__help_window(&browser->b, help);
+ break;
default:
break;
}
@@ -2245,6 +2253,10 @@ static int perf_c2c__hists_browse(struct hists *hists)
{
struct hist_browser *browser;
int key = -1;
+ const char help[] =
+ " d Display cacheline details \n"
+ " ENTER Togle callchains (if present) \n"
+ " q Quit \n";

browser = perf_c2c_browser__new(hists);
if (browser == NULL)
@@ -2257,7 +2269,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
c2c_browser__update_nr_entries(browser);

while (1) {
- key = hist_browser__run(browser, "help");
+ key = hist_browser__run(browser, "? - help");

switch (key) {
case 'q':
@@ -2265,6 +2277,9 @@ static int perf_c2c__hists_browse(struct hists *hists)
case 'd':
perf_c2c__browse_cacheline(browser->he_selection);
break;
+ case '?':
+ ui_browser__help_window(&browser->b, help);

Jiri Olsa

unread,
Sep 22, 2016, 11:50:06 AM9/22/16
to
Adding support for the --stdio output. The output
tables are dumped directly to stdout.

$ perf c2c report
=================================================
Shared Data Cache Line Table
=================================================
#
# Total ----- LLC Load Hitm ----- ---- Store Reference ---- --- Load Dram ---- LLC Total ----- Core Load Hit ----- -- LLC Load Hit --
# Cacheline records %hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1 L2 Llc Rmt
# .................. ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ....... ........ ........
#
0xffff88000235f840 17 0.00% 0 0 0 17 17 0 0 0 0 0 0 0 0 0 0
...

=================================================
Shared Cache Line Distribution Pareto
=================================================
#
# ----- HITM ----- -- Store Refs -- Data address ---------- cycles ---------- cpu Shared
# Rmt Lcl L1 Hit L1 Miss Offset Pid Tid rmt hitm lcl hitm load cnt Symbol Object Node
# ....... ....... ....... ....... .................. ....... ..................... ........ ........ ........ ........ .................... ................. ....
#
------------------------------------------------------
0 0 17 0 0xffff88000235f840
------------------------------------------------------
0.00% 0.00% 5.88% 0.00% 0x0 11474 11474:kworker/u16:5 0 0 0 1 [k] rmap_walk_file [kernel.kallsyms] 0
0.00% 0.00% 5.88% 0.00% 0x10 11474 11474:kworker/u16:5 0 0 0 1 [k] lock_page_memcg [kernel.kallsyms] 0
0.00% 0.00% 11.76% 0.00% 0x20 11474 11474:kworker/u16:5 0 0 0 1 [k] page_mapping [kernel.kallsyms] 0
0.00% 0.00% 64.71% 0.00% 0x28 11474 11474:kworker/u16:5 0 0 0 1 [k] __test_set_page_writeback [kernel.kallsyms] 0
0.00% 0.00% 11.76% 0.00% 0x30 11474 11474:kworker/u16:5 0 0 0 1 [k] page_mapped [kernel.kallsyms] 0
...

Link: http://lkml.kernel.org/n/tip-eorco9r0oe...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 83 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index b7404387faf7..add27479cc23 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -13,6 +13,7 @@
#include "tool.h"
#include "data.h"
#include "sort.h"
+#include <asm/bug.h>

struct c2c_hists {
struct hists hists;
@@ -1719,6 +1720,85 @@ static int setup_nodes(struct perf_session *session)
return 0;
}

+static void print_cacheline(struct c2c_hists *c2c_hists,
+ struct hist_entry *he_cl,
+ struct perf_hpp_list *hpp_list,
+ FILE *out)
+{
+ char bf[1000];
+ struct perf_hpp hpp = {
+ .buf = bf,
+ .size = 1000,
+ };
+ static bool once;
+
+ if (!once) {
+ hists__fprintf_headers(&c2c_hists->hists, out);
+ once = true;
+ } else {
+ fprintf(out, "\n");
+ }
+
+ fprintf(out, " ------------------------------------------------------\n");
+ __hist_entry__snprintf(he_cl, &hpp, hpp_list);
+ fprintf(out, "%s\n", bf);
+ fprintf(out, " ------------------------------------------------------\n");
+
+ hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true);
+}
+
+static void print_pareto(FILE *out)
+{
+ struct perf_hpp_list hpp_list;
+ struct rb_node *nd;
+ int ret;
+
+ perf_hpp_list__init(&hpp_list);
+ ret = hpp_list__parse(&hpp_list,
+ "cl_rmt_hitm,"
+ "cl_lcl_hitm,"
+ "cl_stores_l1hit,"
+ "cl_stores_l1miss,"
+ "dcacheline",
+ NULL);
+
+ if (WARN_ONCE(ret, "failed to setup sort entries\n"))
+ return;
+
+ nd = rb_first(&c2c.hists.hists.entries);
+
+ for (; nd; nd = rb_next(nd)) {
+ struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+ struct c2c_hist_entry *c2c_he;
+
+ if (he->filtered)
+ continue;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ print_cacheline(c2c_he->hists, he, &hpp_list, out);
+ }
+}
+
+/*
+ * Dump the c2c report as plain text to @out: first the table of shared
+ * data cachelines, then the per-cacheline distribution (pareto) output.
+ */
+static void perf_c2c__hists_fprintf(FILE *out)
+{
+ setup_pager();
+
+ fprintf(out, "\n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Shared Data Cache Line Table \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, "#\n");
+
+ /* Write the table to the caller's stream, like everything else here. */
+ hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, out, false);
+
+ fprintf(out, "\n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Shared Cache Line Distribution Pareto \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, "#\n");
+
+ print_pareto(out);
+}

static int perf_c2c__report(int argc, const char **argv)
{
@@ -1803,6 +1883,9 @@ static int perf_c2c__report(int argc, const char **argv)

ui_progress__finish();

+ use_browser = 0;
+ perf_c2c__hists_fprintf(stdout);

Jiri Olsa

unread,
Sep 22, 2016, 11:50:06 AM9/22/16
to
Adding 5 stores related dimension key wrappers.

First 3 are to be displayed in the main cachelines
overall output:

stores, stores_l1hit, stores_l1miss

The latter 2 are to be displayed within single
cacheline output:

cl_stores_l1hit, cl_stores_l1miss

They all display bare numbers of stores for
cacheline or its related offsets.

Link: http://lkml.kernel.org/n/tip-qeml8v53v6...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 06c4d3e92ed2..7c62a749eaca 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -409,6 +409,9 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \

STAT_FN(rmt_hitm)
STAT_FN(lcl_hitm)
+STAT_FN(store)
+STAT_FN(st_l1hit)
+STAT_FN(st_l1miss)

#define HEADER_LOW(__h) \
{ \
@@ -509,6 +512,46 @@ static struct c2c_dimension dim_cl_lcl_hitm = {
.width = 7,
};

+static struct c2c_dimension dim_stores = {
+ .header = HEADER_SPAN("---- Store Reference ----", "Total", 2),
+ .name = "stores",
+ .cmp = store_cmp,
+ .entry = store_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_stores_l1hit = {
+ .header = HEADER_SPAN_LOW("L1Hit"),
+ .name = "stores_l1hit",
+ .cmp = st_l1hit_cmp,
+ .entry = st_l1hit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_stores_l1miss = {
+ .header = HEADER_SPAN_LOW("L1Miss"),
+ .name = "stores_l1miss",
+ .cmp = st_l1miss_cmp,
+ .entry = st_l1miss_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_stores_l1hit = {
+ .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
+ .name = "cl_stores_l1hit",
+ .cmp = st_l1hit_cmp,
+ .entry = st_l1hit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_stores_l1miss = {
+ .header = HEADER_SPAN_LOW("L1 Miss"),
+ .name = "cl_stores_l1miss",
+ .cmp = st_l1miss_cmp,
+ .entry = st_l1miss_entry,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -518,6 +561,11 @@ static struct c2c_dimension *dimensions[] = {
&dim_rmt_hitm,
&dim_cl_lcl_hitm,
&dim_cl_rmt_hitm,
+ &dim_stores,
+ &dim_stores_l1hit,
+ &dim_stores_l1miss,
+ &dim_cl_stores_l1hit,
+ &dim_cl_stores_l1miss,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:07 AM9/22/16
to
Adding tid dimension key wrapper.

It is to be displayed in the single cacheline output:

tid

It's a wrapper for global sort_thread sort entry with
c2c specific header.

Link: http://lkml.kernel.org/n/tip-fr0socae5s...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 20b5e7f64412..55a5a2eadfe8 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1095,6 +1095,12 @@ static struct c2c_dimension dim_pid = {
.width = 7,
};

+static struct c2c_dimension dim_tid = {
+ .header = HEADER_LOW("Tid"),
+ .name = "tid",
+ .se = &sort_thread,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1125,6 +1131,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_dram_lcl,
&dim_dram_rmt,
&dim_pid,
+ &dim_tid,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:07 AM9/22/16
to
Adding total record dimension key wrapper.

It is to be displayed in the main cachelines
overall output:

tot_recs

It displays the sum of all accesses (loads and stores) recorded for the cacheline.

Link: http://lkml.kernel.org/n/tip-wojujik7zz...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 419a46926707..d96cad97b4d7 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -456,6 +456,61 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats);
}

+static uint64_t total_records(struct c2c_stats *stats)
+{
+ uint64_t lclmiss, ldcnt, total;
+
+ lclmiss = stats->lcl_dram +
+ stats->rmt_dram +
+ stats->rmt_hitm +
+ stats->rmt_hit;
+
+ ldcnt = lclmiss +
+ stats->ld_fbhit +
+ stats->ld_l1hit +
+ stats->ld_l2hit +
+ stats->ld_llchit +
+ stats->lcl_hitm;
+
+ total = ldcnt +
+ stats->st_l1hit +
+ stats->st_l1miss;
+
+ return total;
+}
+
+static int
+tot_recs_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ uint64_t tot_recs;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ tot_recs = total_records(&c2c_he->stats);
+
+ return snprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs);
+}
+
+static int64_t
+tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ uint64_t tot_recs_left;
+ uint64_t tot_recs_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ tot_recs_left = total_records(&c2c_left->stats);
+ tot_recs_right = total_records(&c2c_right->stats);
+
+ return tot_recs_left - tot_recs_right;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -643,6 +698,14 @@ static struct c2c_dimension dim_ld_llcmiss = {
.width = 7,
};

+static struct c2c_dimension dim_tot_recs = {
+ .header = HEADER_BOTH("Total", "records"),
+ .name = "tot_recs",
+ .cmp = tot_recs_cmp,
+ .entry = tot_recs_entry,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -663,6 +726,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_llchit,
&dim_ld_rmthit,
&dim_ld_llcmiss,
+ &dim_tot_recs,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:07 AM9/22/16
to
Currently we sort and limit displayed data based on
the remote HITMs count. Adding support to switch to
local HITMs via --display option:

--display ... lcl,rmt

Link: http://lkml.kernel.org/n/tip-inykbom2f1...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 114 +++++++++++++++++++++++++++++++++++++++--------
1 file changed, 95 insertions(+), 19 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index ff89c0b86c44..771991cf6946 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -63,6 +63,13 @@ struct perf_c2c {
/* HITM shared clines stats */
struct c2c_stats hitm_stats;
int shared_clines;
+
+ int display;
+};
+
+enum {
+ DISPLAY_LCL,
+ DISPLAY_RMT,
};

static struct perf_c2c c2c;
@@ -681,15 +688,24 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
struct c2c_hists *hists;
struct c2c_stats *stats;
struct c2c_stats *total;
- int tot, st;
+ int tot = 0, st = 0;
double p;

hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
stats = &c2c_he->stats;
total = &hists->stats;

- st = stats->rmt_hitm;
- tot = total->rmt_hitm;
+ switch (c2c.display) {
+ case DISPLAY_RMT:
+ st = stats->rmt_hitm;
+ tot = total->rmt_hitm;
+ break;
+ case DISPLAY_LCL:
+ st = stats->lcl_hitm;
+ tot = total->lcl_hitm;
+ default:
+ break;
+ }

p = tot ? (double) st / tot : 0;

@@ -972,14 +988,26 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
advance_hpp(hpp, ret);

+ #define DISPLAY_HITM(__h) \
+ if (c2c_he->stats.__h> 0) { \
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \
+ percent(stats->__h, c2c_he->stats.__h));\
+ } else { \
+ ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \
+ }

- if (c2c_he->stats.rmt_hitm > 0) {
- ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
- percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm));
- } else {
- ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+ switch (c2c.display) {
+ case DISPLAY_RMT:
+ DISPLAY_HITM(rmt_hitm);
+ break;
+ case DISPLAY_LCL:
+ DISPLAY_HITM(lcl_hitm);
+ default:
+ break;
}

+ #undef DISPLAY_HITM
+
advance_hpp(hpp, ret);

if (c2c_he->stats.store > 0) {
@@ -1255,8 +1283,12 @@ static struct c2c_dimension dim_tot_loads = {
.width = 7,
};

+static struct c2c_header percent_hitm_header[] = {
+ [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
+ [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
+};
+
static struct c2c_dimension dim_percent_hitm = {
- .header = HEADER_LOW("%hitm"),
.name = "percent_hitm",
.cmp = percent_hitm_cmp,
.entry = percent_hitm_entry,
@@ -1648,23 +1680,39 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)

c2c_he = container_of(he, struct c2c_hist_entry, he);

- if (stats->rmt_hitm) {
- ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm);
- if (ld_dist < DISPLAY_LINE_LIMIT)
- he->filtered = HIST_FILTER__C2C;
- } else {
- he->filtered = HIST_FILTER__C2C;
+#define FILTER_HITM(__h) \
+ if (stats->__h) { \
+ ld_dist = ((double)c2c_he->stats.__h / stats->__h); \
+ if (ld_dist < DISPLAY_LINE_LIMIT) \
+ he->filtered = HIST_FILTER__C2C; \
+ } else { \
+ he->filtered = HIST_FILTER__C2C; \
}

+ switch (c2c.display) {
+ case DISPLAY_LCL:
+ FILTER_HITM(lcl_hitm);
+ break;
+ case DISPLAY_RMT:
+ FILTER_HITM(rmt_hitm);
+ default:
+ break;
+ };
+
+#undef FILTER_HITM
+
return he->filtered == 0;
}

static inline int valid_hitm_or_store(struct hist_entry *he)
{
struct c2c_hist_entry *c2c_he;
+ bool has_hitm;

c2c_he = container_of(he, struct c2c_hist_entry, he);
- return c2c_he->stats.rmt_hitm || c2c_he->stats.store;
+ has_hitm = c2c.display == DISPLAY_LCL ?
+ c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm;
+ return has_hitm || c2c_he->stats.store;
}

static int filter_cb(struct hist_entry *he)
@@ -1945,6 +1993,8 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
perf_evsel__name(evsel));
first = false;
}
+ fprintf(out, " Cachelines sort on : %s HITMs\n",
+ c2c.display == DISPLAY_LCL ? "Local" : "Remote");
}

static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
@@ -2077,8 +2127,10 @@ static int perf_c2c_browser__title(struct hist_browser *browser,
char *bf, size_t size)
{
scnprintf(bf, size,
- "Shared Data Cache Line Table "
- "(%lu entries)", browser->nr_non_filtered_entries);
+ "Shared Data Cache Line Table "
+ "(%lu entries, sorted on %s HITMs)",
+ browser->nr_non_filtered_entries,
+ c2c.display == DISPLAY_LCL ? "local" : "remote");
return 0;
}

@@ -2150,6 +2202,8 @@ static void ui_quirks(void)
dim_offset.width = 5;
dim_offset.header = header_offset_tui;
}
+
+ dim_percent_hitm.header = percent_hitm_header[c2c.display];
}

#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
@@ -2204,6 +2258,22 @@ static int setup_callchain(struct perf_evlist *evlist)
return 0;
}

+static int setup_display(const char *str)
+{
+ const char *display = str ?: "rmt";
+
+ if (!strcmp(display, "rmt"))
+ c2c.display = DISPLAY_RMT;
+ else if (!strcmp(display, "lcl"))
+ c2c.display = DISPLAY_LCL;
+ else {
+ pr_err("failed: unknown display type: %s\n", str);
+ return -1;
+ }
+
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -2212,6 +2282,7 @@ static int perf_c2c__report(int argc, const char **argv)
.mode = PERF_DATA_MODE_READ,
};
char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
+ const char *display = NULL;
const struct option c2c_options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -2230,6 +2301,7 @@ static int perf_c2c__report(int argc, const char **argv)
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
callchain_help, &parse_callchain_opt,
callchain_default_opt),
+ OPT_STRING('d', "display", &display, NULL, "lcl,rmt"),
OPT_END()
};
int err = 0;
@@ -2254,6 +2326,10 @@ static int perf_c2c__report(int argc, const char **argv)

file.path = input_name;

+ err = setup_display(display);
+ if (err)
+ goto out;
+
err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
@@ -2301,7 +2377,7 @@ static int perf_c2c__report(int argc, const char **argv)
"tot_loads,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,ld_rmthit",
- "rmt_hitm"
+ c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
);

ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:07 AM9/22/16
to
Adding hitm/store percent dimension key wrappers.

They are to be displayed in the single cacheline output:

percent_rmt_hitm, percent_lcl_hitm, percent_stores_l1hit, percent_stores_l1miss

They display percentage of HITMs/stores for specific
offset in the cacheline.

Link: http://lkml.kernel.org/n/tip-t365aosxtd...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 206 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 206 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index e24472f100c6..b2992cf96130 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -639,6 +639,171 @@ percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return per_left - per_right;
}

+static struct c2c_stats *he_stats(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ return &c2c_he->stats;
+}
+
+static struct c2c_stats *total_stats(struct hist_entry *he)
+{
+ struct c2c_hists *hists;
+
+ hists = container_of(he->hists, struct c2c_hists, hists);
+ return &hists->stats;
+}
+
+static double percent(int st, int tot)
+{
+ return tot ? 100. * (double) st / (double) tot : 0;
+}
+
+#define PERCENT(__h, __f) percent(he_stats(__h)->__f, total_stats(__h)->__f)
+
+#define PERCENT_FN(__f) \
+static double percent_ ## __f(struct c2c_hist_entry *c2c_he) \
+{ \
+ struct c2c_hists *hists; \
+ \
+ hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); \
+ return percent(c2c_he->stats.__f, hists->stats.__f); \
+}
+
+PERCENT_FN(rmt_hitm)
+PERCENT_FN(lcl_hitm)
+PERCENT_FN(st_l1hit)
+PERCENT_FN(st_l1miss)
+
+static int
+percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, rmt_hitm);
+ char buf[10];
+
+ snprintf(buf, 10, "%.2F%%", per);
+ return snprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+percent_rmt_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_rmt_hitm);
+}
+
+/*
+ * Sort callback for the "percent_rmt_hitm" column: compares two entries by
+ * their share of the remote HITMs of the hists they belong to.
+ */
+static int64_t
+percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ /* Compare the remote HITM share, matching what the column displays. */
+ per_left = PERCENT(left, rmt_hitm);
+ per_right = PERCENT(right, rmt_hitm);
+
+ return per_left - per_right;
+}
+
+static int
+percent_lcl_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, lcl_hitm);
+ char buf[10];
+
+ snprintf(buf, 10, "%.2F%%", per);
+ return snprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+percent_lcl_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_lcl_hitm);
+}
+
+static int64_t
+percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, lcl_hitm);
+ per_right = PERCENT(right, lcl_hitm);
+
+ return per_left - per_right;
+}
+
+static int
+percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, st_l1hit);
+ char buf[10];
+
+ snprintf(buf, 10, "%.2F%%", per);
+ return snprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+percent_stores_l1hit_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_st_l1hit);
+}
+
+static int64_t
+percent_stores_l1hit_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, st_l1hit);
+ per_right = PERCENT(right, st_l1hit);
+
+ return per_left - per_right;
+}
+
+static int
+percent_stores_l1miss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, st_l1miss);
+ char buf[10];
+
+ snprintf(buf, 10, "%.2F%%", per);
+ return snprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+percent_stores_l1miss_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_st_l1miss);
+}
+
+static int64_t
+percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, st_l1miss);
+ per_right = PERCENT(right, st_l1miss);
+
+ return per_left - per_right;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -851,6 +1016,42 @@ static struct c2c_dimension dim_percent_hitm = {
.width = 7,
};

+/*
+ * Percentage dimensions: each entry binds a key name to its header and
+ * to the cmp/entry/color callbacks of the same prefix, width 7.
+ */
+static struct c2c_dimension dim_percent_rmt_hitm = {
+ .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
+ .name = "percent_rmt_hitm",
+ .cmp = percent_rmt_hitm_cmp,
+ .entry = percent_rmt_hitm_entry,
+ .color = percent_rmt_hitm_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_lcl_hitm = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "percent_lcl_hitm",
+ .cmp = percent_lcl_hitm_cmp,
+ .entry = percent_lcl_hitm_entry,
+ .color = percent_lcl_hitm_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1hit = {
+ .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
+ .name = "percent_stores_l1hit",
+ .cmp = percent_stores_l1hit_cmp,
+ .entry = percent_stores_l1hit_entry,
+ .color = percent_stores_l1hit_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1miss = {
+ .header = HEADER_SPAN_LOW("L1 Miss"),
+ .name = "percent_stores_l1miss",
+ .cmp = percent_stores_l1miss_cmp,
+ .entry = percent_stores_l1miss_entry,
+ .color = percent_stores_l1miss_color,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -874,6 +1075,10 @@ static struct c2c_dimension *dimensions[] = {
&dim_tot_recs,
&dim_tot_loads,
&dim_percent_hitm,
+ &dim_percent_rmt_hitm,
+ &dim_percent_lcl_hitm,
+ &dim_percent_stores_l1hit,
+ &dim_percent_stores_l1miss,
NULL,
};

@@ -961,6 +1166,7 @@ static struct c2c_fmt *get_format(const char *name)

fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp;
fmt->sort = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->color = dim->se ? NULL : dim->color;
fmt->entry = dim->se ? c2c_se_entry : dim->entry;
fmt->header = c2c_header;
fmt->width = c2c_width;
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:08 AM9/22/16
to
Adding iaddr dimension key support. It displays
code address (as hex number) responsible for the
accesses.

Using c2c wrapper to standard 'symbol_iaddr' object
to define own header and simple (just address) code
address output.

Link: http://lkml.kernel.org/n/tip-rhshygbst6...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 7c52481ec36b..a2881677c315 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -326,6 +326,26 @@ offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return (int64_t)(r - l);
}

+/*
+ * Print the instruction address of the entry's mem_info through
+ * hex_str(), right-aligned to the column width; address 0 is printed
+ * when the entry carries no mem_info.
+ */
+static int
+iaddr_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	    struct hist_entry *he)
+{
+	int col_width = c2c_width(fmt, hpp, he->hists);
+	uint64_t code_addr = he->mem_info ? he->mem_info->iaddr.addr : 0;
+
+	return snprintf(hpp->buf, hpp->size, "%*s", col_width,
+			hex_str(code_addr));
+}
+
+/* c2c comparator for 'iaddr': delegates to sort__iaddr_cmp(), fmt is unused. */
+static int64_t
+iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__iaddr_cmp(left, right);
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -377,9 +397,18 @@ static struct c2c_dimension dim_offset = {
.width = 18,
};

+/* 'iaddr' dimension: "Code address" column, 18 characters wide. */
+static struct c2c_dimension dim_iaddr = {
+ .header = HEADER_LOW("Code address"),
+ .name = "iaddr",
+ .cmp = iaddr_cmp,
+ .entry = iaddr_entry,
+ .width = 18,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
+ &dim_iaddr,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:08 AM9/22/16
to
Display global stats table as part of the stdio output
or when the --stats option is specified:

$ perf c2c report --stats
=================================================
Trace Event Information
=================================================
Total records : 41237
Locked Load/Store Operations : 4075
Load Operations : 20526
Loads - uncacheable : 0
Loads - IO : 0
Loads - Miss : 552
Loads - no mapping : 31
Load Fill Buffer Hit : 7333
Load L1D hit : 6398
Load L2D hit : 144
Load LLC hit : 4889
Load Local HITM : 1185
Load Remote HITM : 838
Load Remote HIT : 52
Load Local DRAM : 183
Load Remote DRAM : 106
Load MESI State Exclusive : 289
Load MESI State Shared : 0
Load LLC Misses : 1179
LLC Misses to Local DRAM : 15.5%
LLC Misses to Remote DRAM : 9.0%
LLC Misses to Remote cache (HIT) : 4.4%
LLC Misses to Remote cache (HITM) : 71.1%
Store Operations : 20711
Store - uncacheable : 0
Store - no mapping : 1
Store L1D Hit : 20158
Store L1D Miss : 552
No Page Map Rejects : 7
Unable to parse data source : 0

Original-patch-by: Dick Fowles <rfo...@redhat.com>
Original-patch-by: Don Zickus <dzi...@redhat.com>
Link: http://lkml.kernel.org/n/tip-qkyvao3qsr...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index a89aa8408c45..49a3af556fa4 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -55,6 +55,7 @@ struct perf_c2c {

bool show_src;
bool use_stdio;
+ bool stats_only;
};

static struct perf_c2c c2c;
@@ -1728,6 +1729,51 @@ static int setup_nodes(struct perf_session *session)
return 0;
}

+/*
+ * Print the global "Trace Event Information" table to @out from the
+ * counters accumulated in c2c.hists.stats.
+ *
+ * "Load LLC Misses" is the sum of local/remote DRAM loads and remote
+ * HIT/HITM loads.  The four "LLC Misses to ..." rows are shares of
+ * that sum; they are reported as 0.0% when the sum is zero, since the
+ * division would otherwise produce NaN.
+ */
+static void print_c2c__display_stats(FILE *out)
+{
+	int llc_misses;
+	struct c2c_stats *stats = &c2c.hists.stats;
+	double lcl_dram_pct = 0.;
+	double rmt_dram_pct = 0.;
+	double rmt_hit_pct  = 0.;
+	double rmt_hitm_pct = 0.;
+
+	llc_misses = stats->lcl_dram +
+		     stats->rmt_dram +
+		     stats->rmt_hit +
+		     stats->rmt_hitm;
+
+	/* Guard the percentage rows against an empty LLC-miss total. */
+	if (llc_misses) {
+		lcl_dram_pct = ((double)stats->lcl_dram/(double)llc_misses) * 100.;
+		rmt_dram_pct = ((double)stats->rmt_dram/(double)llc_misses) * 100.;
+		rmt_hit_pct  = ((double)stats->rmt_hit /(double)llc_misses) * 100.;
+		rmt_hitm_pct = ((double)stats->rmt_hitm/(double)llc_misses) * 100.;
+	}
+
+	fprintf(out, "=================================================\n");
+	fprintf(out, " Trace Event Information \n");
+	fprintf(out, "=================================================\n");
+	fprintf(out, " Total records : %10d\n", stats->nr_entries);
+	fprintf(out, " Locked Load/Store Operations : %10d\n", stats->locks);
+	fprintf(out, " Load Operations : %10d\n", stats->load);
+	fprintf(out, " Loads - uncacheable : %10d\n", stats->ld_uncache);
+	fprintf(out, " Loads - IO : %10d\n", stats->ld_io);
+	fprintf(out, " Loads - Miss : %10d\n", stats->ld_miss);
+	fprintf(out, " Loads - no mapping : %10d\n", stats->ld_noadrs);
+	fprintf(out, " Load Fill Buffer Hit : %10d\n", stats->ld_fbhit);
+	fprintf(out, " Load L1D hit : %10d\n", stats->ld_l1hit);
+	fprintf(out, " Load L2D hit : %10d\n", stats->ld_l2hit);
+	/* The LLC hit row also counts local HITM loads. */
+	fprintf(out, " Load LLC hit : %10d\n", stats->ld_llchit + stats->lcl_hitm);
+	fprintf(out, " Load Local HITM : %10d\n", stats->lcl_hitm);
+	fprintf(out, " Load Remote HITM : %10d\n", stats->rmt_hitm);
+	fprintf(out, " Load Remote HIT : %10d\n", stats->rmt_hit);
+	fprintf(out, " Load Local DRAM : %10d\n", stats->lcl_dram);
+	fprintf(out, " Load Remote DRAM : %10d\n", stats->rmt_dram);
+	fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl);
+	fprintf(out, " Load MESI State Shared : %10d\n", stats->ld_shared);
+	fprintf(out, " Load LLC Misses : %10d\n", llc_misses);
+	fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", lcl_dram_pct);
+	fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", rmt_dram_pct);
+	fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", rmt_hit_pct);
+	fprintf(out, " LLC Misses to Remote cache (HITM) : %10.1f%%\n", rmt_hitm_pct);
+	fprintf(out, " Store Operations : %10d\n", stats->store);
+	fprintf(out, " Store - uncacheable : %10d\n", stats->st_uncache);
+	fprintf(out, " Store - no mapping : %10d\n", stats->st_noadrs);
+	fprintf(out, " Store L1D Hit : %10d\n", stats->st_l1hit);
+	fprintf(out, " Store L1D Miss : %10d\n", stats->st_l1miss);
+	fprintf(out, " No Page Map Rejects : %10d\n", stats->nomap);
+	fprintf(out, " Unable to parse data source : %10d\n", stats->noparse);
+}
+
static void print_cacheline(struct c2c_hists *c2c_hists,
struct hist_entry *he_cl,
struct perf_hpp_list *hpp_list,
@@ -1791,6 +1837,11 @@ static void perf_c2c__hists_fprintf(FILE *out)
{
setup_pager();

+ print_c2c__display_stats(out);
+
+ if (c2c.stats_only)
+ return;
+
fprintf(out, "\n");
fprintf(out, "=================================================\n");
fprintf(out, " Shared Data Cache Line Table \n");
@@ -2002,6 +2053,8 @@ static int perf_c2c__report(int argc, const char **argv)
#ifdef HAVE_SLANG_SUPPORT
OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
#endif
+	/* Help text was a copy of the --stdio one; describe --stats itself. */
+	OPT_BOOLEAN(0, "stats", &c2c.stats_only,
+		    "Display only statistic tables (implies --stdio)"),
OPT_END()
};
int err = 0;
@@ -2011,6 +2064,9 @@ static int perf_c2c__report(int argc, const char **argv)
if (argc)
usage_with_options(report_c2c_usage, c2c_options);

+ if (c2c.stats_only)
+ c2c.use_stdio = true;
+
if (c2c.use_stdio)
use_browser = 0;
else
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:09 AM9/22/16
to
Adding cpu count dimension key wrapper.

It is to be displayed in the single cacheline output:

cpucnt

It displays number of distinct cpus that hit cacheline.

Link: http://lkml.kernel.org/n/tip-ib2kdwam52...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 5d42f7912450..2318249362f8 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1012,6 +1012,20 @@ MEAN_ENTRY(mean_rmt_entry, rmt_hitm);
MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
MEAN_ENTRY(mean_load_entry, load);

+/*
+ * Print how many bits are set in the entry's cpuset (scanned over
+ * c2c.cpus_cnt bits), right-aligned to the column width.
+ */
+static int
+cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+	     struct hist_entry *he)
+{
+	char text[10];
+	int col_width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+
+	snprintf(text, sizeof(text), "%d",
+		 bitmap_weight(entry->cpuset, c2c.cpus_cnt));
+	return snprintf(hpp->buf, hpp->size, "%*s", col_width, text);
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1338,6 +1352,14 @@ static struct c2c_dimension dim_mean_load = {
.width = 8,
};

+/* 'cpucnt' dimension: two-line "cpu"/"cnt" header; uses empty_cmp as comparator. */
+static struct c2c_dimension dim_cpucnt = {
+ .header = HEADER_BOTH("cpu", "cnt"),
+ .name = "cpucnt",
+ .cmp = empty_cmp,
+ .entry = cpucnt_entry,
+ .width = 8,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1375,6 +1397,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_mean_rmt,
&dim_mean_lcl,
&dim_mean_load,
+ &dim_cpucnt,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:09 AM9/22/16
to
Adding statistic dimension key wrapper.

It is to be displayed in the single cacheline output:

median, mean_rmt, mean_lcl, mean_load, stddev

It displays statistics hits related to cacheline accesses.

Link: http://lkml.kernel.org/n/tip-m1r4uc9lcy...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 3ffe051f377d..5d42f7912450 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -20,11 +20,20 @@ struct c2c_hists {
struct c2c_stats stats;
};

+/* Running statistics kept per hist entry, one accumulator per access class. */
+struct compute_stats {
+ struct stats lcl_hitm;
+ struct stats rmt_hitm;
+ struct stats load;
+};
+
struct c2c_hist_entry {
struct c2c_hists *hists;
struct c2c_stats stats;
unsigned long *cpuset;
struct c2c_stats *node_stats;
+
+ struct compute_stats cstats;
+
/*
* must be at the end,
* because of its callchain dynamic entry
@@ -61,6 +70,10 @@ static void *c2c_he_zalloc(size_t size)
if (!c2c_he->node_stats)
return NULL;

+ init_stats(&c2c_he->cstats.lcl_hitm);
+ init_stats(&c2c_he->cstats.rmt_hitm);
+ init_stats(&c2c_he->cstats.load);
+
return &c2c_he->he;
}

@@ -120,6 +133,20 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
set_bit(sample->cpu, c2c_he->cpuset);
}

+/*
+ * Feed @weight into exactly one accumulator of c2c_he->cstats, chosen
+ * from the decoded sample @stats in priority order: remote HITM, then
+ * local HITM, then plain load.  Samples matching none are ignored.
+ */
+static void compute_stats(struct c2c_hist_entry *c2c_he,
+			  struct c2c_stats *stats,
+			  u64 weight)
+{
+	struct stats *bucket = NULL;
+
+	if (stats->rmt_hitm)
+		bucket = &c2c_he->cstats.rmt_hitm;
+	else if (stats->lcl_hitm)
+		bucket = &c2c_he->cstats.lcl_hitm;
+	else if (stats->load)
+		bucket = &c2c_he->cstats.load;
+
+	if (bucket)
+		update_stats(bucket, weight);
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -198,6 +225,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
c2c_add_stats(&c2c_hists->stats, &stats);
c2c_add_stats(&c2c_he->node_stats[node], &stats);

+ compute_stats(c2c_he, &stats, sample->weight);
+
c2c_he__set_cpu(c2c_he, sample);

hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
@@ -959,6 +988,30 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
return 0;
}

+/*
+ * Shared printer for the mean columns: formats @mean with "%6.0f" and
+ * right-aligns it to the width reported by c2c_width().
+ */
+static int
+mean_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	   struct hist_entry *he, double mean)
+{
+	char text[10];
+	int col_width = c2c_width(fmt, hpp, he->hists);
+
+	snprintf(text, sizeof(text), "%6.0f", mean);
+	return snprintf(hpp->buf, hpp->size, "%*s", col_width, text);
+}
+
+/*
+ * Generate an entry callback named __func that prints, via mean_entry(),
+ * avg_stats() of the cstats.__val accumulator of the hist entry.
+ */
+#define MEAN_ENTRY(__func, __val) \
+static int \
+__func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ struct c2c_hist_entry *c2c_he; \
+ c2c_he = container_of(he, struct c2c_hist_entry, he); \
+ return mean_entry(fmt, hpp, he, avg_stats(&c2c_he->cstats.__val)); \
+}
+
+MEAN_ENTRY(mean_rmt_entry, rmt_hitm);
+MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
+MEAN_ENTRY(mean_load_entry, load);
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1261,6 +1314,30 @@ static struct c2c_dimension dim_node = {
.width = 4,
};

+/*
+ * Mean dimensions grouped under the "cycles" span header; all use
+ * empty_cmp as comparator and are 8 characters wide.
+ */
+static struct c2c_dimension dim_mean_rmt = {
+ .header = HEADER_SPAN("---------- cycles ----------", "rmt hitm", 2),
+ .name = "mean_rmt",
+ .cmp = empty_cmp,
+ .entry = mean_rmt_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_mean_lcl = {
+ .header = HEADER_SPAN_LOW("lcl hitm"),
+ .name = "mean_lcl",
+ .cmp = empty_cmp,
+ .entry = mean_lcl_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_mean_load = {
+ .header = HEADER_SPAN_LOW("load"),
+ .name = "mean_load",
+ .cmp = empty_cmp,
+ .entry = mean_load_entry,
+ .width = 8,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1295,6 +1372,9 @@ static struct c2c_dimension *dimensions[] = {
&dim_symbol,
&dim_dso,
&dim_node,
+ &dim_mean_rmt,
+ &dim_mean_lcl,
+ &dim_mean_load,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:09 AM9/22/16
to
Adding pid dimension key wrapper.

It is to be displayed in the single cacheline output:

pid

We currently don't have a single 'pid' sort/display entry,
which would output just pid number, hence adding it into
c2c code.

Link: http://lkml.kernel.org/n/tip-3o23qrspxc...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 030a33a93caf..20b5e7f64412 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -807,6 +807,22 @@ percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
STAT_FN(lcl_dram)
STAT_FN(rmt_dram)

+/* Print the pid_ of the entry's thread as a right-aligned decimal number. */
+static int
+pid_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	  struct hist_entry *he)
+{
+	int col_width;
+
+	col_width = c2c_width(fmt, hpp, he->hists);
+	return snprintf(hpp->buf, hpp->size, "%*d", col_width,
+			he->thread->pid_);
+}
+
+/* Order entries by thread pid_: returns left's pid_ minus right's; fmt is unused. */
+static int64_t
+pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return left->thread->pid_ - right->thread->pid_;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1071,6 +1087,14 @@ static struct c2c_dimension dim_dram_rmt = {
.width = 8,
};

+/* 'pid' dimension: "Pid" column, 7 characters wide. */
+static struct c2c_dimension dim_pid = {
+ .header = HEADER_LOW("Pid"),
+ .name = "pid",
+ .cmp = pid_cmp,
+ .entry = pid_entry,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1100,6 +1124,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_percent_stores_l1miss,
&dim_dram_lcl,
&dim_dram_rmt,
+ &dim_pid,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:10 AM9/22/16
to
Using resort callbacks to compute the columns' width.

Only the global column widths are computed; c2c entries
have fixed widths.

Link: http://lkml.kernel.org/n/tip-zyayvq2u3d...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index ee64537493ce..c7fe81d49016 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1722,11 +1722,21 @@ static inline int valid_hitm_or_store(struct hist_entry *he)
return has_hitm || c2c_he->stats.store;
}

+/*
+ * Run hists__calc_col_len() for @he against the hists member of the
+ * c2c_hists object that embeds he->hists.
+ */
+static void calc_width(struct hist_entry *he)
+{
+	struct c2c_hists *owner = container_of(he->hists, struct c2c_hists, hists);
+
+	hists__calc_col_len(&owner->hists, he);
+}
+
static int filter_cb(struct hist_entry *he)
{
if (c2c.show_src && !he->srcline)
he->srcline = hist_entry__get_srcline(he);

+ calc_width(he);
+
if (!valid_hitm_or_store(he))
he->filtered = HIST_FILTER__C2C;

@@ -1742,6 +1752,8 @@ static int resort_cl_cb(struct hist_entry *he)
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_hists = c2c_he->hists;

+ calc_width(he);
+
if (display && c2c_hists) {
c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:11 AM9/22/16
to
Adding 3 loads related dimension key wrappers.

They are to be displayed in the main cachelines
overall output:

ld_fbhit, ld_l1hit, ld_l2hit

They all display bare numbers of loads for
FB (Fill Buffer), L1 and L2 cache.

Link: http://lkml.kernel.org/n/tip-wxrzhy74zl...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 7c62a749eaca..698afbf6330f 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -412,6 +412,9 @@ STAT_FN(lcl_hitm)
STAT_FN(store)
STAT_FN(st_l1hit)
STAT_FN(st_l1miss)
+STAT_FN(ld_fbhit)
+STAT_FN(ld_l1hit)
+STAT_FN(ld_l2hit)

#define HEADER_LOW(__h) \
{ \
@@ -552,6 +555,30 @@ static struct c2c_dimension dim_cl_stores_l1miss = {
.width = 7,
};

+/*
+ * Load-hit dimensions grouped under the "Core Load Hit" span header
+ * (FB, L1, L2), each 7 characters wide.
+ */
+static struct c2c_dimension dim_ld_fbhit = {
+ .header = HEADER_SPAN("----- Core Load Hit -----", "FB", 2),
+ .name = "ld_fbhit",
+ .cmp = ld_fbhit_cmp,
+ .entry = ld_fbhit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_ld_l1hit = {
+ .header = HEADER_SPAN_LOW("L1"),
+ .name = "ld_l1hit",
+ .cmp = ld_l1hit_cmp,
+ .entry = ld_l1hit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_ld_l2hit = {
+ .header = HEADER_SPAN_LOW("L2"),
+ .name = "ld_l2hit",
+ .cmp = ld_l2hit_cmp,
+ .entry = ld_l2hit_entry,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -566,6 +593,9 @@ static struct c2c_dimension *dimensions[] = {
&dim_stores_l1miss,
&dim_cl_stores_l1hit,
&dim_cl_stores_l1miss,
+ &dim_ld_fbhit,
+ &dim_ld_l1hit,
+ &dim_ld_l2hit,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:50:17 AM9/22/16
to
Adding node dimension key wrapper.

It is to be displayed in the single cacheline output:

node

It displays nodes hits related to cacheline accesses.

The node field comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in following format:
Node{cpus %hitms %stores}
- node IDs with list of affected CPUs in following format:
Node{cpu list}

User can switch the flavor with -N option (-NN,-NNN).
It will be available in TUI to switch this with 'n' key.

Link: http://lkml.kernel.org/n/tip-6742e6g0r7...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 219 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 219 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index eba46b94b69e..3ffe051f377d 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1,6 +1,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/stringify.h>
+#include <asm/bug.h>
#include "util.h"
#include "debug.h"
#include "builtin.h"
@@ -22,6 +23,8 @@ struct c2c_hists {
struct c2c_hist_entry {
struct c2c_hists *hists;
struct c2c_stats stats;
+ unsigned long *cpuset;
+ struct c2c_stats *node_stats;
/*
* must be at the end,
* because of its callchain dynamic entry
@@ -32,6 +35,12 @@ struct c2c_hist_entry {
struct perf_c2c {
struct perf_tool tool;
struct c2c_hists hists;
+
+ unsigned long **nodes;
+ int nodes_cnt;
+ int cpus_cnt;
+ int *cpu2node;
+ int node_info;
};

static struct perf_c2c c2c;
@@ -44,6 +53,14 @@ static void *c2c_he_zalloc(size_t size)
if (!c2c_he)
return NULL;

+ c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
+ if (!c2c_he->cpuset)
+ return NULL;
+
+ c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
+ if (!c2c_he->node_stats)
+ return NULL;
+
return &c2c_he->he;
}

@@ -57,6 +74,8 @@ static void c2c_he_free(void *he)
free(c2c_he->hists);
}

+ free(c2c_he->cpuset);
+ free(c2c_he->node_stats);
free(c2c_he);
}

@@ -91,6 +110,16 @@ he__get_c2c_hists(struct hist_entry *he,
return hists;
}

+/*
+ * Record the sample's cpu in the entry's cpuset.  A sample whose cpu
+ * is (unsigned int) -1 triggers a WARN_ONCE and is not recorded.
+ */
+static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
+			    struct perf_sample *sample)
+{
+	if (!WARN_ONCE(sample->cpu == (unsigned int) -1,
+		       "WARNING: no sample cpu value"))
+		set_bit(sample->cpu, c2c_he->cpuset);
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -131,10 +160,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
c2c_add_stats(&c2c_he->stats, &stats);
c2c_add_stats(&c2c_hists->stats, &stats);

+ c2c_he__set_cpu(c2c_he, sample);
+
hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);

if (!ret) {
+ /*
+ * There's already been warning about missing
+ * sample's cpu value. Let's account all to
+ * node 0 in this case, without any further
+ * warning.
+ *
+ * Doing node stats only for single callchain data.
+ */
+ int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
+ int node = c2c.cpu2node[cpu];
+
mi = mi_dup;

mi_dup = memdup(mi, sizeof(*mi));
@@ -154,6 +196,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
c2c_add_stats(&c2c_hists->stats, &stats);
+ c2c_add_stats(&c2c_he->node_stats[node], &stats);
+
+ c2c_he__set_cpu(c2c_he, sample);

hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);
@@ -823,6 +868,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return left->thread->pid_ - right->thread->pid_;
}

+/* Comparator that treats every pair of entries as equal (always returns 0). */
+static int64_t
+empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left __maybe_unused,
+ struct hist_entry *right __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * Print the 'node' column for a hist entry.
+ *
+ * For every node the entry's cpuset is intersected with the node's cpu
+ * bitmap; nodes with an empty intersection are skipped (padded with 21
+ * spaces when c2c.node_info == 1).  The remaining nodes are printed
+ * separated by a space, in the flavour selected by c2c.node_info:
+ *   0 - node id
+ *   1 - node id{cpus %hitms %stores}
+ *   2 - node id{cpu list}
+ *
+ * Output is appended to hpp through advance_hpp(); always returns 0.
+ */
+static int
+node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+	   struct hist_entry *he)
+{
+	struct c2c_hist_entry *c2c_he;
+	bool first = true;
+	int node;
+	int ret = 0;
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+	for (node = 0; node < c2c.nodes_cnt; node++) {
+		DECLARE_BITMAP(set, c2c.cpus_cnt);
+
+		/* set = cpus of this node that touched the entry */
+		bitmap_zero(set, c2c.cpus_cnt);
+		bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt);
+
+		if (!bitmap_weight(set, c2c.cpus_cnt)) {
+			if (c2c.node_info == 1) {
+				ret = scnprintf(hpp->buf, hpp->size, "%21s", " ");
+				advance_hpp(hpp, ret);
+			}
+			continue;
+		}
+
+		if (!first) {
+			ret = scnprintf(hpp->buf, hpp->size, " ");
+			advance_hpp(hpp, ret);
+		}
+
+		switch (c2c.node_info) {
+		case 0:
+			ret = scnprintf(hpp->buf, hpp->size, "%2d", node);
+			advance_hpp(hpp, ret);
+			break;
+		case 1:
+		{
+			/*
+			 * Count the cpus of this node only; the weight of the
+			 * whole entry cpuset would repeat the same total for
+			 * every node.
+			 */
+			int num = bitmap_weight(set, c2c.cpus_cnt);
+			struct c2c_stats *stats = &c2c_he->node_stats[node];
+
+			ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
+			advance_hpp(hpp, ret);
+
+			/* Share of the entry's remote HITMs seen on this node. */
+			if (c2c_he->stats.rmt_hitm > 0) {
+				ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
+						percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm));
+			} else {
+				ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+			}
+
+			advance_hpp(hpp, ret);
+
+			/* Share of the entry's stores seen on this node. */
+			if (c2c_he->stats.store > 0) {
+				ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}",
+						percent(stats->store, c2c_he->stats.store));
+			} else {
+				ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a");
+			}
+
+			advance_hpp(hpp, ret);
+			break;
+		}
+		case 2:
+			ret = scnprintf(hpp->buf, hpp->size, "%2d{", node);
+			advance_hpp(hpp, ret);
+
+			ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size);
+			advance_hpp(hpp, ret);
+
+			ret = scnprintf(hpp->buf, hpp->size, "}");
+			advance_hpp(hpp, ret);
+			break;
+		default:
+			break;
+		}
+
+		first = false;
+	}
+
+	return 0;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1112,6 +1248,19 @@ static struct c2c_dimension dim_dso = {
.se = &sort_dso,
};

+/* One header per 'node' flavour, indexed by c2c.node_info (0..2). */
+static struct c2c_header header_node[3] = {
+ HEADER_LOW("Node"),
+ HEADER_LOW("Node{cpus %hitms %stores}"),
+ HEADER_LOW("Node{cpu list}"),
+};
+
+/* 'node' dimension; .header is left unset here and filled in by setup_nodes_header(). */
+static struct c2c_dimension dim_node = {
+ .name = "node",
+ .cmp = empty_cmp,
+ .entry = node_entry,
+ .width = 4,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1145,6 +1294,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_tid,
&dim_symbol,
&dim_dso,
+ &dim_node,
NULL,
};

@@ -1371,6 +1521,68 @@ static int resort_cl_cb(struct hist_entry *he)
return 0;
}

+/* Select the 'node' column header matching the current c2c.node_info value. */
+static void setup_nodes_header(void)
+{
+ dim_node.header = header_node[c2c.node_info];
+}
+
+/*
+ * Build the node/cpu lookup tables from the session header:
+ *   c2c.nodes[node]   - bitmap of the cpus belonging to the node
+ *   c2c.cpu2node[cpu] - node of the cpu, -1 when no node lists it
+ *
+ * c2c.node_info is clamped to 2 and the 'node' header is selected.
+ *
+ * Returns 0 on success, -EINVAL when the header carries no numa node
+ * data or a cpu is listed by more than one node, -ENOMEM on allocation
+ * failure.
+ */
+static int setup_nodes(struct perf_session *session)
+{
+	struct numa_node *n;
+	unsigned long **nodes;
+	int node, cpu;
+	int *cpu2node;
+
+	if (c2c.node_info > 2)
+		c2c.node_info = 2;
+
+	c2c.nodes_cnt = session->header.env.nr_numa_nodes;
+	c2c.cpus_cnt  = session->header.env.nr_cpus_online;
+
+	n = session->header.env.numa_nodes;
+	if (!n)
+		return -EINVAL;
+
+	nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt);
+	if (!nodes)
+		return -ENOMEM;
+
+	c2c.nodes = nodes;
+
+	cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt);
+	if (!cpu2node)
+		return -ENOMEM;
+
+	for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
+		cpu2node[cpu] = -1;
+
+	c2c.cpu2node = cpu2node;
+
+	for (node = 0; node < c2c.nodes_cnt; node++) {
+		struct cpu_map *map = n[node].map;
+		unsigned long *set;
+
+		set = bitmap_alloc(c2c.cpus_cnt);
+		if (!set)
+			return -ENOMEM;
+
+		/*
+		 * Publish the bitmap before filling it, so the early return
+		 * below does not leave it unreferenced.
+		 */
+		nodes[node] = set;
+
+		/*
+		 * NOTE(review): cpu ids from the map index both the bitmap and
+		 * cpu2node[], which are sized by nr_cpus_online - confirm ids
+		 * can never reach that count.
+		 */
+		for (cpu = 0; cpu < map->nr; cpu++) {
+			set_bit(map->map[cpu], set);
+
+			if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
+				return -EINVAL;
+
+			cpu2node[map->map[cpu]] = node;
+		}
+	}
+
+	setup_nodes_header();
+	return 0;
+}
+
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -1385,6 +1597,8 @@ static int perf_c2c__report(int argc, const char **argv)
"be more verbose (show counter open errors, etc)"),
OPT_STRING('i', "input", &input_name, "file",
"the input file to process"),
+ OPT_INCR('N', "node-info", &c2c.node_info,
+ "show extra node info in report (repeat for more info)"),
OPT_END()
};
int err = 0;
@@ -1410,6 +1624,11 @@ static int perf_c2c__report(int argc, const char **argv)
pr_debug("No memory for session\n");
goto out;
}
+ err = setup_nodes(session);
+ if (err) {
+ pr_err("Failed setup nodes\n");
+ goto out;
+ }

if (symbol__init(&session->header.env) < 0)
goto out_session;
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 11:51:46 AM9/22/16
to
Adding 2 LLC load related dimension key wrappers.

They are to be displayed in the main cachelines
overall output:

ld_lclhit, ld_rmthit

They display bare numbers of LLC and remote loads
for cacheline.

Link: http://lkml.kernel.org/n/tip-ahjg0voauf...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 698afbf6330f..893b813904bc 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -415,6 +415,8 @@ STAT_FN(st_l1miss)
STAT_FN(ld_fbhit)
STAT_FN(ld_l1hit)
STAT_FN(ld_l2hit)
+STAT_FN(ld_llchit)
+STAT_FN(rmt_hit)

#define HEADER_LOW(__h) \
{ \
@@ -579,6 +581,22 @@ static struct c2c_dimension dim_ld_l2hit = {
.width = 7,
};

+/*
+ * Load-hit dimensions grouped under the "LLC Load Hit" span header
+ * (Llc, Rmt), each 8 characters wide.
+ */
+static struct c2c_dimension dim_ld_llchit = {
+ .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1),
+ .name = "ld_lclhit",
+ .cmp = ld_llchit_cmp,
+ .entry = ld_llchit_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_ld_rmthit = {
+ .header = HEADER_SPAN_LOW("Rmt"),
+ .name = "ld_rmthit",
+ .cmp = rmt_hit_cmp,
+ .entry = rmt_hit_entry,
+ .width = 8,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -596,6 +614,8 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_fbhit,
&dim_ld_l1hit,
&dim_ld_l2hit,
+ &dim_ld_llchit,
+ &dim_ld_rmthit,
NULL,
};

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 12:00:05 PM9/22/16
to
Will be used from external places in following patches.

Link: http://lkml.kernel.org/n/tip-4jyvw21cac...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/util/sort.c | 6 +++---
tools/perf/util/sort.h | 6 ++++++
2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9f7c1ea9e3ad..452e15a10dd2 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -867,7 +867,7 @@ struct sort_entry sort_cycles = {
};

/* --sort daddr_sym */
-static int64_t
+int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -896,7 +896,7 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
width);
}

-static int64_t
+int64_t
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -1062,7 +1062,7 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
return repsep_snprintf(bf, size, "%-*s", width, out);
}

-static int64_t
+int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
{
u64 l, r;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e93b0fa43704..099c97557d33 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -274,4 +274,10 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct perf_evlist *evlist,
int level);
int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);

Jiri Olsa

unread,
Sep 22, 2016, 12:00:05 PM9/22/16
to
Will be used from external places in following patches.

Link: http://lkml.kernel.org/n/tip-uip4x9u74t...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/ui/stdio/hist.c | 4 ++--
tools/perf/util/hist.h | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 189665c315cc..a2a50ef4176d 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,8 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
return 0;
}

-static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
- struct perf_hpp_list *hpp_list)
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 6150b94e0d23..ecc4c0c5ce18 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -487,5 +487,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list);

#endif /* __PERF_HIST_H */
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 12:00:06 PM9/22/16
to
Will be used from external places in following patches.

Link: http://lkml.kernel.org/n/tip-7garqfmx5i...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/util/sort.c | 2 +-
tools/perf/util/sort.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 1884d7f9b9d2..9e1f6f75a50f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2748,7 +2748,7 @@ static int setup_output_list(struct perf_hpp_list *list, char *str)
return ret;
}

-static void reset_dimensions(void)
+void reset_dimensions(void)
{
unsigned int i;

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 9505483cb95c..4efadc1e98c5 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -269,4 +269,5 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
bool is_strict_order(const char *order);

int hpp_dimension__add_output(unsigned col);
+void reset_dimensions(void);

Jiri Olsa

unread,
Sep 22, 2016, 12:00:06 PM9/22/16
to
Introducing c2c_add_stats helper function to
accumulate c2c_stats.

Original-patch-by: Dick Fowles <rfo...@redhat.com>
Original-patch-by: Don Zickus <dzi...@redhat.com>
tools/perf/util/mem-events.c | 30 ++++++++++++++++++++++++++++++
tools/perf/util/mem-events.h | 1 +
2 files changed, 31 insertions(+)

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 502fcee91973..e50773286ef6 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -366,3 +366,33 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
#undef P
return err;
}
+
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
+{
+ stats->nr_entries += add->nr_entries;
+
+ stats->locks += add->locks;
+ stats->store += add->store;
+ stats->st_uncache += add->st_uncache;
+ stats->st_noadrs += add->st_noadrs;
+ stats->st_l1hit += add->st_l1hit;
+ stats->st_l1miss += add->st_l1miss;
+ stats->load += add->load;
+ stats->ld_excl += add->ld_excl;
+ stats->ld_shared += add->ld_shared;
+ stats->ld_uncache += add->ld_uncache;
+ stats->ld_io += add->ld_io;
+ stats->ld_miss += add->ld_miss;
+ stats->ld_noadrs += add->ld_noadrs;
+ stats->ld_fbhit += add->ld_fbhit;
+ stats->ld_l1hit += add->ld_l1hit;
+ stats->ld_l2hit += add->ld_l2hit;
+ stats->ld_llchit += add->ld_llchit;
+ stats->lcl_hitm += add->lcl_hitm;
+ stats->rmt_hitm += add->rmt_hitm;
+ stats->rmt_hit += add->rmt_hit;
+ stats->lcl_dram += add->lcl_dram;
+ stats->rmt_dram += add->rmt_dram;
+ stats->nomap += add->nomap;
+ stats->noparse += add->noparse;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index e111a2a2b18f..faf80403b519 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -68,5 +68,6 @@ struct c2c_stats {

struct hist_entry;
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);

#endif /* __PERF_MEM_EVENTS_H */
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 12:00:06 PM9/22/16
to
Adding helper macros to define header objects.
They will be used in following patches that add
new dimensions.

The c2c report will support 2-line headers, hence
we only define line[0] and line[1] in the macros.

Link: http://lkml.kernel.org/n/tip-tkgrfvlw0m...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 7bf6248dbd75..cfa12930b77b 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -271,6 +271,41 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
}

+#define HEADER_LOW(__h) \
+ { \
+ .line[1] = { \
+ .text = __h, \
+ }, \
+ }
+
+#define HEADER_BOTH(__h0, __h1) \
+ { \
+ .line[0] = { \
+ .text = __h0, \
+ }, \
+ .line[1] = { \
+ .text = __h1, \
+ }, \
+ }
+
+#define HEADER_SPAN(__h0, __h1, __s) \
+ { \
+ .line[0] = { \
+ .text = __h0, \
+ .span = __s, \
+ }, \
+ .line[1] = { \
+ .text = __h1, \
+ }, \
+ }
+
+#define HEADER_SPAN_LOW(__h) \
+ { \
+ .line[1] = { \
+ .text = __h, \
+ }, \
+ }
+
static struct c2c_dimension *dimensions[] = {
NULL,
};
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 12:00:06 PM9/22/16
to
Store cacheline related entries in nested hist
object for each cacheline data. Nested entries
are sorted by 'offset' within related cacheline.

We will allow specific sort keys to be configured
for nested cacheline data entries in following
patches.

Link: http://lkml.kernel.org/n/tip-37f751rgqa...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 90 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 84 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 29fb9573e292..cd0406ab8b5d 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -61,6 +61,32 @@ static struct hist_entry_ops c2c_entry_ops = {
.free = c2c_he_free,
};

+static int c2c_hists__init(struct c2c_hists *hists,
+ const char *sort);
+
+static struct hists*
+he__get_hists(struct hist_entry *he,
+ const char *sort)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *hists;
+ int ret;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists)
+ return &c2c_he->hists->hists;
+
+ hists = c2c_he->hists = zalloc(sizeof(*hists));
+ if (!hists)
+ return NULL;
+
+ ret = c2c_hists__init(hists, sort);
+ if (ret)
+ free(hists);
+
+ return &hists->hists;
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -70,7 +96,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct hists *hists = &c2c.hists.hists;
struct hist_entry *he;
struct addr_location al;
- struct mem_info *mi;
+ struct mem_info *mi, *mi_dup;
int ret;

if (machine__resolve(machine, &al, sample) < 0) {
@@ -83,19 +109,50 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (mi == NULL)
return -ENOMEM;

+ mi_dup = memdup(mi, sizeof(*mi));
+ if (!mi_dup)
+ goto free_mi;
+
he = hists__add_entry_ops(hists, &c2c_entry_ops,
&al, NULL, NULL, mi,
sample, true);
- if (he == NULL) {
- free(mi);
- return -ENOMEM;
- }
+ if (he == NULL)
+ goto free_mi_dup;

hists__inc_nr_samples(hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);

+ if (!ret) {
+ mi = mi_dup;
+
+ mi_dup = memdup(mi, sizeof(*mi));
+ if (!mi_dup)
+ goto free_mi;
+
+ hists = he__get_hists(he, "offset");
+ if (!hists)
+ goto free_mi_dup;
+
+ he = hists__add_entry_ops(hists, &c2c_entry_ops,
+ &al, NULL, NULL, mi,
+ sample, true);
+ if (he == NULL)
+ goto free_mi_dup;
+
+ hists__inc_nr_samples(hists, he->filtered);
+ ret = hist_entry__append_callchain(he, sample);
+ }
+
+out:
addr_location__put(&al);
return ret;
+
+free_mi_dup:
+ free(mi_dup);
+free_mi:
+ free(mi);
+ ret = -ENOMEM;
+ goto out;
}

static struct perf_c2c c2c = {
@@ -400,6 +457,27 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
return hpp_list__parse(&c2c_hists->list, output, sort);
}

+static int filter_cb(struct hist_entry *he __maybe_unused)
+{
+ return 0;
+}
+
+static int resort_cl_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ if (c2c_hists) {
+ hists__collapse_resort(&c2c_hists->hists, NULL);
+ hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
+ }
+
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -458,7 +536,7 @@ static int perf_c2c__report(int argc, const char **argv)
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");

hists__collapse_resort(&c2c.hists.hists, NULL);
- hists__output_resort(&c2c.hists.hists, &prog);
+ hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb);

ui_progress__finish();

--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 12:00:06 PM9/22/16
to
Adding c2c record subcommand. It sets up options related
to HITM cacheline analysis and calls the standard perf
record command.

$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...

It produces perf.data, to be reported by
perf c2c report, which comes in following patches.

Details are described in the man page, which is
added in one of the following patches.

Link: http://lkml.kernel.org/n/tip-hjxkryl43n...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 8252ed0ba5d0..58924c67f818 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -4,12 +4,116 @@
#include "debug.h"
#include "builtin.h"
#include <subcmd/parse-options.h>
+#include "mem-events.h"

static const char * const c2c_usage[] = {
"perf c2c",
NULL
};

+static int parse_record_events(const struct option *opt __maybe_unused,
+ const char *str, int unset __maybe_unused)
+{
+ bool *event_set = (bool *) opt->value;
+
+ *event_set = true;
+ return perf_mem_events__parse(str);
+}
+
+
+static const char * const __usage_record[] = {
+ "perf c2c record [<options>] [<command>]",
+ "perf c2c record [<options>] -- <command> [<options>]",
+ NULL
+};
+
+static const char * const *record_mem_usage = __usage_record;
+
+static int perf_c2c__record(int argc, const char **argv)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+ int ret;
+ bool all_user = false, all_kernel = false;
+ bool event_set = false;
+ struct option options[] = {
+ OPT_CALLBACK('e', "event", &event_set, "event",
+ "event selector. Use 'perf mem record -e list' to list available events",
+ parse_record_events),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
+ OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
+ OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
+ OPT_END()
+ };
+
+ if (perf_mem_events__init()) {
+ pr_err("failed: memory events not supported\n");
+ return -1;
+ }
+
+ argc = parse_options(argc, argv, options, record_mem_usage,
+ PARSE_OPT_KEEP_UNKNOWN);
+
+ rec_argc = argc + 10; /* max number of arguments */
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -1;
+
+ rec_argv[i++] = "record";
+
+ if (!event_set) {
+ perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+ perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+ }
+
+ if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+ rec_argv[i++] = "-W";
+
+ rec_argv[i++] = "-d";
+ rec_argv[i++] = "--sample-cpu";
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ if (!perf_mem_events[j].record)
+ continue;
+
+ if (!perf_mem_events[j].supported) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events[j].name);
+ return -1;
+ }
+
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = perf_mem_events__name(j);
+ };
+
+ if (all_user)
+ rec_argv[i++] = "--all-user";
+
+ if (all_kernel)
+ rec_argv[i++] = "--all-kernel";
+
+ for (j = 0; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ if (verbose > 0) {
+ pr_debug("calling: ");
+
+ j = 0;
+
+ while (rec_argv[j]) {
+ pr_debug("%s ", rec_argv[j]);
+ j++;
+ }
+ pr_debug("\n");
+ }
+
+ ret = cmd_record(i, rec_argv, NULL);
+ free(rec_argv);
+ return ret;
+}
+
int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
{
const struct option c2c_options[] = {
@@ -19,5 +123,15 @@ int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)

argc = parse_options(argc, argv, c2c_options, c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (!argc)
+ usage_with_options(c2c_usage, c2c_options);
+
+ if (!strncmp(argv[0], "rec", 3)) {
+ return perf_c2c__record(argc, argv);
+ } else {
+ usage_with_options(c2c_usage, c2c_options);
+ }
+
return 0;
}
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 12:00:06 PM9/22/16
to
Em Thu, Sep 22, 2016 at 05:36:36PM +0200, Jiri Olsa escreveu:
> Will be used from external places in following patches.
>
> Link: http://lkml.kernel.org/n/tip-uip4x9u74t...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>

Thanks, applied,

- Arnaldo

Jiri Olsa

unread,
Sep 22, 2016, 12:00:06 PM9/22/16
to
Will be used from external places in following patches.

Link: http://lkml.kernel.org/n/tip-ydj205bfen...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/ui/stdio/hist.c | 2 +-
tools/perf/util/hist.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index a2a50ef4176d..89d8441f9890 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -702,7 +702,7 @@ hists__fprintf_standard_headers(struct hists *hists,
return hpp_list->nr_header_lines + 2;
}

-static int hists__fprintf_headers(struct hists *hists, FILE *fp)
+int hists__fprintf_headers(struct hists *hists, FILE *fp)
{
char bf[1024];
struct perf_hpp dummy_hpp = {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ecc4c0c5ce18..9928fed8bc59 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -489,5 +489,6 @@ int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);

Jiri Olsa

unread,
Sep 22, 2016, 12:00:07 PM9/22/16
to
Adding the basic wiring for the c2c command. Its implementation
is going to be added gradually in following patches.

Link: http://lkml.kernel.org/n/tip-svq2kccqja...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/Build | 1 +
tools/perf/builtin-c2c.c | 23 +++++++++++++++++++++++
tools/perf/builtin.h | 1 +
tools/perf/perf.c | 1 +
4 files changed, 26 insertions(+)
create mode 100644 tools/perf/builtin-c2c.c

diff --git a/tools/perf/Build b/tools/perf/Build
index a43fae7f439a..b12d5d1666e3 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -21,6 +21,7 @@ perf-y += builtin-inject.o
perf-y += builtin-mem.o
perf-y += builtin-data.o
perf-y += builtin-version.o
+perf-y += builtin-c2c.o

perf-$(CONFIG_AUDIT) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
new file mode 100644
index 000000000000..8252ed0ba5d0
--- /dev/null
+++ b/tools/perf/builtin-c2c.c
@@ -0,0 +1,23 @@
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "util.h"
+#include "debug.h"
+#include "builtin.h"
+#include <subcmd/parse-options.h>
+
+static const char * const c2c_usage[] = {
+ "perf c2c",
+ NULL
+};
+
+int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ const struct option c2c_options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_END()
+ };
+
+ argc = parse_options(argc, argv, c2c_options, c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 41c24010ab43..0bcf68e98ccc 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -18,6 +18,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix);
int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
int cmd_buildid_list(int argc, const char **argv, const char *prefix);
int cmd_config(int argc, const char **argv, const char *prefix);
+int cmd_c2c(int argc, const char **argv, const char *prefix);
int cmd_diff(int argc, const char **argv, const char *prefix);
int cmd_evlist(int argc, const char **argv, const char *prefix);
int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 64c06961bfe4..aa23b3347d6b 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -43,6 +43,7 @@ static struct cmd_struct commands[] = {
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
{ "config", cmd_config, 0 },
+ { "c2c", cmd_c2c, 0 },
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },
--
2.7.4

Jiri Olsa

unread,
Sep 22, 2016, 12:00:08 PM9/22/16
to
Adding c2c report subcommand. It reads the
perf.data and displays shared data analysis.

This patch adds the basic report wiring. It gets
fully implemented in following patches.

Link: http://lkml.kernel.org/n/tip-8smklfkvee...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 58924c67f818..3fac3a294bdd 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -5,12 +5,74 @@
#include "builtin.h"
#include <subcmd/parse-options.h>
#include "mem-events.h"
+#include "session.h"
+#include "hist.h"
+#include "tool.h"
+#include "data.h"
+
+struct perf_c2c {
+ struct perf_tool tool;
+};
+
+static struct perf_c2c c2c;

static const char * const c2c_usage[] = {
- "perf c2c",
+ "perf c2c {record|report}",
NULL
};

+static const char * const __usage_report[] = {
+ "perf c2c report",
+ NULL
+};
+
+static const char * const *report_c2c_usage = __usage_report;
+
+static int perf_c2c__report(int argc, const char **argv)
+{
+ struct perf_session *session;
+ struct perf_data_file file = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ const struct option c2c_options[] = {
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "the input file to process"),
+ OPT_END()
+ };
+ int err = 0;
+
+ argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (!argc)
+ usage_with_options(report_c2c_usage, c2c_options);
+
+ file.path = input_name;
+
+ session = perf_session__new(&file, 0, &c2c.tool);
+ if (session == NULL) {
+ pr_debug("No memory for session\n");
+ goto out;
+ }
+
+ if (symbol__init(&session->header.env) < 0)
+ goto out_session;
+
+ /* No pipe support at the moment. */
+ if (perf_data_file__is_pipe(session->file)) {
+ pr_debug("No pipe support at the moment.\n");
+ goto out_session;
+ }
+
+out_session:
+ perf_session__delete(session);
+out:
+ return err;
+}
+
static int parse_record_events(const struct option *opt __maybe_unused,
const char *str, int unset __maybe_unused)
{
@@ -129,6 +191,8 @@ int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)

if (!strncmp(argv[0], "rec", 3)) {
return perf_c2c__record(argc, argv);
+ } else if (!strncmp(argv[0], "rep", 3)) {
+ return perf_c2c__report(argc, argv);
} else {
usage_with_options(c2c_usage, c2c_options);
}
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 12:00:10 PM9/22/16
to
Em Thu, Sep 22, 2016 at 05:36:34PM +0200, Jiri Olsa escreveu:
> Will be used from external places in following patches.
>
> Link: http://lkml.kernel.org/n/tip-4jyvw21cac...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>

Thanks, applied,

- Arnaldo

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 12:00:10 PM9/22/16
to
Em Thu, Sep 22, 2016 at 05:36:37PM +0200, Jiri Olsa escreveu:
> Will be used from external places in following patches.
>
> Link: http://lkml.kernel.org/n/tip-ydj205bfen...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>

Thanks, applied,

- Arnaldo

Jiri Olsa

unread,
Sep 22, 2016, 12:00:10 PM9/22/16
to
Fallback to standard dimensions in case we don't
find the dimension within c2c ones.

Link: http://lkml.kernel.org/n/tip-w3yrcawal0...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 6b58b537bc9d..a3481f86e2ae 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -213,8 +213,10 @@ static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name)
{
struct c2c_fmt *c2c_fmt = get_format(name);

- if (!c2c_fmt)
- return -1;
+ if (!c2c_fmt) {
+ reset_dimensions();
+ return output_field_add(hpp_list, name);
+ }

perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt);
return 0;
@@ -224,8 +226,10 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
{
struct c2c_fmt *c2c_fmt = get_format(name);

- if (!c2c_fmt)
- return -1;
+ if (!c2c_fmt) {
+ reset_dimensions();
+ return sort_dimension__add(hpp_list, name, NULL, 0);
+ }

perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
return 0;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 12:00:13 PM9/22/16
to
Em Thu, Sep 22, 2016 at 05:36:29PM +0200, Jiri Olsa escreveu:
> Add __hist_entry__snprintf to take perf_hpp_list as an argument
> instead of using he->hists->hpp_list. This way we can display
> arbitrary list of entries regardles of the hists setup, which
> will be useful in following patches.

Thanks, applied,

- Arnaldo

Jiri Olsa

unread,
Sep 22, 2016, 12:00:13 PM9/22/16
to
Allow reusing 'struct sort_entry' objects
within c2c dimension support.

In case the 'struct sort_entry' object meets
the needs of c2c report we will use it directly
in following patches.

Link: http://lkml.kernel.org/n/tip-a4jraum43u...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
tools/perf/builtin-c2c.c | 82 ++++++++++++++++++++++++++++++++++++++----------
1 file changed, 65 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 63c0e2d8d2d8..6b58b537bc9d 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -9,6 +9,7 @@
#include "hist.h"
#include "tool.h"
#include "data.h"
+#include "sort.h"

struct c2c_hists {
struct hists hists;
@@ -47,6 +48,7 @@ struct c2c_dimension {
struct c2c_header header;
const char *name;
int width;
+ struct sort_entry *se;

int64_t (*cmp)(struct perf_hpp_fmt *fmt,
struct hist_entry *, struct hist_entry *);
@@ -66,34 +68,47 @@ static int c2c_width(struct perf_hpp_fmt *fmt,
struct hists *hists __maybe_unused)
{
struct c2c_fmt *c2c_fmt;
+ struct c2c_dimension *dim;

c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
- return c2c_fmt->dim->width;
+ dim = c2c_fmt->dim;
+
+ return dim->se ? hists__col_len(hists, dim->se->se_width_idx) :
+ c2c_fmt->dim->width;
}

static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists __maybe_unused, int line, int *span)
+ struct hists *hists, int line, int *span)
{
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
struct c2c_fmt *c2c_fmt;
struct c2c_dimension *dim;
- int len = c2c_width(fmt, hpp, hists);
- const char *text;
+ const char *text = NULL;
+ int width = c2c_width(fmt, hpp, hists);

c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
dim = c2c_fmt->dim;

- text = dim->header.line[line].text;
- if (text == NULL)
- text = "";
-
- if (*span) {
- (*span)--;
- return 0;
+ if (dim->se) {
+ text = dim->header.line[line].text;
+ /* Use the last line from sort_entry if not defined. */
+ if (!text && (line == hpp_list->nr_header_lines - 1))
+ text = dim->se->se_header;
} else {
- *span = dim->header.line[line].span;
+ text = dim->header.line[line].text;
+
+ if (*span) {
+ (*span)--;
+ return 0;
+ } else {
+ *span = dim->header.line[line].span;
+ }
}

- return scnprintf(hpp->buf, hpp->size, "%*s", len, text);
+ if (text == NULL)
+ text = "";
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
}

static struct c2c_dimension *dimensions[] = {
@@ -130,6 +145,39 @@ static struct c2c_dimension *get_dimension(const char *name)
return NULL;
}

+static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+ size_t len = fmt->user_len;
+
+ if (!len)
+ len = hists__col_len(he->hists, dim->se->se_width_idx);
+
+ return dim->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+
+ return dim->se->se_cmp(a, b);
+}
+
+static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+ int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
+
+ collapse_fn = dim->se->se_collapse ?: dim->se->se_cmp;
+ return collapse_fn(a, b);
+}
+
static struct c2c_fmt *get_format(const char *name)
{
struct c2c_dimension *dim = get_dimension(name);
@@ -149,12 +197,12 @@ static struct c2c_fmt *get_format(const char *name)
INIT_LIST_HEAD(&fmt->list);
INIT_LIST_HEAD(&fmt->sort_list);

- fmt->cmp = dim->cmp;
- fmt->sort = dim->cmp;
- fmt->entry = dim->entry;
+ fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->sort = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->entry = dim->se ? c2c_se_entry : dim->entry;
fmt->header = c2c_header;
fmt->width = c2c_width;
- fmt->collapse = dim->cmp;
+ fmt->collapse = dim->se ? c2c_se_collapse : dim->cmp;
fmt->equal = fmt_equal;
fmt->free = fmt_free;

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 12:00:14 PM9/22/16
to
Em Thu, Sep 22, 2016 at 05:36:32PM +0200, Jiri Olsa escreveu:
> Will be used from external places in following patches.
>
> Link: http://lkml.kernel.org/n/tip-7garqfmx5i...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>

Thanks, applied,

- Arnaldo

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:05 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-5-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

tip-bot for Jiri Olsa

unread,
Sep 23, 2016, 1:30:05 AM9/23/16
to
Commit-ID: bcf98740a28579d9412afa9a72e463da386a55a6
Gitweb: http://git.kernel.org/tip/bcf98740a28579d9412afa9a72e463da386a55a6
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:32 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Thu, 22 Sep 2016 13:08:56 -0300

perf tools: Make reset_dimensions global

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-5-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/sort.c | 2 +-
tools/perf/util/sort.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 1884d7f..9e1f6f7 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2748,7 +2748,7 @@ static int setup_output_list(struct perf_hpp_list *list, char *str)
return ret;
}

-static void reset_dimensions(void)
+void reset_dimensions(void)
{
unsigned int i;

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 9505483..4efadc1 100644

tip-bot for Jiri Olsa

unread,
Sep 23, 2016, 1:30:06 AM9/23/16
to
Commit-ID: 9da44db1493a9d384ddc1bcd1553a1803ff985b6
Gitweb: http://git.kernel.org/tip/9da44db1493a9d384ddc1bcd1553a1803ff985b6
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:29 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Thu, 22 Sep 2016 13:08:56 -0300

perf hists: Add __hist_entry__snprintf function

Add __hist_entry__snprintf() to take a perf_hpp_list as an argument
instead of using he->hists->hpp_list.

This way we can display arbitrary list of entries regardless of the
hists setup, which will be useful in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-2-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/stdio/hist.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index c8dca34..189665c 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,7 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
return 0;
}

-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
@@ -384,7 +385,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
if (symbol_conf.exclude_other && !he->parent)
return 0;

- hists__for_each_format(he->hists, fmt) {
+ perf_hpp_list__for_each_format(hpp_list, fmt) {
if (perf_hpp__should_skip(fmt, he->hists))
continue;

@@ -410,6 +411,11 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
return hpp->buf - start;
}

+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+{
+ return __hist_entry__snprintf(he, hpp, he->hists->hpp_list);
+}
+
static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
struct perf_hpp *hpp,
struct hists *hists,

tip-bot for Jiri Olsa

unread,
Sep 23, 2016, 1:40:05 AM9/23/16
to
Commit-ID: 5fe7b9b47c646dbe8501378eb3684ccd802d6d25
Gitweb: http://git.kernel.org/tip/5fe7b9b47c646dbe8501378eb3684ccd802d6d25
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:34 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Thu, 22 Sep 2016 13:08:57 -0300

perf tools: Make several sorting functions global

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-7-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/sort.c | 6 +++---
tools/perf/util/sort.h | 6 ++++++
2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9f7c1ea..452e15a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -867,7 +867,7 @@ struct sort_entry sort_cycles = {
};

/* --sort daddr_sym */
-static int64_t
+int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -896,7 +896,7 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
width);
}

-static int64_t
+int64_t
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -1062,7 +1062,7 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
return repsep_snprintf(bf, size, "%-*s", width, out);
}

-static int64_t
+int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
{
u64 l, r;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e93b0fa..099c975 100644

tip-bot for Jiri Olsa

unread,
Sep 23, 2016, 1:40:05 AM9/23/16
to
Commit-ID: bd28d0c59805b88001fcc8ad5c6f913d86d8e5c2
Gitweb: http://git.kernel.org/tip/bd28d0c59805b88001fcc8ad5c6f913d86d8e5c2
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:36 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Thu, 22 Sep 2016 13:08:58 -0300

perf hists: Make __hist_entry__snprintf function global

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-9-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/stdio/hist.c | 4 ++--
tools/perf/util/hist.h | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 189665c..a2a50ef 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,8 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
return 0;
}

-static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
- struct perf_hpp_list *hpp_list)
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 6150b94..ecc4c0c 100644

tip-bot for Jiri Olsa

unread,
Sep 23, 2016, 1:40:05 AM9/23/16
to
Commit-ID: 2d831454140f28fa643b78deede4511b9e2c9e5f
Gitweb: http://git.kernel.org/tip/2d831454140f28fa643b78deede4511b9e2c9e5f
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:37 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Thu, 22 Sep 2016 13:08:59 -0300

perf hists: Make hists__fprintf_headers function global

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-10-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/stdio/hist.c | 2 +-
tools/perf/util/hist.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index a2a50ef..89d8441 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -702,7 +702,7 @@ hists__fprintf_standard_headers(struct hists *hists,
return hpp_list->nr_header_lines + 2;
}

-static int hists__fprintf_headers(struct hists *hists, FILE *fp)
+int hists__fprintf_headers(struct hists *hists, FILE *fp)
{
char bf[1024];
struct perf_hpp dummy_hpp = {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ecc4c0c..9928fed 100644

Peter Zijlstra

unread,
Sep 29, 2016, 5:20:06 AM9/29/16
to
On Thu, Sep 22, 2016 at 05:36:28PM +0200, Jiri Olsa wrote:
> hi,
> sending new version of c2c patches (v3) originally posted in here:
> http://lwn.net/Articles/588866/
>
> I took the old set and reworked it to fit into current upstream code.
> It follows the same logic as original patch and provides (almost) the
> same stdio interface. In addition new TUI interface was added.
>
> The perf c2c tool provides means for Shared Data C2C/HITM analysis.
> It allows you to track down the cacheline contentions. The tool is
> based on x86's load latency and precise store facility events provided
> by Intel CPUs.
>
> The tool was tested by Joe Mario and has proven to be useful and found
> some cachelines contentions. Joe also wrote a blog about c2c tool with
> examples located in here:
>
> https://joemario.github.io/blog/2016/09/01/c2c-blog/
>
> v4 changes:
> - 4 patches already queued
> - used u32 for c2c_stats instead of int [Stanislav]
> - fixed NO_SLANG=1 compilation [Kim]
> - add __hist_entry__snprintf helper [Arnaldo]
>
> Code is also available in:
> git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
> perf/c2c_v4
>
> Testing:
> $ perf c2c record -a [workload]
> $ perf c2c report [--stdio]
> $ man perf-c2c
>
> It's most likely you won't generate any remote HITMs on common
> laptops, so to get results for local HITMs please use:
>
> $ perf c2c report -d lcl [--stdio]

I'll just keep repeating; this is not the tool I want :-( I'll not block
this tool, but I also think it's far less usable than it should've been.

https://lkml.kernel.org/r/2015120909...@twins.programming.kicks-ass.net

What I want is a tool that maps memop events (any PEBS memops) back to a
'type::member' form and sorts on that. That doesn't rely on the PEBS
'Data Linear Address' field, as that is useless for dynamically
allocated bits. Instead it would use the IP and Dwarf information to
deduce the 'type::member' of the memop.

I want pahole like output, showing me where the hits (green) and misses
(red) are in a structure.

I want to be able to 'perf memops report -EC task_struct' and see the
expanded task_struct (as per 'pahole -EC task_struct') annotated, not a
data address for each task in my workload (which could be 100+ and
entirely useless).

Currently this is somewhat involved, since Dwarf doesn't include type
information for all memops, so we'd have to disassemble and interpret,
which while tedious is possible.

However, afaik, Stephane has been working with their tools team to get
additional DWARF info to make this easier. Stephane, any updates on
that?

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 11:00:05 AM9/29/16
to
Em Thu, Sep 29, 2016 at 11:19:12AM +0200, Peter Zijlstra escreveu:
> On Thu, Sep 22, 2016 at 05:36:28PM +0200, Jiri Olsa wrote:
> > sending new version of c2c patches (v3) originally posted in here:
> > http://lwn.net/Articles/588866/

> I'll just keep repeating; this is not the tool I want :-( I'll not block
> this tool, but I also think its far less usable than it should've been.

Well, I think it's an experiment in using that info, one that
people have been using and seemingly finding and fixing problems with.

Requires more work than the way you describe(d) various times, tho,
indeed. :-\

> https://lkml.kernel.org/r/2015120909...@twins.programming.kicks-ass.net

> What I want is a tool that maps memop events (any PEBS memops) back to a
> 'type::member' form and sorts on that. That doesn't rely on the PEBS
> 'Data Linear Address' field, as that is useless for dynamically
> allocated bits. Instead it would use the IP and Dwarf information to
> deduce the 'type::member' of the memop.

> I want pahole like output, showing me where the hits (green) and misses
> (red) are in a structure.

> I want to be able to 'perf memops report -EC task_struct' and see the
> expanded task_struct (as per 'pahole -EC task_struct') annotated, not a
> data address for each task in my workload (which could be 100+ and
> entirely useless).

> Currently this is somewhat involved, since Dwarf doesn't include type
> information for all memops, so we'd have to disassemble and interpret,
> which while tedious is possible.

> However, afaik, Stephane has been working with their tools team to get
> additional DWARF info to make this easier. Stephane, any updates on
> that?

Yeah, that would be interesting to know, I for one, due to the c2c
effort + this other work Stephane mentioned some time ago, moved working
on such a pahole based tool to the backburner, lots of other patches to
review, test, even proof read to then process all the time :-\

- Arnaldo

Joe Mario

unread,
Oct 1, 2016, 9:50:05 AM10/1/16
to
On 09/29/2016 05:19 AM, Peter Zijlstra wrote:

>
> What I want is a tool that maps memop events (any PEBS memops) back to a
> 'type::member' form and sorts on that. That doesn't rely on the PEBS
> 'Data Linear Address' field, as that is useless for dynamically
> allocated bits. Instead it would use the IP and Dwarf information to
> deduce the 'type::member' of the memop.
>
> I want pahole like output, showing me where the hits (green) and misses
> (red) are in a structure.

I agree that would give valuable insight, but it needs to be
in addition to what this c2c provides today, and not a replacement for.

Ten years ago Robert Hundt created that pahole-style output as a developer option
to the HP-UX compiler. It used compiler feedback to compute every struct
accessed by the application, with exact counts for all reads and writes to
every struct member. It even had affinity information to show how often
field members were accessed together in time.

He and I ran it on numerous large applications. It was awesome, but it
did fall short in a few places that Jiri's c2c patches provide, such as
being able to:

- distinguish where the concurrent cacheline accesses came from (e.g, which
cores, and which nodes).

- see where the loads got resolved from, (local cache, local memory, remote
cache, remote memory).

- see if the hot structs were cacheline aligned or not.

- see if more than one hot struct shares a cacheline.

- see how costly, via load latencies, the contention is.

- see, among all the accesses to a cacheline, which thread or process is
causing the most harm.

- insight into how many other threads/processes are contending for a
cacheline (and who they are).

The above info has been critical to understanding how best to tackle the
contention uncovered for all those who have used the "perf c2c" prototype.

So yes, the pahole-style addition would be a plus and it would make it easier
to map it back to the struct, but make sure to preserve what the current
"perf c2c" provides that the pahole-style output will not.

Joe

Arnaldo Carvalho de Melo

unread,
Oct 4, 2016, 1:00:07 PM10/4/16
to
Em Thu, Sep 22, 2016 at 05:36:48PM +0200, Jiri Olsa escreveu:
> Adding dcacheline dimension key support. It
> displays cacheline address as hex number.
>
> Using c2c wrapper to standard 'dcacheline' object
> to defined own header and simple (just address)
> cacheline output.

So, at this point I move from:

# perf c2c report
Error: Invalid --fields key: `dcacheline'
#

To:

[root@jouet ~]# perf c2c report
Error: Unknown --fields key: `offset'
Segmentation fault (core dumped)
[root@jouet ~]#


Error: Unknown --fields key: `offset'

Program received signal SIGSEGV, Segmentation fault.
0x0000000000524e1e in hist_entry__cmp (left=0x31a6590, right=0x7fffffffcf10) at util/hist.c:1071
1071 hists__for_each_sort_list(hists, fmt) {
Missing separate debuginfos, use: dnf debuginfo-install audit-libs-2.6.7-1.fc24.x86_64 bzip2-libs-1.0.6-20.fc24.x86_64 elfutils-libelf-0.167-1.fc24.x86_64 elfutils-libs-0.167-1.fc24.x86_64 libcap-ng-0.7.8-1.fc24.x86_64 libunwind-1.1-11.fc24.x86_64 numactl-libs-2.0.11-2.fc24.x86_64 openssl-libs-1.0.2j-1.fc24.x86_64 perl-libs-5.22.2-362.fc24.x86_64 python-libs-2.7.12-5.fc24.x86_64 slang-2.3.0-5.fc24.x86_64 xz-libs-5.2.2-2.fc24.x86_64 zlib-1.2.8-10.fc24.x86_64
(gdb) bt
#0 0x0000000000524e1e in hist_entry__cmp (left=0x31a6590, right=0x7fffffffcf10) at util/hist.c:1071
#1 0x00000000005238cf in hists__findnew_entry (hists=0x31a6230, entry=0x7fffffffcf10, al=0x7fffffffd160, sample_self=true) at util/hist.c:513
#2 0x0000000000523cc9 in __hists__add_entry (hists=0x31a6230, al=0x7fffffffd160, sym_parent=0x0, bi=0x0, mi=0x3497a10, sample=0x7fffffffd360,
sample_self=true, ops=0x96a530 <c2c_entry_ops>) at util/hist.c:604
#3 0x0000000000523dba in hists__add_entry_ops (hists=0x31a6230, ops=0x96a530 <c2c_entry_ops>, al=0x7fffffffd160, sym_parent=0x0, bi=0x0, mi=0x3497a10,
sample=0x7fffffffd360, sample_self=true) at util/hist.c:628
#4 0x0000000000464d1b in process_sample_event (tool=0x96a100 <c2c>, event=0x7ffff41be638, sample=0x7fffffffd360, evsel=0x2072b00, machine=0x2071c78)
at builtin-c2c.c:148
#5 0x00000000004fca6a in perf_evlist__deliver_sample (evlist=0x2071f30, tool=0x96a100 <c2c>, event=0x7ffff41be638, sample=0x7fffffffd360, evsel=0x2072b00,
machine=0x2071c78) at util/session.c:1196
#6 0x00000000004fcbfe in machines__deliver_event (machines=0x2071c78, evlist=0x2071f30, event=0x7ffff41be638, sample=0x7fffffffd360, tool=0x96a100 <c2c>,
file_offset=5256760) at util/session.c:1233
#7 0x00000000004fcf1c in perf_session__deliver_event (session=0x2071b90, event=0x7ffff41be638, sample=0x7fffffffd360, tool=0x96a100 <c2c>,
file_offset=5256760) at util/session.c:1290
#8 0x00000000004f9cad in ordered_events__deliver_event (oe=0x2071e90, event=0x3196098) at util/session.c:114
#9 0x0000000000500293 in __ordered_events__flush (oe=0x2071e90) at util/ordered-events.c:207
#10 0x0000000000500557 in ordered_events__flush (oe=0x2071e90, how=OE_FLUSH__ROUND) at util/ordered-events.c:274
#11 0x00000000004fbc12 in process_finished_round (tool=0x96a100 <c2c>, event=0x7ffff41c7bc0, oe=0x2071e90) at util/session.c:852
#12 0x00000000004fd0ba in perf_session__process_user_event (session=0x2071b90, event=0x7ffff41c7bc0, file_offset=5295040) at util/session.c:1329
#13 0x00000000004fd6e5 in perf_session__process_event (session=0x2071b90, event=0x7ffff41c7bc0, file_offset=5295040) at util/session.c:1455
#14 0x00000000004fe47c in __perf_session__process_events (session=0x2071b90, data_offset=424, data_size=5294624, file_size=5295048) at util/session.c:1824
#15 0x00000000004fe67a in perf_session__process_events (session=0x2071b90) at util/session.c:1878
#16 0x0000000000465d3b in perf_c2c__report (argc=0, argv=0x7fffffffe1d0) at builtin-c2c.c:617
#17 0x0000000000466535 in cmd_c2c (argc=1, argv=0x7fffffffe1d0, prefix=0x0) at builtin-c2c.c:755
#18 0x00000000004aa677 in run_builtin (p=0x975548 <commands+72>, argc=2, argv=0x7fffffffe1d0) at perf.c:358
#19 0x00000000004aa8e4 in handle_internal_command (argc=2, argv=0x7fffffffe1d0) at perf.c:420
#20 0x00000000004aaa29 in run_argv (argcp=0x7fffffffe02c, argv=0x7fffffffe020) at perf.c:466
#21 0x00000000004aae12 in main (argc=2, argv=0x7fffffffe1d0) at perf.c:610
(gdb)

> Link: http://lkml.kernel.org/n/tip-j5enppr8e7...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>
> ---
> tools/perf/builtin-c2c.c | 38 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 38 insertions(+)
>
> diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> index cfa12930b77b..335c0fd30757 100644
> --- a/tools/perf/builtin-c2c.c
> +++ b/tools/perf/builtin-c2c.c
> @@ -1,5 +1,6 @@
> #include <linux/compiler.h>
> #include <linux/kernel.h>
> +#include <linux/stringify.h>
> #include "util.h"
> #include "debug.h"
> #include "builtin.h"
> @@ -7,6 +8,7 @@
> #include "mem-events.h"
> #include "session.h"
> #include "hist.h"
> +#include "sort.h"
> #include "tool.h"
> #include "data.h"
> #include "sort.h"
> @@ -271,6 +273,33 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
> }
>
> +static char *hex_str(u64 val)
> +{
> + static char buf[20];
> +
> + snprintf(buf, 20, "0x%" PRIx64, val);
> + return buf;
> +}
> +
> +static int64_t
> +dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
> + struct hist_entry *left, struct hist_entry *right)
> +{
> + return sort__dcacheline_cmp(left, right);
> +}
> +
> +static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> + struct hist_entry *he)
> +{
> + uint64_t addr = 0;
> + int width = c2c_width(fmt, hpp, he->hists);
> +
> + if (he->mem_info)
> + addr = cl_address(he->mem_info->daddr.addr);
> +
> + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr));
> +}
> +
> #define HEADER_LOW(__h) \
> { \
> .line[1] = { \
> @@ -306,7 +335,16 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> }, \
> }
>
> +static struct c2c_dimension dim_dcacheline = {
> + .header = HEADER_LOW("Cacheline"),
> + .name = "dcacheline",
> + .cmp = dcacheline_cmp,
> + .entry = dcacheline_entry,
> + .width = 18,
> +};
> +
> static struct c2c_dimension *dimensions[] = {
> + &dim_dcacheline,
> NULL,
> };
>
> --
> 2.7.4

Jiri Olsa

unread,
Oct 4, 2016, 6:50:05 PM10/4/16
to
On Tue, Oct 04, 2016 at 01:50:12PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Sep 22, 2016 at 05:36:48PM +0200, Jiri Olsa escreveu:
> > Adding dcacheline dimension key support. It
> > displays cacheline address as hex number.
> >
> > Using c2c wrapper to standard 'dcacheline' object
> > to defined own header and simple (just address)
> > cacheline output.
>
> So, at this point I move from:
>
> # perf c2c report
> Error: Invalid --fields key: `dcacheline'
> #
>
> To:
>
> [root@jouet ~]# perf c2c report
> Error: Unknown --fields key: `offset'
> Segmentation fault (core dumped)
> [root@jouet ~]#

oops, attached patch should fix that, but I needed to change
following patches:

perf c2c report: Add cacheline hists processing
perf c2c report: Decode c2c_stats for hist entries

I updated perf/c2c_v4 branch with the fix and rebased
to your current perf/core.. let me know if I need to
repost

thanks,
jirka


---
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 335c0fd30757..74c7822460e1 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -86,7 +86,7 @@ he__get_c2c_hists(struct hist_entry *he,

ret = c2c_hists__init(hists, sort);
if (ret)
- free(hists);
+ zfree(&hists);

return hists;
}

Arnaldo Carvalho de Melo

unread,
Oct 5, 2016, 6:50:05 AM10/5/16
to
ok, I'm trying to fix it myself, starting with this:

+ ret = c2c_hists__init(hists, sort);
+ if (ret)
+ free(hists);
+
+ return &hists->hists;

That is on "perf c2c report: Add cacheline hists processing"

Arnaldo Carvalho de Melo

unread,
Oct 5, 2016, 7:00:05 AM10/5/16
to
Em Wed, Oct 05, 2016 at 07:45:17AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Wed, Oct 05, 2016 at 12:46:12AM +0200, Jiri Olsa escreveu:
> > On Tue, Oct 04, 2016 at 01:50:12PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Thu, Sep 22, 2016 at 05:36:48PM +0200, Jiri Olsa escreveu:
> > > > Adding dcacheline dimension key support. It
> > > > displays cacheline address as hex number.
> > > >
> > > > Using c2c wrapper to standard 'dcacheline' object
> > > > to defined own header and simple (just address)
> > > > cacheline output.
> > >
> > > So, at this point I move from:
> > >
> > > # perf c2c report
> > > Error: Invalid --fields key: `dcacheline'
> > > #
> > >
> > > To:
> > >
> > > [root@jouet ~]# perf c2c report
> > > Error: Unknown --fields key: `offset'
> > > Segmentation fault (core dumped)
> > > [root@jouet ~]#
> >
> > oops, attached patch should fix that, but I needed to change
> > following patches:

Now we're at:

[root@jouet ~]# perf c2c report
Error: Unknown --fields key: `offset'
0x50cbc0 [0x8]: failed to process type: 68
failed to process sample
[root@jouet ~]#

> >
> > perf c2c report: Add cacheline hists processing
> > perf c2c report: Decode c2c_stats for hist entries
> >
> > I updated perf/c2c_v4 branch with the fix and rebased
> > to your current perf/core.. let me know if I need to
> > repost
> >
> > thanks,
> > jirka
>
> ok, I'm trying to fix it myself, starting with this:
>
> + ret = c2c_hists__init(hists, sort);
> + if (ret)
> + free(hists);
> +
> + return &hists->hists;
>
> That is on "perf c2c report: Add cacheline hists processing"

That became:

commit f485e33c4543ac31f2eb77293b49bfe821271bbb
Author: Jiri Olsa <jo...@kernel.org>
Date: Thu Sep 22 17:36:45 2016 +0200

perf c2c report: Add cacheline hists processing

Store cacheline related entries in nested hist object for each cacheline
data. Nested entries are sorted by 'offset' within related cacheline.

We will allow specific sort keys to be configured for nested cacheline
data entries in following patches.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-18-...@kernel.org
[ he__get_hists() should return NULL when c2c_hists__init() fails ]
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 29fb9573e292..df413b564361 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -61,6 +61,34 @@ static struct hist_entry_ops c2c_entry_ops = {
.free = c2c_he_free,
};

+static int c2c_hists__init(struct c2c_hists *hists,
+ const char *sort);
+
+static struct hists*
+he__get_hists(struct hist_entry *he,
+ const char *sort)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *hists;
+ int ret;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists)
+ return &c2c_he->hists->hists;
+
+ hists = c2c_he->hists = zalloc(sizeof(*hists));
+ if (!hists)
+ return NULL;
+
+ ret = c2c_hists__init(hists, sort);
+ if (ret) {
+ free(hists);
+ return NULL;
+ }
+
+ return &hists->hists;
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -70,7 +98,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct hists *hists = &c2c.hists.hists;
struct hist_entry *he;
struct addr_location al;
- struct mem_info *mi;
+ struct mem_info *mi, *mi_dup;
int ret;

if (machine__resolve(machine, &al, sample) < 0) {
@@ -83,19 +111,50 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
@@ -400,6 +459,27 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
return hpp_list__parse(&c2c_hists->list, output, sort);
}

+static int filter_cb(struct hist_entry *he __maybe_unused)
+{
+ return 0;
+}
+
+static int resort_cl_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ if (c2c_hists) {
+ hists__collapse_resort(&c2c_hists->hists, NULL);
+ hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
+ }
+
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -458,7 +538,7 @@ static int perf_c2c__report(int argc, const char **argv)
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");

hists__collapse_resort(&c2c.hists.hists, NULL);
- hists__output_resort(&c2c.hists.hists, &prog);
+ hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb);

ui_progress__finish();


> >

Arnaldo Carvalho de Melo

unread,
Oct 5, 2016, 7:10:05 AM10/5/16
to
Em Thu, Sep 22, 2016 at 05:36:48PM +0200, Jiri Olsa escreveu:
> Adding dcacheline dimension key support. It
> displays cacheline address as hex number.
>
> Using c2c wrapper to standard 'dcacheline' object
> to defined own header and simple (just address)
> cacheline output.
>
> Link: http://lkml.kernel.org/n/tip-j5enppr8e7...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>
> ---
> tools/perf/builtin-c2c.c | 38 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 38 insertions(+)
>
> diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> index cfa12930b77b..335c0fd30757 100644
> --- a/tools/perf/builtin-c2c.c
> +++ b/tools/perf/builtin-c2c.c
> @@ -1,5 +1,6 @@
> #include <linux/compiler.h>
> #include <linux/kernel.h>
> +#include <linux/stringify.h>
> #include "util.h"
> #include "debug.h"
> #include "builtin.h"
> @@ -7,6 +8,7 @@
> #include "mem-events.h"
> #include "session.h"
> #include "hist.h"
> +#include "sort.h"
> #include "tool.h"
> #include "data.h"
> #include "sort.h"
> @@ -271,6 +273,33 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
> }
>
> +static char *hex_str(u64 val)
> +{
> + static char buf[20];

Ouch, what for?

> +
> + snprintf(buf, 20, "0x%" PRIx64, val);
> + return buf;
> +}
> +
> +static int64_t
> +dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
> + struct hist_entry *left, struct hist_entry *right)
> +{
> + return sort__dcacheline_cmp(left, right);
> +}
> +
> +static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> + struct hist_entry *he)
> +{
> + uint64_t addr = 0;
> + int width = c2c_width(fmt, hpp, he->hists);
> +
> + if (he->mem_info)
> + addr = cl_address(he->mem_info->daddr.addr);
> +
> + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr));

So here you get that static buffer and then truncate it? Wouldn't the
perf_hpp stuff take care of this? Can't we stop using that static buffer
and this truncation at such a level?

Arnaldo Carvalho de Melo

unread,
Oct 5, 2016, 7:10:05 AM10/5/16
to
Em Wed, Oct 05, 2016 at 08:01:41AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Thu, Sep 22, 2016 at 05:36:48PM +0200, Jiri Olsa escreveu:
> > +
> > + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr));
>
> So here you get that static buffer and then truncate it? Wouldn't the
> perf_hpp stuff take care of this? Can't we stop using that static buffer
> and this truncation at such a level?

So please consider continuing from what I left at the tmp.perf/c2c.2
branch in my tree,

Thanks,

- Arnaldo

Jiri Olsa

unread,
Oct 5, 2016, 8:40:06 AM10/5/16
to
well that's because we implicitly sort by offset but
there's no 'offset' key support yet in your branch

basically the perf report does not display/do anything
till following patch:

perf c2c report: Add stdio output support

I made sure now that the perf c2c report won't crash
till that patch..

jirka

Jiri Olsa

unread,
Oct 5, 2016, 8:50:06 AM10/5/16
to
it's being used later on for a short-lived hex number string
which is passed right away to another buffer ;-)

having just one thread I didn't see any harm, esp when it
saved some code lines
I think we need to cut it on this level, but I actually might recall some
change you did for perf_hpp to cut this on column width later on?

I'll check on that..

thanks,
jirka

Jiri Olsa

unread,
Oct 5, 2016, 9:10:06 AM10/5/16
to
On Wed, Oct 05, 2016 at 02:45:37PM +0200, Jiri Olsa wrote:

SNIP

> > > +
> > > + if (he->mem_info)
> > > + addr = cl_address(he->mem_info->daddr.addr);
> > > +
> > > + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr));
> >
> > So here you get that static buffer and then truncate it? Wouldn't the
> > perf_hpp stuff take care of this? Can't we stop using that static buffer
> > and this truncation at such a level?
>
> I think we need to cut it on this level, but I actualy might recall some
> change you did for perf_hpp to cut this on column width later on?
>
> I'll check on that..

ok, so it's cut later on, but it allows only for left-side alignment
while we use the right-side one

if I leave it on perf_hpp to deal with it I end up with following output:
(check the Cacheline column)


# Total Rmt ----- LLC Load Hitm ----- ---- Store Reference ---- --- Load Dram ---- LLC Total ----- Core Load Hit
# Index Cacheline records Hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1
# ..... .................. ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ..
#
0 0x3d2e300 273 0.53% 44 22 22 40 40 0 0 0 22 233 107 78
1 0x3d001c0 68 0.51% 22 1 21 2 2 0 0 2 25 66 30 7
2 0x3d00200 165 0.48% 22 2 20 20 20 0 0 0 20 145 89 34
3 0x3d5ca80 22 0.41% 19 2 17 3 3 0 0 0 17 19 0 0


while current code does:

# Total Rmt ----- LLC Load Hitm ----- ---- Store Reference ---- --- Load Dram ---- LLC Total ----- Core Load Hit
# Index Cacheline records Hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1
# ..... .................. ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ..
#
0 0x3d2e300 273 0.53% 44 22 22 40 40 0 0 0 22 233 107 78
1 0x3d001c0 68 0.51% 22 1 21 2 2 0 0 2 25 66 30 7
2 0x3d00200 165 0.48% 22 2 20 20 20 0 0 0 20 145 89 34
3 0x3d5ca80 22 0.41% 19 2 17 3 3 0 0 0 17 19 0 0


I'll make the snprintf/scnprintf replacement
based on your acme/tmp.perf/c2c.2

thanks,
jirka

Arnaldo Carvalho de Melo

unread,
Oct 5, 2016, 9:30:06 AM10/5/16
to
ok, but till then, it better not segfault :-) The message for that type
68 also probably should go to a more verbose level of debugging.

- Arnaldo

> I made sure now that the perf c2c report wont crash
> till that patch..

Ok!

>
> jirka

Arnaldo Carvalho de Melo

unread,
Oct 5, 2016, 9:30:06 AM10/5/16
to
I have no problems with gotos, but hate static vars used to return
vals... :-)
Ok, would be great to have this sorted out in a better way.

Arnaldo Carvalho de Melo

unread,
Oct 5, 2016, 9:30:06 AM10/5/16
to
Em Wed, Oct 05, 2016 at 03:09:29PM +0200, Jiri Olsa escreveu:
> On Wed, Oct 05, 2016 at 02:45:37PM +0200, Jiri Olsa wrote:
>
> SNIP
>
> > > > +
> > > > + if (he->mem_info)
> > > > + addr = cl_address(he->mem_info->daddr.addr);
> > > > +
> > > > + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr));
> > >
> > > So here you get that static buffer and then truncate it? Wouldn't the
> > > perf_hpp stuff take care of this? Can't we stop using that static buffer
> > > and this truncation at such a level?
> >
> > I think we need to cut it on this level, but I actualy might recall some
> > change you did for perf_hpp to cut this on column width later on?
> >
> > I'll check on that..
>
> ok, so it's cut later on, but it allows only for left-side alignment
> while we use the right-side one
>
> if I leave it on perf_hpp to deal with it I end up with following output:
> (check the Cacheline column)

Which is not _that_ bad, I guess it gets like that because we expect
kernel addresses as well (longer)?

[root@jouet ~]# grep icmp_rcv /proc/kallsyms | cut -d' ' -f 1 | wc -c
17
[root@jouet ~]# echo -n .................. | wc -c
18
[root@jouet ~]#

How to indicate to the hpp code that we want right alignment? Namhyung?

>
> # Total Rmt ----- LLC Load Hitm ----- ---- Store Reference ---- --- Load Dram ---- LLC Total ----- Core Load Hit
> # Index Cacheline records Hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1
> # ..... .................. ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ..
> #
> 0 0x3d2e300 273 0.53% 44 22 22 40 40 0 0 0 22 233 107 78
> 1 0x3d001c0 68 0.51% 22 1 21 2 2 0 0 2 25 66 30 7
> 2 0x3d00200 165 0.48% 22 2 20 20 20 0 0 0 20 145 89 34
> 3 0x3d5ca80 22 0.41% 19 2 17 3 3 0 0 0 17 19 0 0
>
>
> while current code does:
>
> # Total Rmt ----- LLC Load Hitm ----- ---- Store Reference ---- --- Load Dram ---- LLC Total ----- Core Load Hit
> # Index Cacheline records Hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1
> # ..... .................. ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ..
> #
> 0 0x3d2e300 273 0.53% 44 22 22 40 40 0 0 0 22 233 107 78
> 1 0x3d001c0 68 0.51% 22 1 21 2 2 0 0 2 25 66 30 7
> 2 0x3d00200 165 0.48% 22 2 20 20 20 0 0 0 20 145 89 34
> 3 0x3d5ca80 22 0.41% 19 2 17 3 3 0 0 0 17 19 0 0
>
>
> I'll make the snprintf/scnprintf replacement
> based on your acme/tmp.perf/c2c.2

Thanks,

> thanks,
> jirka

Jiri Olsa

unread,
Oct 5, 2016, 9:40:08 AM10/5/16
to
On Wed, Oct 05, 2016 at 10:26:58AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Wed, Oct 05, 2016 at 03:09:29PM +0200, Jiri Olsa escreveu:
> > On Wed, Oct 05, 2016 at 02:45:37PM +0200, Jiri Olsa wrote:
> >
> > SNIP
> >
> > > > > +
> > > > > + if (he->mem_info)
> > > > > + addr = cl_address(he->mem_info->daddr.addr);
> > > > > +
> > > > > + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr));
> > > >
> > > > So here you get that static buffer and then truncate it? Wouldn't the
> > > > perf_hpp stuff take care of this? Can't we stop using that static buffer
> > > > and this truncation at such a level?
> > >
> > > I think we need to cut it on this level, but I actually might recall some
> > > change you did for perf_hpp to cut this on column width later on?
> > >
> > > I'll check on that..
> >
> > ok, so it's cut later on, but it allows only for left-side alignment
> > while we use the right-side one
> >
> > if I leave it on perf_hpp to deal with it I end up with following output:
> > (check the Cacheline column)
>
> Which is not _that_ bad, I guess it gets like that because we expect
> kernel addresses as well (longer)?

exactly.. also it's the case for other columns where you have
different number lengths more often.. the right side alignment
is more readable in this case

jirka

Jiri Olsa

unread,
Oct 5, 2016, 10:10:08 AM10/5/16
to
hum, the hist_entry__snprintf_alignment just adds missing spaces
if there's any..

I think we should allow the upper layer to align based on its needs

but maybe change hist_entry__snprintf_alignment to also force the
width (like cut the overlap) in case it overflows

jirka

Arnaldo Carvalho de Melo

unread,
Oct 10, 2016, 12:50:06 PM10/10/16
to
Em Thu, Sep 22, 2016 at 05:37:07PM +0200, Jiri Olsa escreveu:
> Adding source line dimension key wrapper.
>
> It is to be displayed in the single cacheline output:
>
> cl_srcline
>
> It displays source line related to the code address that
> accessed cacheline. It's a wrapper to global srcline sort
> entry.
>
> Link: http://lkml.kernel.org/n/tip-cmnzgm37mj...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>
> ---
> tools/perf/builtin-c2c.c | 11 +++++++++++
> tools/perf/util/sort.c | 2 +-
> tools/perf/util/sort.h | 1 +
> 3 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> index 2318249362f8..8fb798c8a790 100644
> --- a/tools/perf/builtin-c2c.c
> +++ b/tools/perf/builtin-c2c.c
> @@ -50,6 +50,8 @@ struct perf_c2c {
> int cpus_cnt;
> int *cpu2node;
> int node_info;
> +
> + bool show_src;
> };
>
> static struct perf_c2c c2c;
> @@ -1360,6 +1362,11 @@ static struct c2c_dimension dim_cpucnt = {
> .width = 8,
> };
>
> +static struct c2c_dimension dim_srcline = {
> + .name = "cl_srcline",
> + .se = &sort_srcline,
> +};
> +
> static struct c2c_dimension *dimensions[] = {
> &dim_dcacheline,
> &dim_offset,
> @@ -1398,6 +1405,7 @@ static struct c2c_dimension *dimensions[] = {
> &dim_mean_lcl,
> &dim_mean_load,
> &dim_cpucnt,
> + &dim_srcline,
> NULL,
> };
>
> @@ -1605,6 +1613,9 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
>
> static int filter_cb(struct hist_entry *he __maybe_unused)
> {
> + if (c2c.show_src && !he->srcline)
> + he->srcline = hist_entry__get_srcline(he);
> +

You forgot to remove the __maybe_unused, doing that myself

> return 0;
> }
>
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 452e15a10dd2..df622f4e301e 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -315,7 +315,7 @@ struct sort_entry sort_sym = {
>
> /* --sort srcline */
>
> -static char *hist_entry__get_srcline(struct hist_entry *he)
> +char *hist_entry__get_srcline(struct hist_entry *he)
> {
> struct map *map = he->ms.map;
>
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 099c97557d33..7aff317fc7c4 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -280,4 +280,5 @@ int64_t
> sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
> int64_t
> sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
> +char *hist_entry__get_srcline(struct hist_entry *he);

Arnaldo Carvalho de Melo

unread,
Oct 10, 2016, 4:10:06 PM10/10/16
to
Em Thu, Sep 22, 2016 at 05:37:17PM +0200, Jiri Olsa escreveu:
> Add a limit on the number of entries displayed in the
> cachelines table. By default the limit is now a 0.0005%
> minimum share of remote HITMs.
>
> Also display only cachelines with remote hitm or store data.

Humm, this made my test machine, which has just one CPU, show nothing
:-\

I think we need to have a command line switch to control this?

I'm leaving this patch for later.

- Arnaldo

> Link: http://lkml.kernel.org/n/tip-inykbom2f1...@git.kernel.org
> Signed-off-by: Jiri Olsa <jo...@kernel.org>
> ---
> tools/perf/builtin-c2c.c | 36 ++++++++++++++++++++++++++++++++++--
> tools/perf/util/hist.c | 1 +
> tools/perf/util/hist.h | 1 +
> 3 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> index 31e311959480..ff89c0b86c44 100644
> --- a/tools/perf/builtin-c2c.c
> +++ b/tools/perf/builtin-c2c.c
> @@ -1639,11 +1639,42 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
> return hpp_list__parse(&c2c_hists->list, output, sort);
> }
>
> -static int filter_cb(struct hist_entry *he __maybe_unused)
> +#define DISPLAY_LINE_LIMIT 0.0005
> +
> +static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
> +{
> + struct c2c_hist_entry *c2c_he;
> + double ld_dist;
> +
> + c2c_he = container_of(he, struct c2c_hist_entry, he);
> +
> + if (stats->rmt_hitm) {
> + ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm);
> + if (ld_dist < DISPLAY_LINE_LIMIT)
> + he->filtered = HIST_FILTER__C2C;
> + } else {
> + he->filtered = HIST_FILTER__C2C;
> + }
> +
> + return he->filtered == 0;
> +}
> +
> +static inline int valid_hitm_or_store(struct hist_entry *he)
> +{
> + struct c2c_hist_entry *c2c_he;
> +
> + c2c_he = container_of(he, struct c2c_hist_entry, he);
> + return c2c_he->stats.rmt_hitm || c2c_he->stats.store;
> +}
> +
> +static int filter_cb(struct hist_entry *he)
> {
> if (c2c.show_src && !he->srcline)
> he->srcline = hist_entry__get_srcline(he);
>
> + if (!valid_hitm_or_store(he))
> + he->filtered = HIST_FILTER__C2C;
> +
> return 0;
> }
>
> @@ -1651,11 +1682,12 @@ static int resort_cl_cb(struct hist_entry *he)
> {
> struct c2c_hist_entry *c2c_he;
> struct c2c_hists *c2c_hists;
> + bool display = he__display(he, &c2c.hitm_stats);
>
> c2c_he = container_of(he, struct c2c_hist_entry, he);
> c2c_hists = c2c_he->hists;
>
> - if (c2c_hists) {
> + if (display && c2c_hists) {
> c2c_hists__reinit(c2c_hists,
> "percent_rmt_hitm,"
> "percent_lcl_hitm,"
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b02992efb513..e1be4132054d 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -1195,6 +1195,7 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
> case HIST_FILTER__GUEST:
> case HIST_FILTER__HOST:
> case HIST_FILTER__SOCKET:
> + case HIST_FILTER__C2C:
> default:
> return;
> }
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 9928fed8bc59..d4b6514eeef5 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -22,6 +22,7 @@ enum hist_filter {
> HIST_FILTER__GUEST,
> HIST_FILTER__HOST,
> HIST_FILTER__SOCKET,
> + HIST_FILTER__C2C,
> };
>
> enum hist_column {
> --
> 2.7.4

Jiri Olsa

unread,
Oct 11, 2016, 5:30:05 AM10/11/16
to
On Mon, Oct 10, 2016 at 05:01:21PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Sep 22, 2016 at 05:37:17PM +0200, Jiri Olsa escreveu:
> > Add a limit on the number of entries displayed in the
> > cachelines table. By default the limit is now a 0.0005%
> > minimum share of remote HITMs.
> >
> > Also display only cachelines with remote hitm or store data.
>
> Humm, this made my test machine, which has just one CPU, to show nothing
> :-\
>
> I think we need to have a command line switch to control this?

yep, we make it to display only entries with % hitm > 0.0005,

I think you might want to display local HITMs instead,
which are always easier to catch with -d lcl.. but the
support for this switch is in next patch (53)

thanks,
jirka

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:05 AM10/22/16
to
Commit-ID: cbb88500a7698bbe8751f01222081fa7f0641fd9
Gitweb: http://git.kernel.org/tip/cbb88500a7698bbe8751f01222081fa7f0641fd9
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:48 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c report: Add 'dcacheline' dimension key

It displays cacheline address as hex number.

Using a c2c wrapper around the standard 'dcacheline' object to define our own
header and a simple (address-only) cacheline output.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-21-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 78addc4..3a3e67f 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1,5 +1,6 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/stringify.h>
#include "util.h"
#include "debug.h"
#include "builtin.h"
@@ -7,6 +8,7 @@
#include "mem-events.h"
#include "session.h"
#include "hist.h"
+#include "sort.h"
#include "tool.h"
#include "data.h"
#include "sort.h"
@@ -273,6 +275,32 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
}

+#define HEX_STR(__s, __v) \
+({ \
+ scnprintf(__s, sizeof(__s), "0x%" PRIx64, __v); \
+ __s; \
+})
+
+static int64_t
+dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__dcacheline_cmp(left, right);
+}
+
+static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ uint64_t addr = 0;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[20];
+
+ if (he->mem_info)
+ addr = cl_address(he->mem_info->daddr.addr);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -308,7 +336,16 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
}, \
}

+static struct c2c_dimension dim_dcacheline = {
+ .header = HEADER_LOW("Cacheline"),
+ .name = "dcacheline",
+ .cmp = dcacheline_cmp,
+ .entry = dcacheline_entry,
+ .width = 18,
+};
+
static struct c2c_dimension *dimensions[] = {
+ &dim_dcacheline,
NULL,
};

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:05 AM10/22/16
to
Commit-ID: 8d3f938dc757549dd75d1b4df4f7faf92dc5dfc3
Gitweb: http://git.kernel.org/tip/8d3f938dc757549dd75d1b4df4f7faf92dc5dfc3
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:42 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c report: Add sort_entry dimension support

Allow reusing 'struct sort_entry' objects within c2c dimension support.

In case the 'struct sort_entry' object meets the need of c2c report we
will use it directly in following patches.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-15-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 82 ++++++++++++++++++++++++++++++++++++++----------
1 file changed, 65 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 63c0e2d..6b58b53 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -9,6 +9,7 @@
#include "hist.h"
#include "tool.h"
#include "data.h"
+#include "sort.h"

struct c2c_hists {
struct hists hists;
@@ -47,6 +48,7 @@ struct c2c_dimension {
struct c2c_header header;
const char *name;
int width;
+ struct sort_entry *se;

int64_t (*cmp)(struct perf_hpp_fmt *fmt,
struct hist_entry *, struct hist_entry *);
@@ -66,34 +68,47 @@ static int c2c_width(struct perf_hpp_fmt *fmt,
struct hists *hists __maybe_unused)
{
struct c2c_fmt *c2c_fmt;
+ struct c2c_dimension *dim;

c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
- return c2c_fmt->dim->width;
+ dim = c2c_fmt->dim;
+
+ return dim->se ? hists__col_len(hists, dim->se->se_width_idx) :
+ c2c_fmt->dim->width;
}

static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists __maybe_unused, int line, int *span)
+ struct hists *hists, int line, int *span)
{
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
struct c2c_fmt *c2c_fmt;
struct c2c_dimension *dim;
- int len = c2c_width(fmt, hpp, hists);
- const char *text;
+ const char *text = NULL;
+ int width = c2c_width(fmt, hpp, hists);

c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
dim = c2c_fmt->dim;

- text = dim->header.line[line].text;
- if (text == NULL)
- text = "";
-
- if (*span) {
- (*span)--;
- return 0;
+ if (dim->se) {
+ text = dim->header.line[line].text;
+ /* Use the last line from sort_entry if not defined. */
+ if (!text && (line == hpp_list->nr_header_lines - 1))
+ text = dim->se->se_header;
} else {
- *span = dim->header.line[line].span;
+ text = dim->header.line[line].text;
+
+ if (*span) {
+ (*span)--;
+ return 0;
+ } else {
+ *span = dim->header.line[line].span;
+ }
}

- return scnprintf(hpp->buf, hpp->size, "%*s", len, text);
+ if (text == NULL)
+ text = "";
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
}

static struct c2c_dimension *dimensions[] = {
@@ -130,6 +145,39 @@ static struct c2c_dimension *get_dimension(const char *name)
return NULL;
}

+static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+ size_t len = fmt->user_len;
+
+ if (!len)
+ len = hists__col_len(he->hists, dim->se->se_width_idx);
+
+ return dim->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+
+ return dim->se->se_cmp(a, b);
+}
+
+static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+ int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
+
+ collapse_fn = dim->se->se_collapse ?: dim->se->se_cmp;
+ return collapse_fn(a, b);
+}
+
static struct c2c_fmt *get_format(const char *name)
{
struct c2c_dimension *dim = get_dimension(name);
@@ -149,12 +197,12 @@ static struct c2c_fmt *get_format(const char *name)
INIT_LIST_HEAD(&fmt->list);
INIT_LIST_HEAD(&fmt->sort_list);

- fmt->cmp = dim->cmp;
- fmt->sort = dim->cmp;
- fmt->entry = dim->entry;
+ fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->sort = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->entry = dim->se ? c2c_se_entry : dim->entry;
fmt->header = c2c_header;
fmt->width = c2c_width;
- fmt->collapse = dim->cmp;
+ fmt->collapse = dim->se ? c2c_se_collapse : dim->cmp;
fmt->equal = fmt_equal;
fmt->free = fmt_free;

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:05 AM10/22/16
to
Commit-ID: 78b275437873da5431b7ccc61f7ce3827bb55324
Gitweb: http://git.kernel.org/tip/78b275437873da5431b7ccc61f7ce3827bb55324
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:44 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c report: Add sample processing

Adding basic sample processing specific hist_entry allocation callbacks
(via hists__add_entry_ops).

Overloading 'struct hist_entry' object with new 'struct c2c_hist_entry'.
The new hist entry object will carry specific stats and nested hists
objects.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-17-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index a3481f8..29fb957 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -16,6 +16,15 @@ struct c2c_hists {
struct perf_hpp_list list;
};

+struct c2c_hist_entry {
+ struct c2c_hists *hists;
+ /*
+ * must be at the end,
+ * because of its callchain dynamic entry
+ */
+ struct hist_entry he;
+};
+
struct perf_c2c {
struct perf_tool tool;
struct c2c_hists hists;
@@ -23,6 +32,86 @@ struct perf_c2c {

static struct perf_c2c c2c;

+static void *c2c_he_zalloc(size_t size)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = zalloc(size + sizeof(*c2c_he));
+ if (!c2c_he)
+ return NULL;
+
+ return &c2c_he->he;
+}
+
+static void c2c_he_free(void *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists) {
+ hists__delete_entries(&c2c_he->hists->hists);
+ free(c2c_he->hists);
+ }
+
+ free(c2c_he);
+}
+
+static struct hist_entry_ops c2c_entry_ops = {
+ .new = c2c_he_zalloc,
+ .free = c2c_he_free,
+};
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct hists *hists = &c2c.hists.hists;
+ struct hist_entry *he;
+ struct addr_location al;
+ struct mem_info *mi;
+ int ret;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ mi = sample__resolve_mem(sample, &al);
+ if (mi == NULL)
+ return -ENOMEM;
+
+ he = hists__add_entry_ops(hists, &c2c_entry_ops,
+ &al, NULL, NULL, mi,
+ sample, true);
+ if (he == NULL) {
+ free(mi);
+ return -ENOMEM;
+ }
+
+ hists__inc_nr_samples(hists, he->filtered);
+ ret = hist_entry__append_callchain(he, sample);
+
+ addr_location__put(&al);
+ return ret;
+}
+
+static struct perf_c2c c2c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+};
+
static const char * const c2c_usage[] = {
"perf c2c {record|report}",
NULL
@@ -314,6 +403,7 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
+ struct ui_progress prog;
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
@@ -330,9 +420,12 @@ static int perf_c2c__report(int argc, const char **argv)

argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc)
+ if (argc)
usage_with_options(report_c2c_usage, c2c_options);

+ if (!input_name || !strlen(input_name))
+ input_name = "perf.data";
+
file.path = input_name;

err = c2c_hists__init(&c2c.hists, "dcacheline");
@@ -356,6 +449,19 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_session;
}

+ err = perf_session__process_events(session);
+ if (err) {
+ pr_err("failed to process sample\n");
+ goto out_session;
+ }
+
+ ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
+
+ hists__collapse_resort(&c2c.hists.hists, NULL);
+ hists__output_resort(&c2c.hists.hists, &prog);
+
+ ui_progress__finish();
+
out_session:
perf_session__delete(session);
out:

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:05 AM10/22/16
to
Commit-ID: ec06f9b9b23f29c8f25367fc43c85c327229d5ca
Gitweb: http://git.kernel.org/tip/ec06f9b9b23f29c8f25367fc43c85c327229d5ca
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:45 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c report: Add cacheline hists processing

Store cacheline related entries in nested hist object for each cacheline
data. Nested entries are sorted by 'offset' within related cacheline.

We will allow specific sort keys to be configured for nested cacheline
data entries in following patches.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-18-...@kernel.org
[ he__get_hists() should return NULL when c2c_hists__init() fails ]
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 92 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 86 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 29fb957..df413b5 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -61,6 +61,34 @@ static struct hist_entry_ops c2c_entry_ops = {
.free = c2c_he_free,
};

+static int c2c_hists__init(struct c2c_hists *hists,
+ const char *sort);
+
+static struct hists*
+he__get_hists(struct hist_entry *he,
+ const char *sort)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *hists;
+ int ret;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists)
+ return &c2c_he->hists->hists;
+
+ hists = c2c_he->hists = zalloc(sizeof(*hists));
+ if (!hists)
+ return NULL;
+
+ ret = c2c_hists__init(hists, sort);
+ if (ret) {
+ free(hists);
+ return NULL;
+ }
+
+ return &hists->hists;
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -70,7 +98,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct hists *hists = &c2c.hists.hists;
struct hist_entry *he;
struct addr_location al;
- struct mem_info *mi;
+ struct mem_info *mi, *mi_dup;
int ret;

if (machine__resolve(machine, &al, sample) < 0) {
@@ -83,19 +111,50 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (mi == NULL)
return -ENOMEM;

+ mi_dup = memdup(mi, sizeof(*mi));
+ if (!mi_dup)
+ goto free_mi;
+
he = hists__add_entry_ops(hists, &c2c_entry_ops,
&al, NULL, NULL, mi,
sample, true);
- if (he == NULL) {
- free(mi);
- return -ENOMEM;
- }
+ if (he == NULL)
+ goto free_mi_dup;

hists__inc_nr_samples(hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);

+ if (!ret) {
+ mi = mi_dup;
+
+ mi_dup = memdup(mi, sizeof(*mi));
+ if (!mi_dup)
+ goto free_mi;
+
+ hists = he__get_hists(he, "offset");
+ if (!hists)
+ goto free_mi_dup;
+
+ he = hists__add_entry_ops(hists, &c2c_entry_ops,
+ &al, NULL, NULL, mi,
+ sample, true);
+ if (he == NULL)
+ goto free_mi_dup;
+
+ hists__inc_nr_samples(hists, he->filtered);
+ ret = hist_entry__append_callchain(he, sample);
+ }
+
+out:
addr_location__put(&al);
return ret;
+
+free_mi_dup:
+ free(mi_dup);
+free_mi:
+ free(mi);
+ ret = -ENOMEM;
+ goto out;
}

static struct perf_c2c c2c = {
@@ -400,6 +459,27 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
return hpp_list__parse(&c2c_hists->list, output, sort);
}

+static int filter_cb(struct hist_entry *he __maybe_unused)
+{
+ return 0;
+}
+
+static int resort_cl_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ if (c2c_hists) {
+ hists__collapse_resort(&c2c_hists->hists, NULL);
+ hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
+ }
+
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:05 AM10/22/16
to
Commit-ID: 7aef3bf3daa182f31d197e1a4f789797cc3cc561
Gitweb: http://git.kernel.org/tip/7aef3bf3daa182f31d197e1a4f789797cc3cc561
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:38 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c: Add c2c command

Adding c2c command base wirings. Its implementation is going to be added
gradually in following patches.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-11-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Build | 1 +
tools/perf/builtin-c2c.c | 23 +++++++++++++++++++++++
tools/perf/builtin.h | 1 +
tools/perf/perf.c | 1 +
4 files changed, 26 insertions(+)

diff --git a/tools/perf/Build b/tools/perf/Build
index a43fae7..b12d5d1 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -21,6 +21,7 @@ perf-y += builtin-inject.o
perf-y += builtin-mem.o
perf-y += builtin-data.o
perf-y += builtin-version.o
+perf-y += builtin-c2c.o

perf-$(CONFIG_AUDIT) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
new file mode 100644
index 0000000..8252ed0
--- /dev/null
+++ b/tools/perf/builtin-c2c.c
@@ -0,0 +1,23 @@
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "util.h"
+#include "debug.h"
+#include "builtin.h"
+#include <subcmd/parse-options.h>
+
+static const char * const c2c_usage[] = {
+ "perf c2c",
+ NULL
+};
+
+int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ const struct option c2c_options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_END()
+ };
+
+ argc = parse_options(argc, argv, c2c_options, c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 41c24010..0bcf68e 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -18,6 +18,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix);
int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
int cmd_buildid_list(int argc, const char **argv, const char *prefix);
int cmd_config(int argc, const char **argv, const char *prefix);
+int cmd_c2c(int argc, const char **argv, const char *prefix);
int cmd_diff(int argc, const char **argv, const char *prefix);
int cmd_evlist(int argc, const char **argv, const char *prefix);
int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 64c0696..aa23b33 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -43,6 +43,7 @@ static struct cmd_struct commands[] = {
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
{ "config", cmd_config, 0 },
+ { "c2c", cmd_c2c, 0 },
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:05 AM10/22/16
to
Commit-ID: 5f2eca833cc244c6872e83fb4a5faaae1c0a87b7
Gitweb: http://git.kernel.org/tip/5f2eca833cc244c6872e83fb4a5faaae1c0a87b7
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:43 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c report: Fallback to standard dimensions

Fallback to standard dimensions in case we don't find the dimension
within c2c ones.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-16-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 6b58b53..a3481f8 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -213,8 +213,10 @@ static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name)
{
struct c2c_fmt *c2c_fmt = get_format(name);

- if (!c2c_fmt)
- return -1;
+ if (!c2c_fmt) {
+ reset_dimensions();
+ return output_field_add(hpp_list, name);
+ }

perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt);
return 0;
@@ -224,8 +226,10 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
{
struct c2c_fmt *c2c_fmt = get_format(name);

- if (!c2c_fmt)
- return -1;
+ if (!c2c_fmt) {
+ reset_dimensions();
+ return sort_dimension__add(hpp_list, name, NULL, 0);
+ }

perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
return 0;

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:06 AM10/22/16
to
Commit-ID: 39bcd4a4e4cbd0ce41a6be848aec335646de1919
Gitweb: http://git.kernel.org/tip/39bcd4a4e4cbd0ce41a6be848aec335646de1919
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:39 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c: Add record subcommand

Adding c2c record subcommand. It sets up options related to HITM
cacheline analysis and calls the standard perf record command.

$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...

It produces perf.data, which is to be reported by perf c2c report, which
comes in the following patches.

Details are described in the man page, which is added in one of the
following patches.

Committer notes:

Testing it:

# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 8252ed0..58924c6 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -4,12 +4,116 @@
#include "debug.h"
#include "builtin.h"
#include <subcmd/parse-options.h>
+#include "mem-events.h"

static const char * const c2c_usage[] = {
"perf c2c",
NULL
};

+static int parse_record_events(const struct option *opt __maybe_unused,
+ const char *str, int unset __maybe_unused)
+{
+ bool *event_set = (bool *) opt->value;
+
+ *event_set = true;
+ return perf_mem_events__parse(str);
+}
+
+
+static const char * const __usage_record[] = {
+ "perf c2c record [<options>] [<command>]",
+ "perf c2c record [<options>] -- <command> [<options>]",
+ NULL
+};
+
+static const char * const *record_mem_usage = __usage_record;
+
+static int perf_c2c__record(int argc, const char **argv)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+ int ret;
+ bool all_user = false, all_kernel = false;
+ bool event_set = false;
+ struct option options[] = {
+ OPT_CALLBACK('e', "event", &event_set, "event",
+ "event selector. Use 'perf mem record -e list' to list available events",
+ parse_record_events),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
+ OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
+ OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
+ OPT_END()
+ };
+
+ if (perf_mem_events__init()) {
+ pr_err("failed: memory events not supported\n");
+ return -1;
+ }
+
+ argc = parse_options(argc, argv, options, record_mem_usage,
+ PARSE_OPT_KEEP_UNKNOWN);
+
+ rec_argc = argc + 10; /* max number of arguments */
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -1;
+
+ rec_argv[i++] = "record";
+
+ if (!event_set) {
+ perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+ perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+ }
+
+ if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+ rec_argv[i++] = "-W";
+
+ rec_argv[i++] = "-d";
+ rec_argv[i++] = "--sample-cpu";
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ if (!perf_mem_events[j].record)
+ continue;
+
+ if (!perf_mem_events[j].supported) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events[j].name);
+ return -1;
+ }
+
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = perf_mem_events__name(j);
+ };
+
+ if (all_user)
+ rec_argv[i++] = "--all-user";
+
+ if (all_kernel)
+ rec_argv[i++] = "--all-kernel";
+
+ for (j = 0; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ if (verbose > 0) {
+ pr_debug("calling: ");
+
+ j = 0;
+
+ while (rec_argv[j]) {
+ pr_debug("%s ", rec_argv[j]);
+ j++;
+ }
+ pr_debug("\n");
+ }
+
+ ret = cmd_record(i, rec_argv, NULL);
+ free(rec_argv);
+ return ret;
+}
+
int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
{
const struct option c2c_options[] = {
@@ -19,5 +123,15 @@ int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)

argc = parse_options(argc, argv, c2c_options, c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (!argc)
+ usage_with_options(c2c_usage, c2c_options);
+
+ if (!strncmp(argv[0], "rec", 3)) {
+ return perf_c2c__record(argc, argv);
+ } else {
+ usage_with_options(c2c_usage, c2c_options);
+ }
+
return 0;
}

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:07 AM10/22/16
to
Commit-ID: 600a8cf45b797ff189c42175c1f165fb5cb9479a
Gitweb: http://git.kernel.org/tip/600a8cf45b797ff189c42175c1f165fb5cb9479a
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:47 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c report: Add header macros

Adding helper macros to define header objects. They will be used in
the following patches, which add new dimensions.

The c2c report will support two-line headers, hence we only define
line[0] and line[1] in the macros.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-20-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 43f18aa..78addc4 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -273,6 +273,41 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
}

+#define HEADER_LOW(__h) \
+ { \
+ .line[1] = { \
+ .text = __h, \
+ }, \
+ }
+
+#define HEADER_BOTH(__h0, __h1) \
+ { \
+ .line[0] = { \
+ .text = __h0, \
+ }, \
+ .line[1] = { \
+ .text = __h1, \
+ }, \
+ }
+
+#define HEADER_SPAN(__h0, __h1, __s) \
+ { \
+ .line[0] = { \
+ .text = __h0, \
+ .span = __s, \
+ }, \
+ .line[1] = { \
+ .text = __h1, \
+ }, \
+ }
+
+#define HEADER_SPAN_LOW(__h) \
+ { \
+ .line[1] = { \
+ .text = __h, \
+ }, \
+ }
+
static struct c2c_dimension *dimensions[] = {
NULL,
};

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:07 AM10/22/16
to
Commit-ID: 0a9a24cc0e9b47e83e9f603cd459ead37507e712
Gitweb: http://git.kernel.org/tip/0a9a24cc0e9b47e83e9f603cd459ead37507e712
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:31 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c: Introduce c2c_add_stats function

Introducing the c2c_add_stats helper function to accumulate c2c_stats.

Original-patch-by: Dick Fowles <rfo...@redhat.com>
Original-patch-by: Don Zickus <dzi...@redhat.com>
Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-4-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/mem-events.c | 30 ++++++++++++++++++++++++++++++
tools/perf/util/mem-events.h | 1 +
2 files changed, 31 insertions(+)

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 502fcee..e507732 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -366,3 +366,33 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
#undef P
return err;
}
+
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
+{
+ stats->nr_entries += add->nr_entries;
+
+ stats->locks += add->locks;
+ stats->store += add->store;
+ stats->st_uncache += add->st_uncache;
+ stats->st_noadrs += add->st_noadrs;
+ stats->st_l1hit += add->st_l1hit;
+ stats->st_l1miss += add->st_l1miss;
+ stats->load += add->load;
+ stats->ld_excl += add->ld_excl;
+ stats->ld_shared += add->ld_shared;
+ stats->ld_uncache += add->ld_uncache;
+ stats->ld_io += add->ld_io;
+ stats->ld_miss += add->ld_miss;
+ stats->ld_noadrs += add->ld_noadrs;
+ stats->ld_fbhit += add->ld_fbhit;
+ stats->ld_l1hit += add->ld_l1hit;
+ stats->ld_l2hit += add->ld_l2hit;
+ stats->ld_llchit += add->ld_llchit;
+ stats->lcl_hitm += add->lcl_hitm;
+ stats->rmt_hitm += add->rmt_hitm;
+ stats->rmt_hit += add->rmt_hit;
+ stats->lcl_dram += add->lcl_dram;
+ stats->rmt_dram += add->rmt_dram;
+ stats->nomap += add->nomap;
+ stats->noparse += add->noparse;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index e111a2a..faf8040 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -68,5 +68,6 @@ struct c2c_stats {

struct hist_entry;
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);

#endif /* __PERF_MEM_EVENTS_H */

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:07 AM10/22/16
to
Commit-ID: aadddd68bde444cd737c376816a29b642da0610e
Gitweb: http://git.kernel.org/tip/aadddd68bde444cd737c376816a29b642da0610e
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:30 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c: Introduce c2c_decode_stats function

Introducing the c2c_decode_stats function, which decodes
data_src data into the new struct c2c_stats.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Original-patch-by: Dick Fowles <rfo...@redhat.com>
Original-patch-by: Don Zickus <dzi...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-3-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/mem-events.c | 98 ++++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/mem-events.h | 36 ++++++++++++++++
2 files changed, 134 insertions(+)

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bbc368e..502fcee 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -9,6 +9,7 @@
#include "mem-events.h"
#include "debug.h"
#include "symbol.h"
+#include "sort.h"

unsigned int perf_mem_events__loads_ldlat = 30;

@@ -268,3 +269,100 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in

return i;
}
+
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
+{
+ union perf_mem_data_src *data_src = &mi->data_src;
+ u64 daddr = mi->daddr.addr;
+ u64 op = data_src->mem_op;
+ u64 lvl = data_src->mem_lvl;
+ u64 snoop = data_src->mem_snoop;
+ u64 lock = data_src->mem_lock;
+ int err = 0;
+
+#define P(a, b) PERF_MEM_##a##_##b
+
+ stats->nr_entries++;
+
+ if (lock & P(LOCK, LOCKED)) stats->locks++;
+
+ if (op & P(OP, LOAD)) {
+ /* load */
+ stats->load++;
+
+ if (!daddr) {
+ stats->ld_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->ld_uncache++;
+ if (lvl & P(LVL, IO)) stats->ld_io++;
+ if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
+ if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
+ if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+ if (lvl & P(LVL, L3 )) {
+ if (snoop & P(SNOOP, HITM))
+ stats->lcl_hitm++;
+ else
+ stats->ld_llchit++;
+ }
+
+ if (lvl & P(LVL, LOC_RAM)) {
+ stats->lcl_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+
+ if ((lvl & P(LVL, REM_RAM1)) ||
+ (lvl & P(LVL, REM_RAM2))) {
+ stats->rmt_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+ }
+
+ if ((lvl & P(LVL, REM_CCE1)) ||
+ (lvl & P(LVL, REM_CCE2))) {
+ if (snoop & P(SNOOP, HIT))
+ stats->rmt_hit++;
+ else if (snoop & P(SNOOP, HITM))
+ stats->rmt_hitm++;
+ }
+
+ if ((lvl & P(LVL, MISS)))
+ stats->ld_miss++;
+
+ } else if (op & P(OP, STORE)) {
+ /* store */
+ stats->store++;
+
+ if (!daddr) {
+ stats->st_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->st_uncache++;
+ if (lvl & P(LVL, L1 )) stats->st_l1hit++;
+ }
+ if (lvl & P(LVL, MISS))
+ if (lvl & P(LVL, L1)) stats->st_l1miss++;
+ } else {
+ /* unparsable data_src? */
+ stats->noparse++;
+ return -1;
+ }
+
+ if (!mi->daddr.map || !mi->iaddr.map) {
+ stats->nomap++;
+ return -1;
+ }
+
+#undef P
+ return err;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 7f69bf9..e111a2a 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -2,6 +2,10 @@
#define __PERF_MEM_EVENTS_H

#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include "stat.h"

struct perf_mem_event {
bool record;
@@ -33,4 +37,36 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);

int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);

+struct c2c_stats {
+ u32 nr_entries;
+
+ u32 locks; /* count of 'lock' transactions */
+ u32 store; /* count of all stores in trace */
+ u32 st_uncache; /* stores to uncacheable address */
+ u32 st_noadrs; /* cacheable store with no address */
+ u32 st_l1hit; /* count of stores that hit L1D */
+ u32 st_l1miss; /* count of stores that miss L1D */
+ u32 load; /* count of all loads in trace */
+ u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+ u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
+ u32 ld_uncache; /* loads to uncacheable address */
+ u32 ld_io; /* loads to io address */
+ u32 ld_miss; /* loads miss */
+ u32 ld_noadrs; /* cacheable load with no address */
+ u32 ld_fbhit; /* count of loads hitting Fill Buffer */
+ u32 ld_l1hit; /* count of loads that hit L1D */
+ u32 ld_l2hit; /* count of loads that hit L2D */
+ u32 ld_llchit; /* count of loads that hit LLC */
+ u32 lcl_hitm; /* count of loads with local HITM */
+ u32 rmt_hitm; /* count of loads with remote HITM */
+ u32 rmt_hit; /* count of loads with remote hit clean; */
+ u32 lcl_dram; /* count of loads miss to local DRAM */
+ u32 rmt_dram; /* count of loads miss to remote DRAM */
+ u32 nomap; /* count of load/stores with no phys adrs */
+ u32 noparse; /* count of unparsable data sources */
+};
+
+struct hist_entry;
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+
#endif /* __PERF_MEM_EVENTS_H */

tip-bot for Jiri Olsa

unread,
Oct 22, 2016, 4:40:13 AM10/22/16
to
Commit-ID: 903a6f15b9968a048760d79224cec4ce4b06d781
Gitweb: http://git.kernel.org/tip/903a6f15b9968a048760d79224cec4ce4b06d781
Author: Jiri Olsa <jo...@kernel.org>
AuthorDate: Thu, 22 Sep 2016 17:36:40 +0200
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 19 Oct 2016 13:18:31 -0300

perf c2c: Add report subcommand

Adding the c2c report subcommand. It reads the perf.data file and displays
the shared data analysis.

This patch adds the basic wiring for the report. It gets fully implemented
in the following patches.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-13-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 58924c6..3fac3a2 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -5,12 +5,74 @@
#include "builtin.h"
#include <subcmd/parse-options.h>
#include "mem-events.h"
+#include "session.h"
+#include "hist.h"
+#include "tool.h"
+#include "data.h"
+
+struct perf_c2c {
+ struct perf_tool tool;
+};
+
+static struct perf_c2c c2c;

static const char * const c2c_usage[] = {
- "perf c2c",
+ "perf c2c {record|report}",
NULL
};

+static const char * const __usage_report[] = {
+ "perf c2c report",
+ NULL
+};
+
+static const char * const *report_c2c_usage = __usage_report;
+
+static int perf_c2c__report(int argc, const char **argv)
+{
+ struct perf_session *session;
+ struct perf_data_file file = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ const struct option c2c_options[] = {
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "the input file to process"),
+ OPT_END()
+ };
+ int err = 0;
+
+ argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (!argc)
+ usage_with_options(report_c2c_usage, c2c_options);
+
+ file.path = input_name;
+
+ session = perf_session__new(&file, 0, &c2c.tool);
+ if (session == NULL) {
+ pr_debug("No memory for session\n");
+ goto out;
+ }
+
+ if (symbol__init(&session->header.env) < 0)
+ goto out_session;
+
+ /* No pipe support at the moment. */
+ if (perf_data_file__is_pipe(session->file)) {
+ pr_debug("No pipe support at the moment.\n");
+ goto out_session;
+ }
+
+out_session:
+ perf_session__delete(session);
+out:
+ return err;
+}
+
static int parse_record_events(const struct option *opt __maybe_unused,
const char *str, int unset __maybe_unused)
{
@@ -129,6 +191,8 @@ int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)

if (!strncmp(argv[0], "rec", 3)) {
return perf_c2c__record(argc, argv);
+ } else if (!strncmp(argv[0], "rep", 3)) {
+ return perf_c2c__report(argc, argv);
} else {
usage_with_options(c2c_usage, c2c_options);
}
0 new messages