Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH 05/22] perf tools: Do hugetlb handling in more systems

77 views
Skip to first unread message

Arnaldo Carvalho de Melo

unread,
Sep 20, 2016, 4:10:07 PM9/20/16
to
From: Arnaldo Carvalho de Melo <ac...@redhat.com>

The csets:

0ac3348e5024 ("perf tools: Recognize hugetlb mapping as anon mapping")
d7e404af115b ("perf record: Mark MAP_HUGETLB when synthesizing mmap events")

Added code conditional on MAP_HUGETLB, to make it build in older systems
where that define wasn't available. Now that we grabbed copies of
uapi/linux/mman.h to have all those definitions in tools/, use it so
that we can support building the tools for older systems (without the
MAP_HUGETLB define in its libc headers) using new kernels that support
such maps.

Cc: Adrian Hunter <adrian...@intel.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Wang Nan <wang...@huawei.com>
Cc: Zefan Li <liz...@huawei.com>
Link: http://lkml.kernel.org/n/tip-wv6oqbfkpx...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/event.c | 7 ++-----
tools/perf/util/map.c | 9 ++-------
2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6c3017139c67..2880e2226fdb 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,5 +1,5 @@
#include <linux/types.h>
-#include <sys/mman.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
#include "event.h"
#include "debug.h"
@@ -249,10 +249,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
bool truncation = false;
unsigned long long timeout = proc_map_timeout * 1000000ULL;
int rc = 0;
-#ifdef MAP_HUGETLB
const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
-#endif

if (machine__is_default_guest(machine))
return 0;
@@ -347,12 +345,11 @@ out:

if (!strcmp(execname, ""))
strcpy(execname, anonstr);
-#ifdef MAP_HUGETLB
+
if (!strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
strcpy(execname, anonstr);
event->mmap2.flags |= MAP_HUGETLB;
}
-#endif

size = strlen(execname) + 1;
memcpy(event->mmap2.filename, execname, size);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index d51a1257973b..c662fef95d14 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -6,7 +6,7 @@
#include <string.h>
#include <stdio.h>
#include <unistd.h>
-#include <sys/mman.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include "map.h"
#include "thread.h"
#include "strlist.h"
@@ -27,12 +27,7 @@ const char *map_type__name[MAP__NR_TYPES] = {

static inline int is_anon_memory(const char *filename, u32 flags)
{
- u32 anon_flags = 0;
-
-#ifdef MAP_HUGETLB
- anon_flags |= MAP_HUGETLB;
-#endif
- return flags & anon_flags ||
+ return flags & MAP_HUGETLB ||
!strcmp(filename, "//anon") ||
!strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
!strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 20, 2016, 4:10:07 PM9/20/16
to
From: Namhyung Kim <namh...@kernel.org>

When the --hierarchy option is used, each entry has its own hpp_list to show
the result, but the width of each column in those lists was not updated.

Before:

- 46.29% 48.12% netctl-auto
+ 31.44% 29.25% [kernel.vmlinux]
+ 8.52% 11.55% libc-2.22.so
+ 5.19% 6.91% bash
+ 10.75% 11.83% wpa_cli
+ 8.25% 2.23% swapper
+ 6.45% 5.40% tr
+ 4.81% 8.09% awk
+ 4.15% 2.85% firefox
+ 3.86% 2.53% sh

After:

- 46.29% 48.12% netctl-auto
+ 31.44% 29.25% [kernel.vmlinux]
+ 8.52% 11.55% libc-2.22.so
+ 5.19% 6.91% bash
+ 10.75% 11.83% wpa_cli
+ 8.25% 2.23% swapper
+ 6.45% 5.40% tr
+ 4.81% 8.09% awk
+ 4.15% 2.85% firefox
+ 3.86% 2.53% sh

Committer note:

Full testing instructions:

1) Record with an event group:

$ perf record -e '{cycles,instructions}' make -j4

2) Use report in hierarchy mode, to get a few expanded trees on
the same screen, use --percent-limit:

$ perf report --hierarchy --percent-limit 0.5

Samples: 103K of event 'anon group { cycles:u, instructions:u }',
Event count (approx.): 57317631725
Overhead Command / Shared Object / Symbol ◆
- 58.89% 55.12% cc1 ▒
- 50.26% 48.10% cc1 ▒
3.61% 5.13% [.] _cpp_lex_token ▒
2.58% 0.78% [.] ht_lookup_with_hash ▒
1.31% 1.30% [.] ggc_internal_alloc ▒
1.08% 2.25% [.] get_combined_adhoc_loc ▒
1.01% 1.95% [.] ira_init ▒
0.96% 1.78% [.] linemap_position_for_column ▒
0.65% 1.01% [.] cpp_get_token_with_location ▒
- 7.52% 6.58% libc-2.23.so ▒
1.70% 1.78% [.] _int_malloc ▒
0.69% 0.75% [.] _int_free ▒
0.67% 0.42% [.] malloc_consolidate ▒
- 0.58% 0.42% ld-2.23.so ▒
no entry >= 0.50% ▒
- 0.52% 0.03% [kernel.vmlinux] ▒
no entry >= 0.50% ▒

Signed-off-by: Namhyung Kim <namh...@kernel.org>
Acked-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Fixes: 1b2dbbf41a0f ("perf hists: Use own hpp_list for hierarchy mode")
Link: http://lkml.kernel.org/r/20160920053025....@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/browsers/hists.c | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 35e44b1879e3..49db16334814 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2067,6 +2067,7 @@ void hist_browser__init(struct hist_browser *browser,
struct hists *hists)
{
struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *node;

browser->hists = hists;
browser->b.refresh = hist_browser__refresh;
@@ -2079,6 +2080,11 @@ void hist_browser__init(struct hist_browser *browser,
perf_hpp__reset_width(fmt, hists);
++browser->b.columns;
}
+ /* hierarchy entries have their own hpp list */
+ list_for_each_entry(node, &hists->hpp_formats, list) {
+ perf_hpp_list__for_each_format(&node->hpp, fmt)
+ perf_hpp__reset_width(fmt, hists);
+ }
}

struct hist_browser *hist_browser__new(struct hists *hists)
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 20, 2016, 4:10:10 PM9/20/16
to
Hi Ingo,

Please consider pulling,

- Arnaldo

The following changes since commit cd34cd97b7b4336aa2c623c37daffab264c7c6ce:

perf/x86/intel/uncore: Add Skylake server uncore support (2016-09-10 11:18:52 +0200)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-20160920

for you to fetch changes up to 3c028a0cb5b71f47d523bc8ad2c597cb257f41fb:

perf symbols: Do not open device files (2016-09-20 16:20:21 -0300)

----------------------------------------------------------------
perf/core improvements and fixes:

User visible:

- Support event group view with hierarchy mode in 'perf top' and 'perf report'
(Namhyung Kim)

e.g.:

$ perf record -e '{cycles,instructions}' make
$ perf report --hierarchy --stdio
...
# Overhead Command / Shared Object / Symbol
# ...................... ..................................
...
25.74% 27.18% sh
19.96% 24.14% libc-2.24.so
9.55% 14.64% [.] __strcmp_sse2
1.54% 0.00% [.] __tfind
1.07% 1.13% [.] _int_malloc
0.95% 0.00% [.] __strchr_sse2
0.89% 1.39% [.] __tsearch
0.76% 0.00% [.] strlen

- Fix the dwarf regs table for x86_64, adding a missing % to the "%di"
register, noticed with a failing 'perf test bpf' (Arnaldo Carvalho de Melo)

- Fix handling of mmap parameters in the 'perf trace' beautifier in
architectures that don't have the same mappings as x86_64 (Wang Nan)

- Handle hugetlb mappings in older systems running new kernels (Wang Nan)

- Resolve 'call' operands in 'annotate', that when using /proc/kcore
were appearing just as hexadecimal addresses, to function names
(Arnaldo Carvalho de Melo)

- Fix width computation for srcline sort entry (Jiri Olsa)

- Do not ignore call instruction with indirect target in 'annotate'
(Ravi Bangoria)

- Handle MADV_FREE in the madvise 'trace' beautifier (Wang Nan)

- Fix build of 'perf trace' mman beautifier in !x86_64 (Wang Nan)

Infrastructure:

- Add infrastructure for PMU specific configuration, allowing to pass
config variables directly to the kernel PMU driver, prefixing those
variables with a '@', part of a larger series to support Coresight (Mathieu Poirier)

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

Build stats at the end of this message.

----------------------------------------------------------------
Arnaldo Carvalho de Melo (5):
perf probe: Fix dwarf regs table for x86_64
perf trace beauty mmap: Fix defines for non !x86_64
perf tools: Do hugetlb handling in more systems
perf annotate: Pass the symbol's map/dso to the instruction parsers
perf annotate: Resolve 'call' operands to function names

Jiri Olsa (2):
perf hists: Fix width computation for srcline sort entry
perf symbols: Do not open device files

Mathieu Poirier (1):
perf tools: Add infrastructure for PMU specific configuration

Namhyung Kim (9):
perf hists browser: Fix event group display
perf hists: Introduce hists__match_hierarchy()
perf hists: Introduce hists__link_hierarchy()
perf hist: Initialize hierarchy tree explicitly
perf ui/stdio: Always reset output width for hierarchy
perf ui/stdio: Rename print_hierarchy_header()
perf report: Enable group view with hierarchy
perf ui/tui: Reset output width for hierarchy
perf hists: Factor out hists__reset_column_width()

Ravi Bangoria (1):
perf annotate: Do not ignore call instruction with indirect target

Wang Nan (4):
tools include: Add uapi mman.h for each architecture
perf build: Compare mman.h related headers against kernel originals
perf trace beauty mmap: Add missing MADV_FREE
tools include: Add mman macros needed by perf for all arch

tools/arch/alpha/include/uapi/asm/mman.h | 47 ++++++++
tools/arch/arc/include/uapi/asm/mman.h | 6 +
tools/arch/arm/include/uapi/asm/mman.h | 6 +
tools/arch/arm64/include/uapi/asm/mman.h | 6 +
tools/arch/frv/include/uapi/asm/mman.h | 6 +
tools/arch/h8300/include/uapi/asm/mman.h | 6 +
tools/arch/hexagon/include/uapi/asm/mman.h | 6 +
tools/arch/ia64/include/uapi/asm/mman.h | 6 +
tools/arch/m32r/include/uapi/asm/mman.h | 6 +
tools/arch/microblaze/include/uapi/asm/mman.h | 6 +
tools/arch/mips/include/uapi/asm/mman.h | 46 ++++++++
tools/arch/mn10300/include/uapi/asm/mman.h | 6 +
tools/arch/parisc/include/uapi/asm/mman.h | 47 ++++++++
tools/arch/powerpc/include/uapi/asm/mman.h | 15 +++
tools/arch/s390/include/uapi/asm/mman.h | 6 +
tools/arch/score/include/uapi/asm/mman.h | 6 +
tools/arch/sh/include/uapi/asm/mman.h | 6 +
tools/arch/sparc/include/uapi/asm/mman.h | 15 +++
tools/arch/tile/include/uapi/asm/mman.h | 15 +++
tools/arch/x86/include/uapi/asm/mman.h | 5 +
tools/arch/xtensa/include/uapi/asm/mman.h | 47 ++++++++
tools/include/uapi/asm-generic/mman-common.h | 75 ++++++++++++
tools/include/uapi/asm-generic/mman.h | 22 ++++
tools/include/uapi/linux/mman.h | 13 +++
tools/perf/Documentation/perf-record.txt | 12 ++
tools/perf/MANIFEST | 4 +
tools/perf/Makefile.perf | 9 ++
tools/perf/arch/x86/include/dwarf-regs-table.h | 2 +-
tools/perf/builtin-report.c | 1 -
tools/perf/trace/beauty/mmap.c | 72 +-----------
tools/perf/ui/browsers/hists.c | 7 +-
tools/perf/ui/hist.c | 15 +++
tools/perf/ui/stdio/hist.c | 25 +---
tools/perf/util/annotate.c | 37 +++---
tools/perf/util/annotate.h | 2 +-
tools/perf/util/dso.c | 3 +
tools/perf/util/event.c | 7 +-
tools/perf/util/evsel.h | 2 +
tools/perf/util/hist.c | 154 ++++++++++++++++++++++++-
tools/perf/util/hist.h | 1 +
tools/perf/util/map.c | 9 +-
tools/perf/util/parse-events.c | 7 +-
tools/perf/util/parse-events.h | 1 +
tools/perf/util/parse-events.l | 22 ++++
tools/perf/util/parse-events.y | 11 ++
tools/perf/util/sort.h | 1 +
46 files changed, 698 insertions(+), 131 deletions(-)
create mode 100644 tools/arch/alpha/include/uapi/asm/mman.h
create mode 100644 tools/arch/arc/include/uapi/asm/mman.h
create mode 100644 tools/arch/arm/include/uapi/asm/mman.h
create mode 100644 tools/arch/arm64/include/uapi/asm/mman.h
create mode 100644 tools/arch/frv/include/uapi/asm/mman.h
create mode 100644 tools/arch/h8300/include/uapi/asm/mman.h
create mode 100644 tools/arch/hexagon/include/uapi/asm/mman.h
create mode 100644 tools/arch/ia64/include/uapi/asm/mman.h
create mode 100644 tools/arch/m32r/include/uapi/asm/mman.h
create mode 100644 tools/arch/microblaze/include/uapi/asm/mman.h
create mode 100644 tools/arch/mips/include/uapi/asm/mman.h
create mode 100644 tools/arch/mn10300/include/uapi/asm/mman.h
create mode 100644 tools/arch/parisc/include/uapi/asm/mman.h
create mode 100644 tools/arch/powerpc/include/uapi/asm/mman.h
create mode 100644 tools/arch/s390/include/uapi/asm/mman.h
create mode 100644 tools/arch/score/include/uapi/asm/mman.h
create mode 100644 tools/arch/sh/include/uapi/asm/mman.h
create mode 100644 tools/arch/sparc/include/uapi/asm/mman.h
create mode 100644 tools/arch/tile/include/uapi/asm/mman.h
create mode 100644 tools/arch/x86/include/uapi/asm/mman.h
create mode 100644 tools/arch/xtensa/include/uapi/asm/mman.h
create mode 100644 tools/include/uapi/asm-generic/mman-common.h
create mode 100644 tools/include/uapi/asm-generic/mman.h
create mode 100644 tools/include/uapi/linux/mman.h

[root@jouet ~]# perf test
1: vmlinux symtab matches kallsyms : Ok
2: detect openat syscall event : Ok
3: detect openat syscall event on all cpus : Ok
4: read samples using the mmap interface : Ok
5: parse events tests : Ok
6: Validate PERF_RECORD_* events & perf_sample fields : Ok
7: Test perf pmu format parsing : Ok
8: Test dso data read : Ok
9: Test dso data cache : Ok
10: Test dso data reopen : Ok
11: roundtrip evsel->name check : Ok
12: Check parsing of sched tracepoints fields : Ok
13: Generate and check syscalls:sys_enter_openat event fields: Ok
14: struct perf_event_attr setup : Ok
15: Test matching and linking multiple hists : Ok
16: Try 'import perf' in python, checking link problems : Ok
17: Test breakpoint overflow signal handler : Ok
18: Test breakpoint overflow sampling : Ok
19: Test number of exit event of a simple workload : Ok
20: Test software clock events have valid period values : Ok
21: Test object code reading : Ok
22: Test sample parsing : Ok
23: Test using a dummy software event to keep tracking : Ok
24: Test parsing with no sample_id_all bit set : Ok
25: Test filtering hist entries : Ok
26: Test mmap thread lookup : Ok
27: Test thread mg sharing : Ok
28: Test output sorting of hist entries : Ok
29: Test cumulation of child hist entries : Ok
30: Test tracking with sched_switch : Ok
31: Filter fds with revents mask in a fdarray : Ok
32: Add fd to a fdarray, making it autogrow : Ok
33: Test kmod_path__parse function : Ok
34: Test thread map : Ok
35: Test LLVM searching and compiling :
35.1: Basic BPF llvm compiling test : Ok
35.2: Test kbuild searching : Ok
35.3: Compile source for BPF prologue generation test : Ok
35.4: Compile source for BPF relocation test : Ok
36: Test topology in session : Ok
37: Test BPF filter :
37.1: Test basic BPF filtering : Ok
37.2: Test BPF prologue generation : Ok
37.3: Test BPF relocation checker : Ok
38: Test thread map synthesize : Ok
39: Test cpu map synthesize : Ok
40: Test stat config synthesize : Ok
41: Test stat synthesize : Ok
42: Test stat round synthesize : Ok
43: Test attr update synthesize : Ok
44: Test events times : Ok
45: Test backward reading from ring buffer : Ok
46: Test cpu map print : Ok
47: Test SDT event probing : Ok
48: Test is_printable_array function : Ok
49: Test bitmap print : Ok
50: x86 rdpmc test : Ok
51: Test converting perf time to TSC : Ok
52: Test dwarf unwind : Ok
53: Test x86 instruction decoder - new instructions : Ok
54: Test intel cqm nmi context read : Skip
[root@jouet ~]#

Build stats:

# time dm
1 74.534 alpine:3.4: Ok
2 25.636 android-ndk:r12b-arm: Ok
3 78.066 archlinux:latest: Ok
4 41.189 centos:5: Ok
5 64.550 centos:6: Ok
6 74.689 centos:7: Ok
7 68.580 debian:7: Ok
8 75.115 debian:8: Ok
9 75.288 fedora:20: Ok
10 79.294 fedora:21: Ok
11 76.839 fedora:22: Ok
12 76.695 fedora:23: Ok
13 82.058 fedora:24: Ok
14 31.649 fedora:24-x-ARC-uClibc: Ok
15 85.826 fedora:rawhide: Ok
16 83.272 mageia:5: Ok
17 76.883 opensuse:13.2: Ok
18 78.530 opensuse:42.1: Ok
19 85.315 opensuse:tumbleweed: Ok
20 63.436 ubuntu:12.04.5: Ok
21 40.909 ubuntu:14.04: Ok
22 72.689 ubuntu:14.04.4: Ok
23 76.374 ubuntu:15.10: Ok
24 70.309 ubuntu:16.04: Ok
25 59.159 ubuntu:16.04-x-arm: Ok
26 56.011 ubuntu:16.04-x-arm64: Ok
27 56.913 ubuntu:16.04-x-powerpc64: Ok
28 57.442 ubuntu:16.04-x-powerpc64el: Ok
29 80.282 ubuntu:16.10: Ok
30 60.964 ubuntu:16.10-x-arm64: Ok
31 61.390 ubuntu:16.10-x-powerpc: Ok
32 63.167 ubuntu:16.10-x-s390: Ok

real 35m54.027s
user 0m2.855s
sys 0m2.652s

Arnaldo Carvalho de Melo

unread,
Sep 20, 2016, 4:10:11 PM9/20/16
to
From: Jiri Olsa <jo...@redhat.com>

The dso__read_binary_type_filename() function gets the dso's file name to
open. We need to check that it is a regular file before trying to open it,
otherwise we might get stuck on a device file.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/20160920161245.GA8995@krava
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/dso.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 774f6ec884d5..d2c6cdd9d42b 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -363,6 +363,9 @@ static int __open_dso(struct dso *dso, struct machine *machine)
return -EINVAL;
}

+ if (!is_regular_file(name))
+ return -EINVAL;
+
fd = do_open(name);
free(name);
return fd;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 20, 2016, 4:10:11 PM9/20/16
to
From: Arnaldo Carvalho de Melo <ac...@redhat.com>

So that things like:

→ callq 0xffffffff993e3230

found while disassembling /proc/kcore can be beautified by later
patches, that will resolve that address to a function, looking it up in
/proc/kallsyms.

Cc: Adrian Hunter <adrian...@intel.com>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Chris Riyder <chris...@arm.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Hemant Kumar <hem...@linux.vnet.ibm.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Markus Trippelsdorf <mar...@trippelsdorf.de>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Naveen N. Rao <naveen...@linux.vnet.ibm.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ravi Bangoria <ravi.b...@linux.vnet.ibm.com>
Cc: Russell King <rmk+k...@arm.linux.org.uk>
Cc: Taeung Song <treeze...@gmail.com>
Cc: Wang Nan <wang...@huawei.com>
Link: http://lkml.kernel.org/n/tip-p76myuke4j...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/annotate.c | 23 ++++++++++++-----------
tools/perf/util/annotate.h | 2 +-
2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 60e915f392a6..aef841706dff 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -54,7 +54,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
}

-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops, struct map *map __maybe_unused)
{
char *endptr, *tok, *name;

@@ -114,7 +114,7 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == &call_ops;
}

-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused)
{
const char *s = strchr(ops->raw, '+');

@@ -169,7 +169,7 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
return 0;
}

-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, struct map *map)
{
char *name;

@@ -190,7 +190,7 @@ static int lock__parse(struct ins_operands *ops)
return 0;

if (ops->locked.ins->ops->parse &&
- ops->locked.ins->ops->parse(ops->locked.ops) < 0)
+ ops->locked.ins->ops->parse(ops->locked.ops, map) < 0)
goto out_free_ops;

return 0;
@@ -233,7 +233,7 @@ static struct ins_ops lock_ops = {
.scnprintf = lock__scnprintf,
};

-static int mov__parse(struct ins_operands *ops)
+static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused)
{
char *s = strchr(ops->raw, ','), *target, *comment, prev;

@@ -300,7 +300,7 @@ static struct ins_ops mov_ops = {
.scnprintf = mov__scnprintf,
};

-static int dec__parse(struct ins_operands *ops)
+static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused)
{
char *target, *comment, *s, prev;

@@ -705,7 +705,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
}

-static void disasm_line__init_ins(struct disasm_line *dl)
+static void disasm_line__init_ins(struct disasm_line *dl, struct map *map)
{
dl->ins = ins__find(dl->name);

@@ -715,7 +715,7 @@ static void disasm_line__init_ins(struct disasm_line *dl)
if (!dl->ins->ops)
return;

- if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops) < 0)
+ if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0)
dl->ins = NULL;
}

@@ -757,7 +757,8 @@ out_free_name:
}

static struct disasm_line *disasm_line__new(s64 offset, char *line,
- size_t privsize, int line_nr)
+ size_t privsize, int line_nr,
+ struct map *map)
{
struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);

@@ -772,7 +773,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line,
if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0)
goto out_free_line;

- disasm_line__init_ins(dl);
+ disasm_line__init_ins(dl, map);
}
}

@@ -1144,7 +1145,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
parsed_line = tmp2 + 1;
}

- dl = disasm_line__new(offset, parsed_line, privsize, *line_nr);
+ dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map);
free(line);
(*line_nr)++;

diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index ea44e4ff19c6..5bbcec173b82 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -36,7 +36,7 @@ struct ins_operands {

struct ins_ops {
void (*free)(struct ins_operands *ops);
- int (*parse)(struct ins_operands *ops);
+ int (*parse)(struct ins_operands *ops, struct map *map);
int (*scnprintf)(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops);
};
--
2.7.4

tip-bot for Arnaldo Carvalho de Melo

unread,
Sep 20, 2016, 5:40:05 PM9/20/16
to
Commit-ID: fbef103fad5009827965b10aedbecb1786904f4d
Gitweb: http://git.kernel.org/tip/fbef103fad5009827965b10aedbecb1786904f4d
Author: Arnaldo Carvalho de Melo <ac...@redhat.com>
AuthorDate: Mon, 12 Sep 2016 16:47:57 -0300
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Tue, 13 Sep 2016 15:26:30 -0300

perf tools: Do hugetlb handling in more systems

The csets:

0ac3348e5024 ("perf tools: Recognize hugetlb mapping as anon mapping")
d7e404af115b ("perf record: Mark MAP_HUGETLB when synthesizing mmap events")

Added code conditional on MAP_HUGETLB, to make it build in older systems
where that define wasn't available. Now that we grabbed copies of
uapi/linux/mman.h to have all those definitions in tools/, use it so
that we can support building the tools for older systems (without the
MAP_HUGETLB define in its libc headers) using new kernels that support
such maps.

Cc: Adrian Hunter <adrian...@intel.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Jiri Olsa <jo...@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/event.c | 7 ++-----
tools/perf/util/map.c | 9 ++-------
2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6c30171..2880e22 100644
index d51a125..c662fef 100644

Ingo Molnar

unread,
Sep 20, 2016, 5:40:05 PM9/20/16
to
Pulled, thanks a lot Arnaldo!

Ingo

tip-bot for Arnaldo Carvalho de Melo

unread,
Sep 20, 2016, 5:50:05 PM9/20/16
to
Commit-ID: bff5c3061374c37ed1262131eb333f714e5bcdf8
Gitweb: http://git.kernel.org/tip/bff5c3061374c37ed1262131eb333f714e5bcdf8
Author: Arnaldo Carvalho de Melo <ac...@redhat.com>
AuthorDate: Mon, 19 Sep 2016 17:18:16 -0300
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Tue, 20 Sep 2016 12:28:29 -0300

perf annotate: Pass the symbol's map/dso to the instruction parsers

So that things like:

→ callq 0xffffffff993e3230

found while disassembling /proc/kcore can be beautified by later
patches, that will resolve that address to a function, looking it up in
/proc/kallsyms.

Cc: Adrian Hunter <adrian...@intel.com>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Chris Riyder <chris...@arm.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Hemant Kumar <hem...@linux.vnet.ibm.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Markus Trippelsdorf <mar...@trippelsdorf.de>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Naveen N. Rao <naveen...@linux.vnet.ibm.com>
Cc: Pawel Moll <pawel...@arm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ravi Bangoria <ravi.b...@linux.vnet.ibm.com>
Cc: Russell King <rmk+k...@arm.linux.org.uk>
Cc: Taeung Song <treeze...@gmail.com>
Cc: Wang Nan <wang...@huawei.com>
Link: http://lkml.kernel.org/n/tip-p76myuke4j...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/annotate.c | 23 ++++++++++++-----------
tools/perf/util/annotate.h | 2 +-
2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 60e915f..aef8417 100644
index ea44e4f..5bbcec1 100644

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:05 PM9/22/16
to
From: Arnaldo Carvalho de Melo <ac...@redhat.com>

Hi Ingo,

Please consider pulling,

- Arnaldo

The following changes since commit 89f1c2c59c4aef8e26edbc7db5175e6ffb0e9ec7:

Merge tag 'perf-core-for-mingo-20160920' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core (2016-09-20 23:32:02 +0200)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-20160922

for you to fetch changes up to 2d831454140f28fa643b78deede4511b9e2c9e5f:

perf hists: Make hists__fprintf_headers function global (2016-09-22 13:08:59 -0300)

----------------------------------------------------------------
perf/core improvements:

New features:

- Add support for interacting with Coresight PMU ETMs/PTMs, that are IP blocks
to perform hardware assisted tracing on an ARM CPU core (Mathieu Poirier)

Infrastructure:

- Histogram prep work for the upcoming c2c tool (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

----------------------------------------------------------------
Jiri Olsa (9):
perf evsel: Remove superfluous initialization of weight
perf hists: Use bigger buffer for stdio headers
perf hists: Add __hist_entry__snprintf function
perf tools: Make reset_dimensions global
perf tools: Make output_field_add and sort_dimension__add global
perf tools: Make several sorting functions global
perf tools: Make several display functions global
perf hists: Make __hist_entry__snprintf function global
perf hists: Make hists__fprintf_headers function global

Mathieu Poirier (6):
perf tools: Confine __get_cpuid() to x86 architecture
perf tools: Make coresight PMU listable
perf tools: Add coresight etm PMU record capabilities
perf pmu: Push configuration down to PMU driver
perf tools: Add PMU configuration to tools
perf tools: Add sink configuration for cs_etm PMU

MAINTAINERS | 5 +
tools/perf/Makefile.config | 11 +-
tools/perf/arch/arm/util/Build | 2 +
tools/perf/arch/arm/util/auxtrace.c | 54 ++++
tools/perf/arch/arm/util/cs-etm.c | 617 ++++++++++++++++++++++++++++++++++++
tools/perf/arch/arm/util/cs-etm.h | 26 ++
tools/perf/arch/arm/util/pmu.c | 36 +++
tools/perf/arch/arm64/util/Build | 4 +
tools/perf/builtin-record.c | 10 +
tools/perf/builtin-stat.c | 9 +
tools/perf/builtin-top.c | 13 +
tools/perf/ui/browsers/hists.c | 2 +-
tools/perf/ui/hist.c | 2 +-
tools/perf/ui/stdio/hist.c | 14 +-
tools/perf/util/Build | 1 +
tools/perf/util/auxtrace.c | 1 +
tools/perf/util/auxtrace.h | 1 +
tools/perf/util/cs-etm.h | 74 +++++
tools/perf/util/drv_configs.c | 77 +++++
tools/perf/util/drv_configs.h | 26 ++
tools/perf/util/evsel.c | 2 -
tools/perf/util/hist.h | 5 +
tools/perf/util/pmu.h | 2 +
tools/perf/util/sort.c | 16 +-
tools/perf/util/sort.h | 11 +
25 files changed, 1001 insertions(+), 20 deletions(-)
create mode 100644 tools/perf/arch/arm/util/auxtrace.c
create mode 100644 tools/perf/arch/arm/util/cs-etm.c
create mode 100644 tools/perf/arch/arm/util/cs-etm.h
create mode 100644 tools/perf/arch/arm/util/pmu.c
create mode 100644 tools/perf/util/cs-etm.h
create mode 100644 tools/perf/util/drv_configs.c
create mode 100644 tools/perf/util/drv_configs.h

[root@zoo ~]# time dm
1 73.911 alpine:3.4: Ok
2 26.890 android-ndk:r12b-arm: Ok
3 77.833 archlinux:latest: Ok
4 40.814 centos:5: Ok
5 64.151 centos:6: Ok
6 75.720 centos:7: Ok
7 68.960 debian:7: Ok
8 75.606 debian:8: Ok
9 75.127 fedora:20: Ok
10 80.186 fedora:21: Ok
11 80.157 fedora:22: Ok
12 83.273 fedora:23: Ok
13 91.566 fedora:24: Ok
14 37.720 fedora:24-x-ARC-uClibc: Ok
15 98.492 fedora:rawhide: Ok
16 100.555 mageia:5: Ok
17 94.140 opensuse:13.2: Ok
18 95.476 opensuse:42.1: Ok
19 106.037 opensuse:tumbleweed: Ok
20 75.951 ubuntu:12.04.5: Ok
21 52.138 ubuntu:14.04: Ok
22 94.814 ubuntu:14.04.4: Ok
23 100.525 ubuntu:15.10: Ok
24 93.813 ubuntu:16.04: Ok
25 85.214 ubuntu:16.04-x-arm: Ok
26 83.487 ubuntu:16.04-x-arm64: Ok
27 82.918 ubuntu:16.04-x-powerpc64: Ok
28 84.189 ubuntu:16.04-x-powerpc64el: Ok
29 93.162 ubuntu:16.10: Ok

real 38m13.568s
user 0m2.379s
sys 0m2.402s
[root@zoo ~]#

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:06 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-10-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/stdio/hist.c | 2 +-
tools/perf/util/hist.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index a2a50ef4176d..89d8441f9890 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -702,7 +702,7 @@ hists__fprintf_standard_headers(struct hists *hists,
return hpp_list->nr_header_lines + 2;
}

-static int hists__fprintf_headers(struct hists *hists, FILE *fp)
+int hists__fprintf_headers(struct hists *hists, FILE *fp)
{
char bf[1024];
struct perf_hpp dummy_hpp = {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ecc4c0c5ce18..9928fed8bc59 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -489,5 +489,6 @@ int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);

#endif /* __PERF_HIST_H */
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:06 PM9/22/16
to
From: Mathieu Poirier <mathieu...@linaro.org>

The __get_cpuid() test is only valid when compiling for x86. When
compiling for other architectures like ARM/ARM64 the test fails even if
the functionality is not required.

This patch isolates the built-in feature check to the x86 platform, allowing
the compilation and usage of PMUs that use the AUXTRACE infrastructure
on other architectures (e.g. ARM CoreSight).

Signed-off-by: Mathieu Poirier <mathieu...@linaro.org>
Acked-by: Adrian Hunter <adrian...@intel.com>
Acked-by: Jiri Olsa <jo...@kernel.org>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linux-ar...@lists.infradead.org
Link: http://lkml.kernel.org/r/1474041004-13956-2-git-s...@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Makefile.config | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 24803c58049a..72edf83d76b7 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -746,10 +746,13 @@ ifdef LIBBABELTRACE
endif

ifndef NO_AUXTRACE
- ifeq ($(feature-get_cpuid), 0)
- msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
- NO_AUXTRACE := 1
- else
+ ifeq ($(ARCH),x86)
+ ifeq ($(feature-get_cpuid), 0)
+ msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
+ NO_AUXTRACE := 1
+ endif
+ endif
+ ifndef NO_AUXTRACE
$(call detected,CONFIG_AUXTRACE)
CFLAGS += -DHAVE_AUXTRACE_SUPPORT
endif
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:06 PM9/22/16
to
From: Mathieu Poirier <mathieu...@linaro.org>

Now that the required mechanism is in place to deal with PMU-specific
configuration, add the functionality to the tools where events can be
selected.

Signed-off-by: Mathieu Poirier <mathieu...@linaro.org>
Acked-by: Jiri Olsa <jo...@kernel.org>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linux-ar...@lists.infradead.org
Link: http://lkml.kernel.org/r/1474041004-13956-7-git-s...@linaro.org
[ Fix the build on XSI-compliant systems, using str_error_r() to make sure we return a string, not an integer ]
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-record.c | 10 ++++++++++
tools/perf/builtin-stat.c | 9 +++++++++
tools/perf/builtin-top.c | 13 +++++++++++++
3 files changed, 32 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 03251c7f14ec..2d0d69be3bf8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -22,6 +22,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
+#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
@@ -383,6 +384,7 @@ static int record__open(struct record *rec)
struct perf_evlist *evlist = rec->evlist;
struct perf_session *session = rec->session;
struct record_opts *opts = &rec->opts;
+ struct perf_evsel_config_term *err_term;
int rc = 0;

perf_evlist__config(evlist, opts, &callchain_param);
@@ -412,6 +414,14 @@ try_again:
goto out;
}

+ if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
+ error("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ rc = -1;
+ goto out;
+ }
+
rc = record__mmap(rec);
if (rc)
goto out;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 90882b1d6a91..688dea7cb08f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -52,6 +52,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
+#include "util/drv_configs.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
@@ -540,6 +541,7 @@ static int __run_perf_stat(int argc, const char **argv)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
+ struct perf_evsel_config_term *err_term;

if (interval) {
ts.tv_sec = interval / USEC_PER_MSEC;
@@ -611,6 +613,13 @@ try_again:
return -1;
}

+ if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
+ error("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(counter), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ return -1;
+ }
+
if (STAT_RECORD) {
int err, fd = perf_data_file__fd(&perf_stat.file);

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 400785702566..fe3af9535e85 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -24,6 +24,7 @@
#include "util/annotate.h"
#include "util/config.h"
#include "util/color.h"
+#include "util/drv_configs.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/machine.h"
@@ -913,6 +914,10 @@ static int callchain_param__setup_sample_type(struct callchain_param *callchain)

static int __cmd_top(struct perf_top *top)
{
+ char msg[512];
+ struct perf_evsel *pos;
+ struct perf_evsel_config_term *err_term;
+ struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;
pthread_t thread;
int ret;
@@ -947,6 +952,14 @@ static int __cmd_top(struct perf_top *top)
if (ret)
goto out_delete;

+ ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
+ if (ret) {
+ error("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ goto out_delete;
+ }
+
top->session->evlist = top->evlist;
perf_session__set_id_hdr_size(top->session);

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:06 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Add __hist_entry__snprintf() to take a perf_hpp_list as an argument
instead of using he->hists->hpp_list.

This way we can display an arbitrary list of entries regardless of the
hists setup, which will be useful in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-2-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/stdio/hist.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index c8dca34585ea..189665c315cc 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,7 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
return 0;
}

-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
@@ -384,7 +385,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
if (symbol_conf.exclude_other && !he->parent)
return 0;

- hists__for_each_format(he->hists, fmt) {
+ perf_hpp_list__for_each_format(hpp_list, fmt) {
if (perf_hpp__should_skip(fmt, he->hists))
continue;

@@ -410,6 +411,11 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
return hpp->buf - start;
}

+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+{
+ return __hist_entry__snprintf(he, hpp, he->hists->hpp_list);
+}
+
static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
struct perf_hpp *hpp,
struct hists *hists,
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:07 PM9/22/16
to
From: Mathieu Poirier <mathieu...@linaro.org>

This patch adds a PMU callback and the required mechanism so that drivers
can process the command-line configuration elements found in
evsel::config_terms.

Signed-off-by: Mathieu Poirier <mathieu...@linaro.org>
Acked-by: Jiri Olsa <jo...@kernel.org>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linux-ar...@lists.infradead.org
Link: http://lkml.kernel.org/r/1474041004-13956-6-git-s...@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/Build | 1 +
tools/perf/util/drv_configs.c | 77 +++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/drv_configs.h | 26 +++++++++++++++
tools/perf/util/pmu.h | 2 ++
4 files changed, 106 insertions(+)
create mode 100644 tools/perf/util/drv_configs.c
create mode 100644 tools/perf/util/drv_configs.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 96f99d608d00..eb60e613d795 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -86,6 +86,7 @@ libperf-y += term.o
libperf-y += help-unknown-cmd.o
libperf-y += mem-events.o
libperf-y += vsprintf.o
+libperf-y += drv_configs.o

libperf-$(CONFIG_LIBBPF) += bpf-loader.o
libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c
new file mode 100644
index 000000000000..1647f285c629
--- /dev/null
+++ b/tools/perf/util/drv_configs.c
@@ -0,0 +1,77 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "pmu.h"
+
+static int
+perf_evsel__apply_drv_configs(struct perf_evsel *evsel,
+ struct perf_evsel_config_term **err_term)
+{
+ bool found = false;
+ int err = 0;
+ struct perf_evsel_config_term *term;
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL)
+ if (pmu->type == evsel->attr.type) {
+ found = true;
+ break;
+ }
+
+ list_for_each_entry(term, &evsel->config_terms, list) {
+ if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+ continue;
+
+ /*
+ * We have a configuration term, report an error if we
+ * can't find the PMU or if the PMU driver doesn't support
+ * cmd line driver configuration.
+ */
+ if (!found || !pmu->set_drv_config) {
+ err = -EINVAL;
+ *err_term = term;
+ break;
+ }
+
+ err = pmu->set_drv_config(term);
+ if (err) {
+ *err_term = term;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+ struct perf_evsel **err_evsel,
+ struct perf_evsel_config_term **err_term)
+{
+ struct perf_evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ err = perf_evsel__apply_drv_configs(evsel, err_term);
+ if (err) {
+ *err_evsel = evsel;
+ break;
+ }
+ }
+
+ return err;
+}
diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h
new file mode 100644
index 000000000000..32bc9babc2e0
--- /dev/null
+++ b/tools/perf/util/drv_configs.h
@@ -0,0 +1,26 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_DRV_CONFIGS_H
+#define __PERF_DRV_CONFIGS_H
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+ struct perf_evsel **err_evsel,
+ struct perf_evsel_config_term **term);
+#endif
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 5d7e84466bee..743422ad900b 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -4,6 +4,7 @@
#include <linux/bitmap.h>
#include <linux/perf_event.h>
#include <stdbool.h>
+#include "evsel.h"
#include "parse-events.h"

enum {
@@ -25,6 +26,7 @@ struct perf_pmu {
struct list_head format; /* HEAD struct perf_pmu_format -> list */
struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
struct list_head list; /* ELEM */
+ int (*set_drv_config) (struct perf_evsel_config_term *term);
};

struct perf_pmu_info {
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:07 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-7-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/sort.c | 6 +++---
tools/perf/util/sort.h | 6 ++++++
2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9f7c1ea9e3ad..452e15a10dd2 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -867,7 +867,7 @@ struct sort_entry sort_cycles = {
};

/* --sort daddr_sym */
-static int64_t
+int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -896,7 +896,7 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
width);
}

-static int64_t
+int64_t
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
uint64_t l = 0, r = 0;
@@ -1062,7 +1062,7 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
return repsep_snprintf(bf, size, "%-*s", width, out);
}

-static int64_t
+int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
{
u64 l, r;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e93b0fa43704..099c97557d33 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -274,4 +274,10 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct perf_evlist *evlist,
int level);
int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
#endif /* __PERF_SORT_H */
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:07 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-9-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/stdio/hist.c | 4 ++--
tools/perf/util/hist.h | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 189665c315cc..a2a50ef4176d 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,8 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
return 0;
}

-static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
- struct perf_hpp_list *hpp_list)
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
{
const char *sep = symbol_conf.field_sep;
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 6150b94e0d23..ecc4c0c5ce18 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -487,5 +487,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list);

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:08 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-8-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/ui/browsers/hists.c | 2 +-
tools/perf/ui/hist.c | 2 +-
tools/perf/util/hist.h | 2 ++
3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index a6d5d248b8fb..fb8e42c7507a 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1080,7 +1080,7 @@ struct hpp_arg {
bool current_entry;
};

-static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
{
struct hpp_arg *arg = hpp->ptr;
int ret, len;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 60c4a4d08374..37388397b5bc 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -237,7 +237,7 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
}

-static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
{
va_list args;
ssize_t ssize = hpp->size;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index defa957f27df..6150b94e0d23 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -485,5 +485,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
#define HIERARCHY_INDENT 3

bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:11 PM9/22/16
to
From: Mathieu Poirier <mathieu...@linaro.org>

Add the required mechanism allowing 'perf list pmu' to discover
CoreSight ETM/PTM tracers.

Signed-off-by: Mathieu Poirier <mathieu...@linaro.org>
Acked-by: Adrian Hunter <adrian...@intel.com>
Acked-by: Jiri Olsa <jo...@kernel.org>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linux-ar...@lists.infradead.org
Link: http://lkml.kernel.org/r/1474041004-13956-3-git-s...@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
MAINTAINERS | 1 +
tools/perf/arch/arm/util/Build | 2 ++
tools/perf/arch/arm/util/pmu.c | 34 ++++++++++++++++++++++++++++++++++
tools/perf/arch/arm64/util/Build | 2 ++
4 files changed, 39 insertions(+)
create mode 100644 tools/perf/arch/arm/util/pmu.c

diff --git a/MAINTAINERS b/MAINTAINERS
index db814a89599c..7407fe779053 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1123,6 +1123,7 @@ F: drivers/hwtracing/coresight/*
F: Documentation/trace/coresight.txt
F: Documentation/devicetree/bindings/arm/coresight.txt
F: Documentation/ABI/testing/sysfs-bus-coresight-devices-*
+F: tools/perf/arch/arm/util/pmu.c

ARM/CORGI MACHINE SUPPORT
M: Richard Purdie <rpu...@rpsys.net>
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index f98da17357c0..4093fd146f46 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -2,3 +2,5 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o

libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += pmu.o
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
new file mode 100644
index 000000000000..af9fb666b44f
--- /dev/null
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu...@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <linux/coresight-pmu.h>
+#include <linux/perf_event.h>
+
+#include "../../util/pmu.h"
+
+struct perf_event_attr
+*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+ if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
+ /* add ETM default config here */
+ pmu->selectable = true;
+ }
+#endif
+ return NULL;
+}
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 02f41dba4f4f..3876dd05bb8b 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,2 +1,4 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+
+libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:11 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Remove the superfluous initialization of weight; it is already set to 0
via memset.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474290610-23241-3-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/evsel.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 21fd573106ed..f3225a2e6eee 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1728,7 +1728,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
data->cpu = data->pid = data->tid = -1;
data->stream_id = data->id = data->time = -1ULL;
data->period = evsel->attr.sample_period;
- data->weight = 0;
data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

if (event->header.type != PERF_RECORD_SAMPLE) {
@@ -1935,7 +1934,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
}
}

- data->weight = 0;
if (type & PERF_SAMPLE_WEIGHT) {
OVERFLOW_CHECK_u64(array);
data->weight = *array;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 22, 2016, 5:20:12 PM9/22/16
to
From: Jiri Olsa <jo...@kernel.org>

Will be used from external places in the upcoming c2c patch series.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-6-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/sort.c | 8 ++++----
tools/perf/util/sort.h | 4 ++++
2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9e1f6f75a50f..9f7c1ea9e3ad 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2308,9 +2308,9 @@ int hpp_dimension__add_output(unsigned col)
return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
}

-static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
- struct perf_evlist *evlist,
- int level)
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct perf_evlist *evlist,
+ int level)
{
unsigned int i;

@@ -2685,7 +2685,7 @@ void sort__setup_elide(FILE *output)
}
}

-static int output_field_add(struct perf_hpp_list *list, char *tok)
+int output_field_add(struct perf_hpp_list *list, char *tok)
{
unsigned int i;

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 4efadc1e98c5..e93b0fa43704 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -270,4 +270,8 @@ bool is_strict_order(const char *order);

int hpp_dimension__add_output(unsigned col);
void reset_dimensions(void);
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct perf_evlist *evlist,
+ int level);
+int output_field_add(struct perf_hpp_list *list, char *tok);

Ingo Molnar

unread,
Sep 23, 2016, 1:30:10 AM9/23/16
to

* Arnaldo Carvalho de Melo <ac...@kernel.org> wrote:

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Change '/sys/bus/event_sources' to the correct path which is
'/sys/bus/event_source'.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-2-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf-record.txt | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 1a24f4d64328..babbb63e6d9d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -35,15 +35,15 @@ OPTIONS

- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
'param1', 'param2', etc are defined as formats for the PMU in
- /sys/bus/event_sources/devices/<pmu>/format/*.
+ /sys/bus/event_source/devices/<pmu>/format/*.

- a symbolically formed event like 'pmu/config=M,config1=N,config3=K/'

where M, N, K are numbers (in decimal, hex, octal format). Acceptable
values for each of 'config', 'config1' and 'config2' are defined by
- corresponding entries in /sys/bus/event_sources/devices/<pmu>/format/*
+ corresponding entries in /sys/bus/event_source/devices/<pmu>/format/*
param1 and param2 are defined as formats for the PMU in:
- /sys/bus/event_sources/devices/<pmu>/format/*
+ /sys/bus/event_source/devices/<pmu>/format/*

There are also some params which are not defined in .../<pmu>/format/*.
These params can be used to overload default config values per event.
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Symbols come from either the DSO or /proc/kallsyms for the kernel.
Details of the functionality can be found in Documentation/perf-record.txt.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-8-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf-record.txt | 55 ++-
tools/perf/builtin-record.c | 14 +-
tools/perf/util/auxtrace.c | 737 +++++++++++++++++++++++++++++++
tools/perf/util/auxtrace.h | 54 +++
4 files changed, 857 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index babbb63e6d9d..92335193dc33 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -89,9 +89,62 @@ OPTIONS

--filter=<filter>::
Event filter. This option should follow a event selector (-e) which
- selects tracepoint event(s). Multiple '--filter' options are combined
+ selects either tracepoint event(s) or a hardware trace PMU
+ (e.g. Intel PT or CoreSight).
+
+ - tracepoint filters
+
+ In the case of tracepoints, multiple '--filter' options are combined
using '&&'.

+ - address filters
+
+ A hardware trace PMU advertises its ability to accept a number of
+ address filters by specifying a non-zero value in
+ /sys/bus/event_source/devices/<pmu>/nr_addr_filters.
+
+ Address filters have the format:
+
+ filter|start|stop|tracestop <start> [/ <size>] [@<file name>]
+
+ Where:
+ - 'filter': defines a region that will be traced.
+ - 'start': defines an address at which tracing will begin.
+ - 'stop': defines an address at which tracing will stop.
+ - 'tracestop': defines a region in which tracing will stop.
+
+ <file name> is the name of the object file, <start> is the offset to the
+ code to trace in that file, and <size> is the size of the region to
+ trace. 'start' and 'stop' filters need not specify a <size>.
+
+ If no object file is specified then the kernel is assumed, in which case
+ the start address must be a current kernel memory address.
+
+ <start> can also be specified by providing the name of a symbol. If the
+ symbol name is not unique, it can be disambiguated by inserting #n where
+ 'n' selects the n'th symbol in address order. Alternately #0, #g or #G
+ select only a global symbol. <size> can also be specified by providing
+ the name of a symbol, in which case the size is calculated to the end
+ of that symbol. For 'filter' and 'tracestop' filters, if <size> is
+ omitted and <start> is a symbol, then the size is calculated to the end
+ of that symbol.
+
+ If <size> is omitted and <start> is '*', then the start and size will
+ be calculated from the first and last symbols, i.e. to trace the whole
+ file.
+
+ If symbol names (or '*') are provided, they must be surrounded by white
+ space.
+
+ The filter passed to the kernel is not necessarily the same as entered.
+ To see the filter that is passed, use the -v option.
+
+ The kernel may not be able to configure a trace region if it is not
+ within a single mapping. MMAP events (or /proc/<pid>/maps) can be
+ examined to determine if that is a possibility.
+
+ Multiple filters can be separated with space or comma.
+
--exclude-perf::
Don't record events issued by perf itself. This option should follow
a event selector (-e) which selects tracepoint event(s). It adds a
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 962adcfc43a5..67d2a9003294 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1581,6 +1581,18 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (err)
goto out;

+ /*
+ * Allow aliases to facilitate the lookup of symbols for address
+ * filters. Refer to auxtrace_parse_filters().
+ */
+ symbol_conf.allow_aliases = true;
+
+ symbol__init(NULL);
+
+ err = auxtrace_parse_filters(rec->evlist);
+ if (err)
+ goto out;
+
if (dry_run)
goto out;

@@ -1594,8 +1606,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)

err = -ENOMEM;

- symbol__init(NULL);
-
if (symbol_conf.kptr_restrict)
pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c0aba8e839aa..c5a6e0b12452 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -16,6 +16,10 @@
#include <sys/types.h>
#include <sys/mman.h>
#include <stdbool.h>
+#include <ctype.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>

#include <linux/kernel.h>
#include <linux/perf_event.h>
@@ -35,9 +39,14 @@
#include "../perf.h"
#include "util.h"
#include "evlist.h"
+#include "dso.h"
+#include "map.h"
+#include "pmu.h"
+#include "evsel.h"
#include "cpumap.h"
#include "thread_map.h"
#include "asm/bug.h"
+#include "symbol/kallsyms.h"
#include "auxtrace.h"

#include <linux/hash.h>
@@ -1399,3 +1408,731 @@ void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)

return NULL;
}
+
+/*
+ * Release the string buffer backing a parsed filter. The action, symbol and
+ * file name members point into that buffer, so they are cleared with it.
+ */
+static void addr_filter__free_str(struct addr_filter *filt)
+{
+	char *buf = filt->str;
+
+	filt->str = NULL;
+	filt->filename = NULL;
+	filt->sym_to = NULL;
+	filt->sym_from = NULL;
+	filt->action = NULL;
+
+	free(buf);
+}
+
+/*
+ * Allocate a zero-filled address filter with an initialised list node.
+ * Returns NULL if the allocation fails.
+ */
+static struct addr_filter *addr_filter__new(void)
+{
+	struct addr_filter *filt;
+
+	filt = zalloc(sizeof(*filt));
+	if (!filt)
+		return NULL;
+
+	INIT_LIST_HEAD(&filt->list);
+
+	return filt;
+}
+
+/* Free a filter together with its string buffer; NULL is accepted. */
+static void addr_filter__free(struct addr_filter *filt)
+{
+	if (!filt)
+		return;
+
+	addr_filter__free_str(filt);
+	free(filt);
+}
+
+/* Queue @filt at the tail of @filts and bump the filter count. */
+static void addr_filters__add(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	filts->cnt++;
+	list_add_tail(&filt->list, &filts->head);
+}
+
+/* Unlink @filt from @filts and drop the filter count; @filt is not freed. */
+static void addr_filters__del(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	filts->cnt--;
+	list_del_init(&filt->list);
+}
+
+/* Set up @filts as an empty filter list. */
+void addr_filters__init(struct addr_filters *filts)
+{
+	filts->cnt = 0;
+	INIT_LIST_HEAD(&filts->head);
+}
+
+/* Remove and free every filter queued on @filts, leaving the list empty. */
+void addr_filters__exit(struct addr_filters *filts)
+{
+	while (!list_empty(&filts->head)) {
+		struct addr_filter *filt;
+
+		filt = list_first_entry(&filts->head, struct addr_filter, list);
+		addr_filters__del(filts, filt);
+		addr_filter__free(filt);
+	}
+}
+
+/*
+ * Parse one token from *inp after skipping leading spaces.
+ *
+ * A token starting with a digit is converted with strtoull() (base
+ * auto-detected) into *num. Anything else is treated as a string that runs
+ * up to the first character in @str_delim; the delimiter is overwritten with
+ * '\0' and *str is pointed at the token inside the input buffer.
+ *
+ * Pass NULL for @num or @str to reject that kind of token. On success *inp is
+ * advanced past the token (and past the replaced delimiter, if any).
+ *
+ * Returns 0 on success, -EINVAL for an empty or disallowed token, or -errno
+ * from the numeric conversion.
+ */
+static int parse_num_or_str(char **inp, u64 *num, const char **str,
+			    const char *str_delim)
+{
+	char *pos = *inp + strspn(*inp, " ");
+	size_t len;
+
+	*inp = pos;
+
+	if (isdigit(*pos)) {
+		char *end;
+
+		if (!num)
+			return -EINVAL;
+
+		errno = 0;
+		*num = strtoull(pos, &end, 0);
+		if (errno)
+			return -errno;
+		if (end == pos)
+			return -EINVAL;
+
+		*inp = end;
+		return 0;
+	}
+
+	if (!str)
+		return -EINVAL;
+
+	*str = pos;
+	len = strcspn(pos, str_delim);
+	if (!len)
+		return -EINVAL;
+
+	pos += len;
+	/* Terminate the token in place unless it already ends the input */
+	if (*pos)
+		*pos++ = '\0';
+	*inp = pos;
+
+	return 0;
+}
+
+/*
+ * Translate the action keyword in filt->action into the start/range flags.
+ * 'tracestop' is rewritten to 'stop' by advancing the action pointer.
+ * Returns 0 on success or -EINVAL for an unknown action.
+ */
+static int parse_action(struct addr_filter *filt)
+{
+	const char *action = filt->action;
+
+	if (!strcmp(action, "filter")) {
+		filt->start = true;
+		filt->range = true;
+		return 0;
+	}
+
+	if (!strcmp(action, "start")) {
+		filt->start = true;
+		return 0;
+	}
+
+	if (!strcmp(action, "stop")) {
+		filt->start = false;
+		return 0;
+	}
+
+	if (!strcmp(action, "tracestop")) {
+		filt->start = false;
+		filt->range = true;
+		filt->action += 5; /* Change 'tracestop' to 'stop' */
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Parse an optional '#<n>' / '#g' / '#G' symbol selector from *inp.
+ *
+ * *idx is set to -1 when no selector is present, to 0 for 'g'/'G', and to
+ * the parsed number otherwise. *inp is advanced past whatever was consumed.
+ * Returns 0 on success, -EINVAL for a malformed or too-large number, or
+ * -errno from strtoul().
+ */
+static int parse_sym_idx(char **inp, int *idx)
+{
+	char *pos = *inp + strspn(*inp, " ");
+	unsigned long val;
+	char *end;
+
+	*idx = -1;
+	*inp = pos;
+
+	if (*pos != '#')
+		return 0;
+
+	pos++;
+	*inp = pos;
+
+	if (*pos == 'g' || *pos == 'G') {
+		*inp = pos + 1;
+		*idx = 0;
+		return 0;
+	}
+
+	errno = 0;
+	val = strtoul(pos, &end, 0);
+	if (errno)
+		return -errno;
+	if (end == pos || val > INT_MAX)
+		return -EINVAL;
+
+	*inp = end;
+	*idx = val;
+
+	return 0;
+}
+
+/*
+ * Parse an address or size that is either a number (stored in *num) or a
+ * symbol name (stored in *str), the latter optionally followed by a '#'
+ * selector that is stored in *idx.
+ */
+static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx)
+{
+	int err;
+
+	err = parse_num_or_str(inp, num, str, " ");
+	if (err)
+		return err;
+
+	/* Only symbol names can carry a selector */
+	if (!*str)
+		return 0;
+
+	return parse_sym_idx(inp, idx);
+}
+
+/*
+ * Parse a single filter of the form
+ *   <action> <start> [/ <size>] [@<file name>]
+ * from *filter_inp into @filt.
+ *
+ * The input is duplicated into filt->str and tokenised in place, so the
+ * string members of @filt point into that copy. On success *filter_inp is
+ * advanced past the filter and any trailing space/comma separators. On
+ * failure the copy is released again and a negative error code is returned.
+ */
+static int parse_one_filter(struct addr_filter *filt, const char **filter_inp)
+{
+	char *cur = strdup(*filter_inp);
+	int ret;
+
+	filt->str = cur;
+	if (!cur)
+		return -ENOMEM;
+
+	ret = parse_num_or_str(&cur, NULL, &filt->action, " ");
+	if (ret)
+		goto fail;
+
+	ret = parse_action(filt);
+	if (ret)
+		goto fail;
+
+	ret = parse_addr_size(&cur, &filt->addr, &filt->sym_from,
+			      &filt->sym_from_idx);
+	if (ret)
+		goto fail;
+
+	cur += strspn(cur, " ");
+
+	/* Optional size or end symbol; its presence makes this a range */
+	if (*cur == '/') {
+		cur++;
+		ret = parse_addr_size(&cur, &filt->size, &filt->sym_to,
+				      &filt->sym_to_idx);
+		if (ret)
+			goto fail;
+		filt->range = true;
+	}
+
+	cur += strspn(cur, " ");
+
+	/* Optional object file name */
+	if (*cur == '@') {
+		cur++;
+		ret = parse_num_or_str(&cur, NULL, &filt->filename, " ,");
+		if (ret)
+			goto fail;
+	}
+
+	cur += strspn(cur, " ,");
+
+	*filter_inp += cur - filt->str;
+
+	return 0;
+
+fail:
+	addr_filter__free_str(filt);
+
+	return ret;
+}
+
+/*
+ * Parse a string of one or more address filters (separated by space or
+ * comma) into @filts without resolving any symbols.
+ *
+ * Returns 0 on success. On failure every filter already queued on @filts is
+ * freed and a negative error code is returned (-ENOMEM if a filter cannot be
+ * allocated, otherwise the parse error).
+ */
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+				    const char *filter)
+{
+	struct addr_filter *filt;
+	const char *fstr = filter;
+	int err;
+
+	while (*fstr) {
+		filt = addr_filter__new();
+		/* parse_one_filter() dereferences filt, so check it first */
+		if (!filt) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+
+		err = parse_one_filter(filt, &fstr);
+		if (err) {
+			addr_filter__free(filt);
+			goto out_err;
+		}
+
+		addr_filters__add(filts, filt);
+	}
+
+	return 0;
+
+out_err:
+	addr_filters__exit(filts);
+
+	return err;
+}
+
+/*
+ * struct sym_args - state passed to the kallsyms__parse() callbacks below.
+ * @name: symbol name to match
+ * @start: address of the matched symbol
+ * @size: size of the matched symbol, derived from the start address of a
+ *        following symbol (or the extent of the kernel for '*')
+ * @idx: which occurrence to select when @selected is set
+ * @cnt: running count of name matches
+ * @started: a matching symbol has been found
+ * @global: match only symbols whose kallsyms type is upper case
+ * @selected: match the @idx'th symbol with that name
+ * @duplicate: a second matching symbol was found
+ * @near: the previously printed symbol matched, so the next one is printed
+ *        as a location hint
+ */
+struct sym_args {
+ const char *name;
+ u64 start;
+ u64 size;
+ int idx;
+ int cnt;
+ bool started;
+ bool global;
+ bool selected;
+ bool duplicate;
+ bool near;
+};
+
+/*
+ * Decide whether a kallsyms entry is the symbol being looked for: it must be
+ * a function with the requested name and be either global, the n'th
+ * occurrence, or - when neither restriction was requested - any occurrence.
+ *
+ * Note args->cnt is only advanced for name matches that reach the n'th
+ * occurrence test.
+ */
+static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+{
+	if (!symbol_type__is_a(type, MAP__FUNCTION))
+		return false;
+
+	if (strcmp(name, args->name))
+		return false;
+
+	if (args->global && isupper(type))
+		return true;
+
+	if (args->selected && ++(args->cnt) == args->idx)
+		return true;
+
+	return !args->global && !args->selected;
+}
+
+/*
+ * kallsyms__parse() callback that locates a kernel symbol.
+ *
+ * Until a match is found, each entry is tested against the request. After
+ * that, the first entry with a different address supplies the symbol size.
+ * For an n'th-occurrence request the walk stops as soon as the size is
+ * known; otherwise it continues in order to detect a duplicate name.
+ *
+ * Returns 1 to stop the walk, 0 to continue.
+ */
+static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (!args->started) {
+		if (kern_sym_match(args, name, type)) {
+			args->start = start;
+			args->started = true;
+		}
+		return 0;
+	}
+
+	if (!args->size)
+		args->size = start - args->start;
+
+	if (args->selected)
+		return args->size ? 1 : 0;
+
+	if (!kern_sym_match(args, name, type))
+		return 0;
+
+	args->duplicate = true;
+
+	return 1;
+}
+
+/*
+ * kallsyms__parse() callback that lists every symbol matching the request,
+ * numbered in the order found, each followed by the name of the next
+ * non-matching symbol as a hint to its location.
+ */
+static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (!kern_sym_match(args, name, type)) {
+		if (args->near) {
+			args->near = false;
+			pr_err("\t\twhich is near\t\t%s\n", name);
+		}
+		return 0;
+	}
+
+	args->cnt += 1;
+	pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+	       args->cnt, start, type, name);
+	args->near = true;
+
+	return 0;
+}
+
+/*
+ * Report that a symbol lookup failed, wording the message according to the
+ * kind of lookup (@idx < 0: any, 0: global, > 0: n'th occurrence).
+ * Always returns -EINVAL so callers can return the result directly.
+ */
+static int sym_not_found_error(const char *sym_name, int idx)
+{
+	if (idx < 0) {
+		pr_err("Symbol '%s' not found.\n", sym_name);
+	} else if (idx == 0) {
+		pr_err("Global symbol '%s' not found.\n", sym_name);
+	} else {
+		pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n",
+		       idx, sym_name);
+	}
+
+	pr_err("Note that symbols must be functions.\n");
+
+	return -EINVAL;
+}
+
+/*
+ * Look up a kernel function symbol in /proc/kallsyms.
+ *
+ * @idx selects the lookup mode: negative means the name must be unique,
+ * 0 means a global symbol, positive means the n'th occurrence.
+ *
+ * On success *start and *size describe the symbol (*size is 0 if no later
+ * symbol was available to derive it from). Returns 0 on success, a negative
+ * error if kallsyms cannot be parsed, or -EINVAL if the symbol is missing
+ * or ambiguous (the candidates are printed in the latter case).
+ */
+static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx)
+{
+	struct sym_args args = {
+		.name		= sym_name,
+		.idx		= idx,
+		.global		= !idx,
+		.selected	= idx > 0,
+	};
+	int ret;
+
+	*size = 0;
+	*start = 0;
+
+	ret = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb);
+	if (ret < 0) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		return ret;
+	}
+
+	if (!args.duplicate) {
+		if (!args.started) {
+			pr_err("Kernel symbol lookup: ");
+			return sym_not_found_error(sym_name, idx);
+		}
+
+		*start = args.start;
+		*size = args.size;
+
+		return 0;
+	}
+
+	/* Ambiguous name: list the candidates and how to pick one */
+	pr_err("Multiple kernel symbols with name '%s'\n", sym_name);
+	args.cnt = 0;
+	kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb);
+	pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+	       sym_name);
+	pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+
+	return -EINVAL;
+}
+
+/*
+ * kallsyms__parse() callback that tracks the address range spanned by all
+ * function symbols: the first one seen fixes the start, and every one seen
+ * extends the size.
+ */
+static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+			       char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (symbol_type__is_a(type, MAP__FUNCTION)) {
+		if (!args->started) {
+			args->start = start;
+			args->started = true;
+		}
+		/* Don't know exactly where the kernel ends, so we add a page */
+		args->size = round_up(start, page_size) + page_size -
+			     args->start;
+	}
+
+	return 0;
+}
+
+/*
+ * Set @filt to cover the whole kernel, using the range spanned by the
+ * function symbols in /proc/kallsyms.
+ *
+ * Returns 0 on success, the kallsyms__parse() error if parsing fails, or
+ * -EINVAL if parsing succeeded but no function symbol was found.
+ */
+static int addr_filter__entire_kernel(struct addr_filter *filt)
+{
+	struct sym_args args = { .started = false };
+	int err;
+
+	err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb);
+	if (err < 0 || !args.started) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		/* Never report success when the filter was left unfilled */
+		return err < 0 ? err : -EINVAL;
+	}
+
+	filt->addr = args.start;
+	filt->size = args.size;
+
+	return 0;
+}
+
+/*
+ * Check that the end symbol (@start, @size) does not finish before the
+ * filter's start address, which would make the region size negative.
+ *
+ * Returns 0 if the order is valid, otherwise prints an error naming the
+ * offending symbol(s) and returns -EINVAL.
+ */
+static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size)
+{
+	if (start + size >= filt->addr)
+		return 0;
+
+	if (filt->sym_from) {
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n",
+		       filt->sym_to, start, filt->sym_from, filt->addr);
+	} else {
+		/* The address is not parenthesised here: no trailing ')' */
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64"\n",
+		       filt->sym_to, start, filt->addr);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Resolve the kernel symbol names in @filt into an address and size.
+ *
+ * '*' as the start symbol selects the entire kernel. Otherwise the start
+ * symbol provides the address (and the size too, for a range filter given
+ * without an explicit size), and the end symbol, if any, extends the region
+ * to the end of that symbol.
+ *
+ * Returns 0 on success or a negative error code if kernel addresses are
+ * restricted, a symbol cannot be found, the end precedes the start, or the
+ * size of the deciding symbol cannot be determined.
+ */
+static int addr_filter__resolve_kernel_syms(struct addr_filter *filt)
+{
+	bool no_size = false;
+	u64 start, size;
+	int err;
+
+	if (symbol_conf.kptr_restrict) {
+		pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n");
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*"))
+		return addr_filter__entire_kernel(filt);
+
+	if (filt->sym_from) {
+		err = find_kern_sym(filt->sym_from, &start, &size,
+				    filt->sym_from_idx);
+		if (err)
+			return err;
+		filt->addr = start;
+		if (filt->range && !filt->size && !filt->sym_to) {
+			filt->size = size;
+			/* A zero size means it could not be determined */
+			no_size = !size;
+		}
+	}
+
+	if (filt->sym_to) {
+		err = find_kern_sym(filt->sym_to, &start, &size,
+				    filt->sym_to_idx);
+		if (err)
+			return err;
+
+		err = check_end_after_start(filt, start, size);
+		if (err)
+			return err;
+		filt->size = start + size - filt->addr;
+		no_size = !size;
+	}
+
+	/* The very last symbol in kallsyms does not imply a particular size */
+	if (no_size) {
+		pr_err("Cannot determine size of symbol '%s'\n",
+		       filt->sym_to ? filt->sym_to : filt->sym_from);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a map for the object file @name, load its symbols and return a
+ * reference to the DSO (the temporary map is released again).
+ *
+ * Returns NULL if the map cannot be created. A failure to load symbols is
+ * reported but the DSO is still returned, so that later symbol lookups fail
+ * with their own error as well.
+ */
+static struct dso *load_dso(const char *name)
+{
+	struct map *map;
+	struct dso *dso;
+
+	map = dso__new_map(name);
+	if (!map)
+		return NULL;
+
+	/* Say why before the caller reports "symbol not found" */
+	if (map__load(map) < 0)
+		pr_err("File '%s' not found or has no symbols.\n", name);
+
+	dso = dso__get(map->dso);
+
+	map__put(map);
+
+	return dso;
+}
+
+/*
+ * Decide whether @sym is the symbol being looked for: same name, and global
+ * (@idx == 0), the n'th occurrence (@idx > 0) or any occurrence (@idx < 0).
+ * *cnt is only advanced for name matches tested against a positive @idx.
+ */
+static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt,
+			  int idx)
+{
+	if (arch__compare_symbol_names(name, sym->name))
+		return false;
+
+	if (!idx && sym->binding == STB_GLOBAL)
+		return true;
+
+	if (idx > 0 && ++*cnt == idx)
+		return true;
+
+	return idx < 0;
+}
+
+/*
+ * List every function symbol in @dso named @sym_name, numbered in address
+ * order with its binding, each followed by the name of the next symbol as a
+ * hint to its location, then explain how to disambiguate.
+ */
+static void print_duplicate_syms(struct dso *dso, const char *sym_name)
+{
+	struct symbol *sym;
+	bool prev_matched = false;
+	int nr = 0;
+
+	pr_err("Multiple symbols with name '%s'\n", sym_name);
+
+	for (sym = dso__first_symbol(dso, MAP__FUNCTION); sym;
+	     sym = dso__next_symbol(sym)) {
+		char binding;
+
+		if (!dso_sym_match(sym, sym_name, &nr, -1)) {
+			if (prev_matched) {
+				prev_matched = false;
+				pr_err("\t\twhich is near\t\t%s\n", sym->name);
+			}
+			continue;
+		}
+
+		if (sym->binding == STB_GLOBAL)
+			binding = 'g';
+		else if (sym->binding == STB_LOCAL)
+			binding = 'l';
+		else
+			binding = 'w';
+
+		nr += 1;
+		pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+		       nr, sym->start, binding, sym->name);
+		prev_matched = true;
+	}
+
+	pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+	       sym_name);
+	pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+}
+
+/*
+ * Look up a function symbol in @dso.
+ *
+ * @idx selects the lookup mode: negative means the name must be unique,
+ * 0 means a global symbol, positive means the n'th occurrence.
+ *
+ * On success *start and *size describe the symbol; a symbol with zero size
+ * is sized from the start of the following symbol instead. Returns 0 on
+ * success, or -EINVAL if the symbol is missing or ambiguous (the candidates
+ * are printed in the latter case).
+ */
+static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+			u64 *size, int idx)
+{
+	struct symbol *sym;
+	int cnt = 0;
+
+	*start = 0;
+	*size = 0;
+
+	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	while (sym) {
+		if (*start) {
+			if (!*size)
+				*size = sym->start - *start;
+			if (idx > 0) {
+				/*
+				 * The n'th occurrence is found and sized:
+				 * that is success. Callers treat any
+				 * non-zero return as an error.
+				 */
+				if (*size)
+					return 0;
+			} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+				print_duplicate_syms(dso, sym_name);
+				return -EINVAL;
+			}
+		} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+			*start = sym->start;
+			*size = sym->end - sym->start;
+		}
+		sym = dso__next_symbol(sym);
+	}
+
+	if (!*start)
+		return sym_not_found_error(sym_name, idx);
+
+	return 0;
+}
+
+/*
+ * Set @filt to span from the first to the last function symbol of @dso.
+ * Returns 0 on success or -EINVAL if the DSO has no function symbols.
+ */
+static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
+{
+	struct symbol *first = dso__first_symbol(dso, MAP__FUNCTION);
+	struct symbol *last = dso__last_symbol(dso, MAP__FUNCTION);
+
+	if (first && last) {
+		filt->addr = first->start;
+		filt->size = last->end - first->start;
+		return 0;
+	}
+
+	pr_err("Failed to determine filter for %s\nNo symbols found.\n",
+	       filt->filename);
+
+	return -EINVAL;
+}
+
+/*
+ * Resolve any symbol names in @filt into an address and size.
+ *
+ * Filters without symbols are left untouched. Filters without a file name
+ * are resolved against the kernel; otherwise the symbols are looked up in
+ * the named object file, where '*' as the start symbol selects the whole
+ * file.
+ *
+ * Returns 0 on success or a negative error code. The DSO reference taken
+ * for the lookup is dropped on every path.
+ */
+static int addr_filter__resolve_syms(struct addr_filter *filt)
+{
+	u64 start, size;
+	struct dso *dso;
+	int err = 0;
+
+	if (!filt->sym_from && !filt->sym_to)
+		return 0;
+
+	if (!filt->filename)
+		return addr_filter__resolve_kernel_syms(filt);
+
+	dso = load_dso(filt->filename);
+	if (!dso) {
+		pr_err("Failed to load symbols from: %s\n", filt->filename);
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*")) {
+		err = addr_filter__entire_dso(filt, dso);
+		goto put_dso;
+	}
+
+	if (filt->sym_from) {
+		err = find_dso_sym(dso, filt->sym_from, &start, &size,
+				   filt->sym_from_idx);
+		if (err)
+			goto put_dso;
+		filt->addr = start;
+		if (filt->range && !filt->size && !filt->sym_to)
+			filt->size = size;
+	}
+
+	if (filt->sym_to) {
+		err = find_dso_sym(dso, filt->sym_to, &start, &size,
+				   filt->sym_to_idx);
+		if (err)
+			goto put_dso;
+
+		err = check_end_after_start(filt, start, size);
+		/* Leave via put_dso so the dso reference is not leaked */
+		if (err)
+			goto put_dso;
+
+		filt->size = start + size - filt->addr;
+	}
+
+put_dso:
+	dso__put(dso);
+
+	return err;
+}
+
+/*
+ * Format a resolved filter as a newly allocated string:
+ *   <action> 0x<addr>[/0x<size>][@<absolute file name>]
+ * The size is included only for range filters, and the file name is
+ * canonicalised with realpath().
+ *
+ * Returns the string, which the caller must free, or NULL if the path
+ * cannot be resolved or the string cannot be allocated.
+ */
+static char *addr_filter__to_str(struct addr_filter *filt)
+{
+	char resolved[PATH_MAX];
+	const char *sep = "";
+	const char *path = "";
+	char *out;
+	int n;
+
+	if (filt->filename) {
+		path = realpath(filt->filename, resolved);
+		if (!path)
+			return NULL;
+		sep = "@";
+	}
+
+	if (filt->range)
+		n = asprintf(&out, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s",
+			     filt->action, filt->addr, filt->size, sep, path);
+	else
+		n = asprintf(&out, "%s 0x%"PRIx64"%s%s",
+			     filt->action, filt->addr, sep, path);
+
+	if (n < 0)
+		return NULL;
+
+	return out;
+}
+
+/*
+ * Parse the address filter string @filter, resolve its symbols and append
+ * the resulting filters, one by one, to @evsel.
+ *
+ * @max_nr is the maximum number of filters accepted. Returns 0 on success
+ * or a negative error code, in which case a description of the expected
+ * filter format is printed.
+ */
+static int parse_addr_filter(struct perf_evsel *evsel, const char *filter,
+			     int max_nr)
+{
+	struct addr_filters filts;
+	struct addr_filter *filt;
+	int err;
+
+	addr_filters__init(&filts);
+
+	err = addr_filters__parse_bare_filter(&filts, filter);
+	if (err)
+		goto out_exit;
+
+	if (filts.cnt > max_nr) {
+		pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n",
+		       filts.cnt, max_nr);
+		err = -EINVAL;
+		goto out_exit;
+	}
+
+	list_for_each_entry(filt, &filts.head, list) {
+		char *new_filter;
+
+		err = addr_filter__resolve_syms(filt);
+		if (err)
+			goto out_exit;
+
+		new_filter = addr_filter__to_str(filt);
+		if (!new_filter) {
+			err = -ENOMEM;
+			goto out_exit;
+		}
+
+		/*
+		 * new_filter was allocated by addr_filter__to_str() and is
+		 * ours to free once it has been appended.
+		 * NOTE(review): relies on perf_evsel__append_addr_filter()
+		 * copying the string rather than keeping the pointer, as
+		 * its const parameter suggests - confirm.
+		 */
+		err = perf_evsel__append_addr_filter(evsel, new_filter);
+		free(new_filter);
+		if (err) {
+			err = -ENOMEM;
+			goto out_exit;
+		}
+	}
+
+out_exit:
+	addr_filters__exit(&filts);
+
+	if (err) {
+		pr_err("Failed to parse address filter: '%s'\n", filter);
+		pr_err("Filter format is: filter|start|stop|tracestop <start symbol or address> [/ <end symbol or size>] [@<file name>]\n");
+		pr_err("Where multiple filters are separated by space or comma.\n");
+	}
+
+	return err;
+}
+
+/*
+ * Find the PMU whose type matches the event's attr.type.
+ * Returns NULL if no scanned PMU matches.
+ */
+static struct perf_pmu *perf_evsel__find_pmu(struct perf_evsel *evsel)
+{
+	struct perf_pmu *pmu;
+
+	for (pmu = perf_pmu__scan(NULL); pmu; pmu = perf_pmu__scan(pmu)) {
+		if (pmu->type == evsel->attr.type)
+			return pmu;
+	}
+
+	return NULL;
+}
+
+/*
+ * Read the "nr_addr_filters" value of the event's PMU. The result is 0 when
+ * the event has no matching PMU or the value cannot be read.
+ */
+static int perf_evsel__nr_addr_filter(struct perf_evsel *evsel)
+{
+	int nr = 0;
+	struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+
+	if (pmu)
+		perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr);
+
+	return nr;
+}
+
+/*
+ * For every event in @evlist that has a filter string and whose PMU reports
+ * a non-zero number of address filters, replace the filter with its parsed
+ * form (symbols resolved to addresses).
+ *
+ * Returns 0 on success or the first parse error encountered.
+ */
+int auxtrace_parse_filters(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		char *orig_filter = evsel->filter;
+		int max_nr = perf_evsel__nr_addr_filter(evsel);
+		int err;
+
+		if (orig_filter && max_nr) {
+			/* The evsel's filter is rebuilt from scratch */
+			evsel->filter = NULL;
+			err = parse_addr_filter(evsel, orig_filter, max_nr);
+			free(orig_filter);
+			if (err)
+				return err;
+			pr_debug("Address filter: %s\n", evsel->filter);
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 09286f193532..26fb1ee5746a 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -318,6 +318,48 @@ struct auxtrace_record {
unsigned int alignment;
};

+/**
+ * struct addr_filter - address filter.
+ * @list: list node
+ * @range: true if it is a range filter (action 'filter' or 'tracestop', or a
+ *         size / end symbol was given)
+ * @start: true if action is 'filter' or 'start'
+ * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted
+ *          to 'stop')
+ * @sym_from: symbol name for the filter address
+ * @sym_to: symbol name that determines the filter size
+ * @sym_from_idx: selects n'th from symbols with the same name (0 means global
+ *                and less than 0 means symbol must be unique)
+ * @sym_to_idx: same as @sym_from_idx but for @sym_to
+ * @addr: filter address
+ * @size: filter region size (for range filters)
+ * @filename: DSO file name or NULL for the kernel
+ * @str: allocated string that contains the other string members (@action,
+ *       @sym_from, @sym_to and @filename point into it)
+ */
+struct addr_filter {
+ struct list_head list;
+ bool range;
+ bool start;
+ const char *action;
+ const char *sym_from;
+ const char *sym_to;
+ int sym_from_idx;
+ int sym_to_idx;
+ u64 addr;
+ u64 size;
+ const char *filename;
+ char *str;
+};
+
+/**
+ * struct addr_filters - list of address filters.
+ * @head: list of address filters (struct addr_filter, linked through @list)
+ * @cnt: number of address filters currently on @head
+ */
+struct addr_filters {
+ struct list_head head;
+ int cnt;
+};
+
#ifdef HAVE_AUXTRACE_SUPPORT

/*
@@ -482,6 +524,12 @@ void perf_session__auxtrace_error_inc(struct perf_session *session,
union perf_event *event);
void events_stats__auxtrace_error_warn(const struct events_stats *stats);

+void addr_filters__init(struct addr_filters *filts);
+void addr_filters__exit(struct addr_filters *filts);
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+ const char *filter);
+int auxtrace_parse_filters(struct perf_evlist *evlist);
+
static inline int auxtrace__process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -640,6 +688,12 @@ void auxtrace_index__free(struct list_head *head __maybe_unused)
{
}

+/*
+ * No-op variant: reports success without parsing any filters.
+ * NOTE(review): appears to be the stub for builds without
+ * HAVE_AUXTRACE_SUPPORT - confirm against the enclosing #ifdef.
+ */
+static inline
+int auxtrace_parse_filters(struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
struct auxtrace_mmap_params *mp,
void *userpg, int fd);
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Commit 608c34de0b3d ("perf symbols: Mark if a symbol is idle in the
library") causes idle symbols to vanish from perf script output. That is
because print functions suppress symbols marked as 'idle'.

However, suppression of 'idle' functions is only used by 'perf top' and
'perf top' does not use the print functions. Consequently that
functionality can simply be removed from the print functions.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Fixes: 608c34de0b3d ("perf symbols: Mark if a symbol is idle in the library")
Link: http://lkml.kernel.org/r/1474641528-18776-4-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/evsel_fprintf.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 9111e0666950..662a0a6182e7 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -122,9 +122,6 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
if (!node)
break;

- if (node->sym && node->sym->idle)
- goto next;
-
printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");

if (print_ip)
@@ -158,7 +155,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,

if (!print_oneline)
printed += fprintf(fp, "\n");
-next:
+
callchain_cursor_advance(cursor);
}
}
@@ -181,7 +178,7 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
if (cursor != NULL) {
printed += sample__fprintf_callchain(sample, left_alignment,
print_opts, cursor, fp);
- } else if (!(al->sym && al->sym->idle)) {
+ } else {
printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");

if (print_ip)
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Wang Nan <wang...@huawei.com>

Building perf on ARM32 with libbabeltrace enabled fails with the
following error:

util/data-convert-bt.c: In function 'add_bpf_output_values':
util/data-convert-bt.c:440:3: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'unsigned int' [-Werror=format]
cc1: all warnings being treated as errors

Fix it by changing %lu to %zu.

Signed-off-by: Wang Nan <wang...@huawei.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Zefan Li <liz...@huawei.com>
Cc: pi3o...@163.com
Fixes: 6122d57e9f7c ("perf data: Support converting data from bpf_perf_event_output()")
Link: http://lkml.kernel.org/r/1475035126-146587-1-g...@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/data-convert-bt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 4f979bb27b6c..7123f4de32cc 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -437,7 +437,7 @@ add_bpf_output_values(struct bt_ctf_event_class *event_class,
int ret;

if (nr_elements * sizeof(u32) != raw_size)
- pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+ pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n",
raw_size, nr_elements * sizeof(u32) - raw_size);

len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Masami Hiramatsu <mhir...@kernel.org>

Ignore the error when perf probe fails to find inline function
instances. This can happen when we search for a method in C++ debuginfo.
If there is no instance at all in the target, perf probe can return an
error.

E.g. without this fix:
----
$ perf probe -x /usr/lib64/libstdc++.so.6 -vD showmanyc
probe-definition(0): showmanyc
symbol:showmanyc file:(null) line:0 offset:0 return:0 lazy:(null)
0 arguments
symbol:catch file:(null) line:0 offset:0 return:0 lazy:(null)
symbol:throw file:(null) line:0 offset:0 return:0 lazy:(null)
symbol:rethrow file:(null) line:0 offset:0 return:0 lazy:(null)
Open Debuginfo file: /usr/lib/debug/usr/lib64/libstdc++.so.6.0.22.debug
Try to find probe point from debuginfo.
Matched function: showmanyc
An error occurred in debuginfo analysis (-2).
Trying to use symbols.
Failed to find symbol showmanyc in /usr/lib64/libstdc++.so.6.0.22
Error: Failed to add events. Reason: No such file or directory (Code: -2)
----

This is because one of the showmanyc definitions is inline but has no
instances. With this fix, the probes are shown successfully, as below.
----
$ perf probe -x /usr/lib64/libstdc++.so.6 -D showmanyc
p:probe_libstdc++/showmanyc /usr/lib64/libstdc++.so.6.0.22:0xb0e50
p:probe_libstdc++/showmanyc_1 /usr/lib64/libstdc++.so.6.0.22:0xc7c40
p:probe_libstdc++/showmanyc_2 /usr/lib64/libstdc++.so.6.0.22:0xecfa0
p:probe_libstdc++/showmanyc_3 /usr/lib64/libstdc++.so.6.0.22:0x115fc0
p:probe_libstdc++/showmanyc_4 /usr/lib64/libstdc++.so.6.0.22:0x121a90
----

Signed-off-by: Masami Hiramatsu <mhir...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Tested-by: Jiri Olsa <jo...@kernel.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <tg...@linutronix.de>
Link: http://lkml.kernel.org/r/147464489775.29804.3190419491209875936.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/probe-finder.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 8daca4fc1f8d..5fe8325e81f2 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -988,7 +988,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
return DWARF_CB_OK;

- pr_debug("Matched function: %s\n", dwarf_diename(sp_die));
+ pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
+ (unsigned long)dwarf_dieoffset(sp_die));
pf->fname = dwarf_decl_file(sp_die);
if (pp->line) { /* Function relative line */
dwarf_decl_line(sp_die, &pf->lno);
@@ -1011,7 +1012,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
param->retval = die_walk_instances(sp_die,
probe_point_inline_cb, (void *)pf);
/* This could be a non-existed inline definition */
- if (param->retval == -ENOENT && strisglob(pp->function))
+ if (param->retval == -ENOENT)
param->retval = 0;
}

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
Hi Ingo,

Please consider pulling, more to come soon,

- Arnaldo

Build and test results at the end of this message.

The following changes since commit 6b652de2b27c0a4020ce0e8f277e782b6af76096:

Merge tag 'perf-core-for-mingo-20160922' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core (2016-09-23 07:21:38 +0200)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-20160929

for you to fetch changes up to d18019a53a07e009899ff6b8dc5ec30f249360d9:

perf tests: Add dwarf unwind test for powerpc (2016-09-29 11:18:21 -0300)

----------------------------------------------------------------
perf/core improvements and fixes:

User visible:
-------------

New features:

- Add support for using symbols in address filters with Intel PT and ARM
CoreSight (hardware assisted tracing facilities) (Adrian Hunter, Mathieu Poirier)

Fixes:

- Fix MMAP event synthesis for pre-existing threads when no hugetlbfs
mount is in place (Adrian Hunter)

- Don't ignore kernel idle symbols in 'perf script' (Adrian Hunter)

- Assorted Intel PT fixes (Adrian Hunter)

Improvements:

- Fix handling of C++ symbols in 'perf probe' (Masami Hiramatsu)

- Beautify sched_[gs]et_attr return value in 'perf trace' (Arnaldo Carvalho de Melo)

Infrastructure:
---------------

New features:

- Add dwarf unwind 'perf test' for powerpc (Ravi Bangoria)

Fixes:

- Fix error paths in 'perf record' (Adrian Hunter)

Documentation:

- Update documentation info about quipper, a C++ parser for converting
to/from perf.data/chromium profiling format (Simon Que)

Build Fixes:

- Fix building on 32-bit platforms with libbabeltrace (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

----------------------------------------------------------------
Adrian Hunter (16):
perf record: Fix documentation 'event_sources' -> 'event_source'
perf tools: Fix MMAP event synthesis broken by MAP_HUGETLB change
perf script: Fix vanished idle symbols
perf record: Rename label 'out_symbol_exit'
perf record: Fix error paths
perf symbols: Add dso__last_symbol()
perf record: Add support for using symbols in address filters
perf probe: Increase debug level of SDT debug messages
perf intel-pt: Fix snapshot overlap detection decoder errors
perf intel-pt: Add support for recording the max non-turbo ratio
perf intel-pt: Fix missing error codes processing auxtrace_info
perf intel-pt: Add a helper function for processing AUXTRACE_INFO
perf intel-pt: Record address filter in AUXTRACE_INFO event
perf intel-pt: Read address filter from AUXTRACE_INFO event
perf intel-pt: Enable decoder to handle TIP.PGD with missing IP
perf intel-pt: Fix decoding when there are address filters

Arnaldo Carvalho de Melo (1):
perf trace: Beautify sched_[gs]et_attr return value

Masami Hiramatsu (4):
perf probe: Ignore the error of finding inline instance
perf probe: Skip if the function address is 0
perf probe: Fix to cut off incompatible chars from group name
perf probe: Match linkage name with mangled name

Mathieu Poirier (3):
perf tools: Make perf_evsel__append_filter() generic
perf evsel: New tracepoint specific function
perf evsel: Add support for address filters

Ravi Bangoria (1):
perf tests: Add dwarf unwind test for powerpc

Simon Que (1):
perf tools: Update documentation info about quipper

Wang Nan (1):
perf data: Fix building in 32 bit platform with libbabeltrace

tools/perf/Documentation/perf-record.txt | 61 +-
tools/perf/Documentation/perf.data-file-format.txt | 6 +-
tools/perf/arch/powerpc/Build | 1 +
tools/perf/arch/powerpc/include/arch-tests.h | 13 +
tools/perf/arch/powerpc/include/perf_regs.h | 2 +
tools/perf/arch/powerpc/tests/Build | 4 +
tools/perf/arch/powerpc/tests/arch-tests.c | 15 +
tools/perf/arch/powerpc/tests/dwarf-unwind.c | 62 ++
tools/perf/arch/powerpc/tests/regs_load.S | 94 +++
tools/perf/arch/x86/util/intel-pt.c | 57 +-
tools/perf/builtin-record.c | 32 +-
tools/perf/builtin-trace.c | 10 +-
tools/perf/tests/Build | 2 +-
tools/perf/tests/dwarf-unwind.c | 2 +-
tools/perf/util/auxtrace.c | 737 +++++++++++++++++++++
tools/perf/util/auxtrace.h | 54 ++
tools/perf/util/build-id.c | 4 +-
tools/perf/util/data-convert-bt.c | 2 +-
tools/perf/util/dwarf-aux.c | 28 +-
tools/perf/util/dwarf-aux.h | 3 +
tools/perf/util/event.c | 3 +-
tools/perf/util/evsel.c | 16 +-
tools/perf/util/evsel.h | 5 +-
tools/perf/util/evsel_fprintf.c | 7 +-
.../perf/util/intel-pt-decoder/intel-pt-decoder.c | 30 +
.../perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 +
tools/perf/util/intel-pt.c | 172 ++++-
tools/perf/util/intel-pt.h | 4 +-
tools/perf/util/parse-events.c | 41 +-
tools/perf/util/probe-event.c | 10 +-
tools/perf/util/probe-file.c | 2 +-
tools/perf/util/probe-finder.c | 17 +-
tools/perf/util/symbol.c | 15 +
tools/perf/util/symbol.h | 1 +
34 files changed, 1451 insertions(+), 62 deletions(-)
create mode 100644 tools/perf/arch/powerpc/include/arch-tests.h
create mode 100644 tools/perf/arch/powerpc/tests/Build
create mode 100644 tools/perf/arch/powerpc/tests/arch-tests.c
create mode 100644 tools/perf/arch/powerpc/tests/dwarf-unwind.c
create mode 100644 tools/perf/arch/powerpc/tests/regs_load.S

# time dm
1 alpine:3.4: Ok
2 android-ndk:r12b-arm: Ok
3 archlinux:latest: Ok
4 centos:5: Ok
5 centos:6: Ok
6 centos:7: Ok
7 debian:7: Ok
8 debian:8: Ok
9 debian:experimental: Ok
10 fedora:20: Ok
11 fedora:21: Ok
12 fedora:22: Ok
13 fedora:23: Ok
14 fedora:24: Ok
15 fedora:24-x-ARC-uClibc: Ok
16 fedora:rawhide: Ok
17 mageia:5: Ok
18 opensuse:13.2: Ok
19 opensuse:42.1: Ok
20 opensuse:tumbleweed: Ok
21 ubuntu:12.04.5: Ok
22 ubuntu:14.04: Ok
23 ubuntu:14.04.4: Ok
24 ubuntu:15.10: Ok
25 ubuntu:16.04: Ok
26 ubuntu:16.04-x-arm: Ok
27 ubuntu:16.04-x-arm64: Ok
28 ubuntu:16.04-x-powerpc: Ok
29 ubuntu:16.04-x-powerpc64: Ok
30 ubuntu:16.04-x-powerpc64el: Ok
31 ubuntu:16.04-x-s390: Ok
32 ubuntu:16.10: Ok
33 2246.21

real 37m26.862s
user 0m2.148s
sys 0m2.256s
#
#

$ make -C tools/perf build-test
make: Entering directory '/home/acme/git/linux/tools/perf'
tarpkg: ./tests/perf-targz-src-pkg .
make_debug_O: make DEBUG=1
make_no_libnuma_O: make NO_LIBNUMA=1
make_no_slang_O: make NO_SLANG=1
make_no_libaudit_O: make NO_LIBAUDIT=1
make_no_libbpf_O: make NO_LIBBPF=1
make_install_prefix_slash_O: make install prefix=/tmp/krava/
make_tags_O: make tags
make_doc_O: make doc
make_no_libunwind_O: make NO_LIBUNWIND=1
make_install_bin_O: make install-bin
make_no_libbionic_O: make NO_LIBBIONIC=1
make_with_babeltrace_O: make LIBBABELTRACE=1
make_no_demangle_O: make NO_DEMANGLE=1
make_perf_o_O: make perf.o
make_no_auxtrace_O: make NO_AUXTRACE=1
make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1
make_pure_O: make
make_util_map_o_O: make util/map.o
make_no_libdw_dwarf_unwind_O: make NO_LIBDW_DWARF_UNWIND=1
make_no_newt_O: make NO_NEWT=1
make_no_libpython_O: make NO_LIBPYTHON=1
make_util_pmu_bison_o_O: make util/pmu-bison.o
make_help_O: make help
make_install_prefix_O: make install prefix=/tmp/krava
make_static_O: make LDFLAGS=-static
make_no_ui_O: make NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
make_no_backtrace_O: make NO_BACKTRACE=1
make_clean_all_O: make clean all
make_install_O: make install
make_no_libelf_O: make NO_LIBELF=1
make_no_libperl_O: make NO_LIBPERL=1
make_no_gtk2_O: make NO_GTK2=1
make_minimal_O: make NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 NO_LIBCRYPTO=1 NO_SDT=1
OK
make: Leaving directory '/home/acme/git/linux/tools/perf'
$

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Mathieu Poirier <mathieu...@linaro.org>

Make perf_evsel__append_filter() static and introduce a new
tracepoint-specific function to append filters. That way we eliminate
redundant code and avoid formatting mistakes.

Signed-off-by: Mathieu Poirier <mathieu...@linaro.org>
Acked-by: Adrian Hunter <adrian...@intel.com>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linux-ar...@lists.infradead.org
Link: http://lkml.kernel.org/r/1474037045-31730-3-git-s...@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-trace.c | 7 +++----
tools/perf/util/evsel.c | 9 +++++++--
tools/perf/util/evsel.h | 3 +--
tools/perf/util/parse-events.c | 4 ++--
4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e04ba9d852d4..c298bd3e1d90 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2151,11 +2151,10 @@ static int trace__set_ev_qualifier_filter(struct trace *trace)
if (filter == NULL)
goto out_enomem;

- if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter,
- "(%s) && (%s)", filter)) {
+ if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
+ filter)) {
sys_exit = trace->syscalls.events.sys_exit;
- err = perf_evsel__append_filter(sys_exit,
- "(%s) && (%s)", filter);
+ err = perf_evsel__append_tp_filter(sys_exit, filter);
}

free(filter);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4f327b522e5c..3b4e7c452e43 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1045,8 +1045,8 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
return -1;
}

-int perf_evsel__append_filter(struct perf_evsel *evsel,
- const char *fmt, const char *filter)
+static int perf_evsel__append_filter(struct perf_evsel *evsel,
+ const char *fmt, const char *filter)
{
char *new_filter;

@@ -1062,6 +1062,11 @@ int perf_evsel__append_filter(struct perf_evsel *evsel,
return -1;
}

+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
+{
+ return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+}
+
int perf_evsel__enable(struct perf_evsel *evsel)
{
int nthreads = thread_map__nr(evsel->threads);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 7ef960298b3d..1f8c48f87f7d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -235,8 +235,7 @@ void perf_evsel__set_sample_id(struct perf_evsel *evsel,
bool use_sample_identifier);

int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
-int perf_evsel__append_filter(struct perf_evsel *evsel,
- const char *fmt, const char *filter);
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter);
int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
const char *filter);
int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b14784c765eb..16bf09cc3e8d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1767,7 +1767,7 @@ static int set_filter(struct perf_evsel *evsel, const void *arg)
return -1;
}

- if (perf_evsel__append_filter(evsel, "(%s) && (%s)", str) < 0) {
+ if (perf_evsel__append_tp_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -1798,7 +1798,7 @@ static int add_exclude_perf_filter(struct perf_evsel *evsel,

snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());

- if (perf_evsel__append_filter(evsel, "(%s) && (%s)", new_filter) < 0) {
+ if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Previously the maximum non-turbo ratio was calculated from TSC assuming
a 100 MHz multiplier which is correct for current hardware supporting
Intel PT. However more recent kernels also now export the value, so use
that in preference to the calculated value.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-11-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/arch/x86/util/intel-pt.c | 6 ++++++
tools/perf/util/intel-pt.c | 14 +++++++++++++-
tools/perf/util/intel-pt.h | 1 +
3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index a2412e9d883b..18b21514c153 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -302,6 +302,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
bool cap_user_time_zero = false, per_cpu_mmaps;
u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
+ unsigned long max_non_turbo_ratio;
int err;

if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
@@ -317,6 +318,10 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,

intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

+ if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
+ "%lu", &max_non_turbo_ratio) != 1)
+ max_non_turbo_ratio = 0;
+
if (!session->evlist->nr_mmaps)
return -EINVAL;

@@ -351,6 +356,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
+ auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;

return 0;
}
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index b744ea812a2e..77fbf02c8e41 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2023,6 +2023,7 @@ static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
[INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
[INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
+ [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n",
};

static void intel_pt_print_info(u64 *arr, int start, int finish)
@@ -2087,6 +2088,15 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
INTEL_PT_CYC_BIT);
}

+ if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
+ (sizeof(u64) * INTEL_PT_MAX_NONTURBO_RATIO)) {
+ pt->max_non_turbo_ratio =
+ auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
+ intel_pt_print_info(&auxtrace_info->priv[0],
+ INTEL_PT_MAX_NONTURBO_RATIO,
+ INTEL_PT_MAX_NONTURBO_RATIO);
+ }
+
pt->timeless_decoding = intel_pt_timeless_decoding(pt);
pt->have_tsc = intel_pt_have_tsc(pt);
pt->sampling_mode = false;
@@ -2156,7 +2166,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (pt->tc.time_mult) {
u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

- pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
+ if (!pt->max_non_turbo_ratio)
+ pt->max_non_turbo_ratio =
+ (tsc_freq + 50000000) / 100000000;
intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
intel_pt_log("Maximum non-turbo ratio %u\n",
pt->max_non_turbo_ratio);
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
index 0065949df693..8b8356233e6a 100644
--- a/tools/perf/util/intel-pt.h
+++ b/tools/perf/util/intel-pt.h
@@ -34,6 +34,7 @@ enum {
INTEL_PT_TSC_CTC_N,
INTEL_PT_TSC_CTC_D,
INTEL_PT_CYC_BIT,
+ INTEL_PT_MAX_NONTURBO_RATIO,
INTEL_PT_AUXTRACE_PRIV_MAX,
};

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:07 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Due to errata SKL014 "Intel PT TIP.PGD May Not Have Target IP Payload",
the Intel PT decoder needs to match address filters against TIP.PGD
packets. Parse the address filters and implement the decoder's
'pgd_ip()' callback to match the IP against the filter regions.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-17-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/intel-pt.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 82 insertions(+)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index c9fec19a7914..dc041d4368c8 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -105,6 +105,7 @@ struct intel_pt {
unsigned long num_events;

char *filter;
+ struct addr_filters filts;
};

enum switch_state {
@@ -550,6 +551,76 @@ out_no_cache:
return 0;
}

+static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
+ uint64_t offset, const char *filename)
+{
+ struct addr_filter *filt;
+ bool have_filter = false;
+ bool hit_tracestop = false;
+ bool hit_filter = false;
+
+ list_for_each_entry(filt, &pt->filts.head, list) {
+ if (filt->start)
+ have_filter = true;
+
+ if ((filename && !filt->filename) ||
+ (!filename && filt->filename) ||
+ (filename && strcmp(filename, filt->filename)))
+ continue;
+
+ if (!(offset >= filt->addr && offset < filt->addr + filt->size))
+ continue;
+
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
+ ip, offset, filename ? filename : "[kernel]",
+ filt->start ? "filter" : "stop",
+ filt->addr, filt->size);
+
+ if (filt->start)
+ hit_filter = true;
+ else
+ hit_tracestop = true;
+ }
+
+ if (!hit_tracestop && !hit_filter)
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
+ ip, offset, filename ? filename : "[kernel]");
+
+ return hit_tracestop || (have_filter && !hit_filter);
+}
+
+static int __intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct thread *thread;
+ struct addr_location al;
+ u8 cpumode;
+ u64 offset;
+
+ if (ip >= ptq->pt->kernel_start)
+ return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = ptq->thread;
+ if (!thread)
+ return -EINVAL;
+
+ thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
+ if (!al.map || !al.map->dso)
+ return -EINVAL;
+
+ offset = al.map->map_ip(al.map, ip);
+
+ return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
+ al.map->dso->long_name);
+}
+
+static bool intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ return __intel_pt_pgd_ip(ip, data) > 0;
+}
+
static bool intel_pt_get_config(struct intel_pt *pt,
struct perf_event_attr *attr, u64 *config)
{
@@ -726,6 +797,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;

+ if (pt->filts.cnt > 0)
+ params.pgd_ip = intel_pt_pgd_ip;
+
if (pt->synth_opts.instructions) {
if (pt->synth_opts.period) {
switch (pt->synth_opts.period_type) {
@@ -1776,6 +1850,7 @@ static void intel_pt_free(struct perf_session *session)
intel_pt_free_events(session);
session->auxtrace = NULL;
thread__put(pt->unknown_thread);
+ addr_filters__exit(&pt->filts);
zfree(&pt->filter);
free(pt);
}
@@ -2073,6 +2148,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (!pt)
return -ENOMEM;

+ addr_filters__init(&pt->filts);
+
perf_config(intel_pt_perf_config, pt);

err = auxtrace_queues__init(&pt->queues);
@@ -2147,6 +2224,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
err = -EINVAL;
goto err_free_queues;
}
+ err = addr_filters__parse_bare_filter(&pt->filts,
+ filter);
+ if (err)
+ goto err_free_queues;
}
intel_pt_print_info_str("Filter string", pt->filter);
}
@@ -2268,6 +2349,7 @@ err_free_queues:
auxtrace_queues__free(&pt->queues);
session->auxtrace = NULL;
err_free:
+ addr_filters__exit(&pt->filts);
zfree(&pt->filter);
free(pt);
return err;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:08 AM9/29/16
to
From: Simon Que <sq...@chromium.org>

The existing link is outdated. The most recent quipper code can be found at the
new URL.

Committer notes:

Quipper is a C++ parser that can be used to convert from a perf.data
file to and from a protobuf, a Chromium OS facility.

Signed-off-by: Simon Que <sq...@chromium.org>
Acked-by: Andi Kleen <a...@linux.intel.com>
Cc: Adrian Hunter <adrian...@intel.com>
Cc: Chong Jiang <chong...@chromium.org>
Link: http://lkml.kernel.org/n/tip-4q1nm7jl3v...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf.data-file-format.txt | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index fdc99fe6bbc3..b664b18d3991 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -437,6 +437,10 @@ in pmu-tools parser. This allows to read perf.data from python and dump it.
quipper

The quipper C++ parser is available at
-https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/
+https://chromium.googlesource.com/chromiumos/platform2
+
+It is under the chromiumos-wide-profiling/ subdirectory. This library can
+convert a perf data file to a protobuf and vice versa.
+
Unfortunately this parser tends to be many versions behind and may not be able
to parse data files generated by recent perf.
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:08 AM9/29/16
to
From: Masami Hiramatsu <mhir...@kernel.org>

Also match the given name against the linkage name of the DIE, if one
exists. The linkage name is used for storing the mangled name of the object.

Thus, this allows 'perf probe' to find the appropriate probe point from a
mangled symbol, as shown below.

E.g. without this fix:
----
$ perf probe -x /usr/lib64/libstdc++.so.6 \
-D _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv
Probe point '_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv'
not found.
Error: Failed to add events.
----

With this fix, perf probe can find the correct one.
----
$ perf probe -x /usr/lib64/libstdc++.so.6 \
-D _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv
p:probe_libstdc/_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv
/usr/lib64/libstdc++.so.6.0.22:0x8ca60
----

Committer notes:

After the fix, setting it for real (no -D/--definition, that amounts to
a --dry-run):

# perf probe -x /usr/lib64/libstdc++.so.6 _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv
Added new event:
probe_libstdc:_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv (on _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv in /usr/lib64/libstdc++.so.6.0.22)

You can now use it in all perf tools, such as:

perf record -e probe_libstdc:_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv -aR sleep 1

# perf probe -l probe_libstdc:*
probe_libstdc:_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22)
#

Reported-by: Thomas Gleixner <tg...@linutronix.de>
Signed-off-by: Masami Hiramatsu <mhir...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Tested-by: Jiri Olsa <jo...@kernel.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Link: http://lkml.kernel.org/r/147464493162.29804.16715053505069382443.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/dwarf-aux.c | 28 ++++++++++++++++++++++++++--
tools/perf/util/dwarf-aux.h | 3 +++
2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index faec899435f2..41e068e94349 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -130,6 +130,22 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
}

/**
+ * die_get_linkage_name - Get the linkage name of the object
+ * @dw_die: A DIE of the object
+ *
+ * Get the linkage name attribute of given @dw_die.
+ * For C++ binary, the linkage name will be the mangled symbol.
+ */
+const char *die_get_linkage_name(Dwarf_Die *dw_die)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &attr) == NULL)
+ return NULL;
+ return dwarf_formstring(&attr);
+}
+
+/**
* die_compare_name - Compare diename and tname
* @dw_die: a DIE
* @tname: a string of target name
@@ -145,18 +161,26 @@ bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
}

/**
- * die_match_name - Match diename and glob
+ * die_match_name - Match diename/linkage name and glob
* @dw_die: a DIE
* @glob: a string of target glob pattern
*
* Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ * This also matches the linkage name.
*/
bool die_match_name(Dwarf_Die *dw_die, const char *glob)
{
const char *name;

name = dwarf_diename(dw_die);
- return name ? strglobmatch(name, glob) : false;
+ if (name && strglobmatch(name, glob))
+ return true;
+ /* fall back to check linkage name */
+ name = die_get_linkage_name(dw_die);
+ if (name && strglobmatch(name, glob))
+ return true;
+
+ return false;
}

/**
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 8b6d2f83af02..8ac53bf1ec4e 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -38,6 +38,9 @@ int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
int (*callback)(Dwarf_Die *, void *), void *data);

+/* Get DW_AT_linkage_name (should be NULL for C binary) */
+const char *die_get_linkage_name(Dwarf_Die *dw_die);
+
/* Ensure that this DIE is a subprogram and definition (not declaration) */
bool die_is_func_def(Dwarf_Die *dw_die);

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:08 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Patch ("perf record: Mark MAP_HUGETLB when synthesizing mmap events") breaks
MMAP event synthesis. The executable name comparison will match any name
if the length is zero, resulting in all the user space maps becoming
anonymous. This is particularly noticeable with system-wide traces.
Example:

perf record -a sleep 1
perf script --show-mmap-events

Committer note:

That is not the case when, say, one has a qemu instance and libvirt actually
mounts hugetlbfs. To test this I had to first umount it:

[root@jouet ~]# mount | grep hugetlbfs
hugetlbfs on /dev/hugepages type hugetlbfs (rw,relatime,seclabel)
[root@jouet ~]#

After unmounting it, the error fixed by this patch manifests itself:

# perf record -a sleep 1
# perf script --show-mmap-events | grep PERF_RECORD_MMAP2 | head -5
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x557d47ed8000(0x167000) @ 0 fd:00 3146896 7362875424355726126]: r-xp //anon
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c488d000(0x4000) @ 0 fd:00 3153214 7362875424355726126]: r-xp //anon
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4a92000(0x3d000) @ 0 fd:00 3159276 7362875424355726126]: r-xp //anon
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4cd5000(0x15000) @ 0 fd:00 3153725 7362875424355726126]: r-xp //anon
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4eeb000(0x25000) @ 0 fd:00 3153260 7362875424355726126]: r-xp //anon
#

Fixed version:

# perf record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.419 MB perf.data (182 samples) ]
# perf script --show-mmap-events | grep PERF_RECORD_MMAP2 | head -5
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x557d47ed8000(0x167000) @ 0 fd:00 3146896 7362875424355726126]: r-xp /usr/lib/systemd/systemd
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c488d000(0x4000) @ 0 fd:00 3153214 7362875424355726126]: r-xp /usr/lib64/libuuid.so.1.3.0
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4a92000(0x3d000) @ 0 fd:00 3159276 7362875424355726126]: r-xp /usr/lib64/libblkid.so.1.1.0
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4cd5000(0x15000) @ 0 fd:00 3153725 7362875424355726126]: r-xp /usr/lib64/libz.so.1.2.8
systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4eeb000(0x25000) @ 0 fd:00 3153260 7362875424355726126]: r-xp /usr/lib64/liblzma.so.5.2.2
[root@jouet ~]#

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-3-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/event.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 2880e2226fdb..8ab0d7da956b 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -346,7 +346,8 @@ out:
if (!strcmp(execname, ""))
strcpy(execname, anonstr);

- if (!strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
+ if (hugetlbfs_mnt_len &&
+ !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
strcpy(execname, anonstr);
event->mmap2.flags |= MAP_HUGETLB;
}
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:09 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

In preparation for fixing the error paths, rename label
'out_symbol_exit' to be 'out' because that error path can be used
irrespective of whether symbols (or anything else) have been initialized.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-5-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-record.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2d0d69be3bf8..b32a880ec473 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1643,7 +1643,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (rec->evlist->nr_entries == 0 &&
perf_evlist__add_default(rec->evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
- goto out_symbol_exit;
+ goto out;
}

if (rec->opts.target.tid && !rec->opts.no_inherit_set)
@@ -1663,7 +1663,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
ui__error("%s", errbuf);

err = -saved_errno;
- goto out_symbol_exit;
+ goto out;
}

err = -ENOMEM;
@@ -1672,7 +1672,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)

err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
if (err)
- goto out_symbol_exit;
+ goto out;

/*
* We take all buildids when the file contains
@@ -1684,11 +1684,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)

if (record_opts__config(&rec->opts)) {
err = -EINVAL;
- goto out_symbol_exit;
+ goto out;
}

err = __cmd_record(&record, argc, argv);
-out_symbol_exit:
+out:
perf_evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:40:11 AM9/29/16
to
From: Ravi Bangoria <ravi.b...@linux.vnet.ibm.com>

The user stack dump feature was recently added for powerpc. But there
was no test case available to test it.

This test works the same as on other architectures by preparing a stack
frame on the perf test thread and comparing each frame by unwinding it.

$ ./perf test 50
50: Test dwarf unwind : Ok

User stack dump for powerpc: https://lkml.org/lkml/2016/4/28/482

Signed-off-by: Ravi Bangoria <ravi.b...@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jo...@kernel.org>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Anju T Sudhakar <an...@linux.vnet.ibm.com>
Cc: Josh Poimboeuf <jpoi...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Matt Fleming <matt.f...@intel.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Wang Nan <wang...@huawei.com>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1474267100-31079-1-git-...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/arch/powerpc/Build | 1 +
tools/perf/arch/powerpc/include/arch-tests.h | 13 ++++
tools/perf/arch/powerpc/include/perf_regs.h | 2 +
tools/perf/arch/powerpc/tests/Build | 4 ++
tools/perf/arch/powerpc/tests/arch-tests.c | 15 +++++
tools/perf/arch/powerpc/tests/dwarf-unwind.c | 62 ++++++++++++++++++
tools/perf/arch/powerpc/tests/regs_load.S | 94 ++++++++++++++++++++++++++++
tools/perf/tests/Build | 2 +-
tools/perf/tests/dwarf-unwind.c | 2 +-
9 files changed, 193 insertions(+), 2 deletions(-)
create mode 100644 tools/perf/arch/powerpc/include/arch-tests.h
create mode 100644 tools/perf/arch/powerpc/tests/Build
create mode 100644 tools/perf/arch/powerpc/tests/arch-tests.c
create mode 100644 tools/perf/arch/powerpc/tests/dwarf-unwind.c
create mode 100644 tools/perf/arch/powerpc/tests/regs_load.S

diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build
index 54afe4a467e7..db52fa22d3a1 100644
--- a/tools/perf/arch/powerpc/Build
+++ b/tools/perf/arch/powerpc/Build
@@ -1 +1,2 @@
libperf-y += util/
+libperf-y += tests/
diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h
new file mode 100644
index 000000000000..84d8dedef2ed
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/arch-tests.h
@@ -0,0 +1,13 @@
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread);
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index 75de0e92e71e..c12f4e804f66 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -5,6 +5,8 @@
#include <linux/types.h>
#include <asm/perf_regs.h>

+void perf_regs_load(u64 *regs);
+
#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_POWERPC_MAX
#ifdef __powerpc64__
diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build
new file mode 100644
index 000000000000..d827ef384b33
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/Build
@@ -0,0 +1,4 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c
new file mode 100644
index 000000000000..e24f46241f40
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/arch-tests.c
@@ -0,0 +1,15 @@
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "Test dwarf unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+ {
+ .func = NULL,
+ },
+};
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
new file mode 100644
index 000000000000..0bac3137ccbd
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/powerpc/tests/regs_load.S b/tools/perf/arch/powerpc/tests/regs_load.S
new file mode 100644
index 000000000000..d76c9a32f327
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/regs_load.S
@@ -0,0 +1,94 @@
+#include <linux/linkage.h>
+
+/* Offset is based on macros from arch/powerpc/include/uapi/asm/ptrace.h. */
+#define R0 0
+#define R1 1 * 8
+#define R2 2 * 8
+#define R3 3 * 8
+#define R4 4 * 8
+#define R5 5 * 8
+#define R6 6 * 8
+#define R7 7 * 8
+#define R8 8 * 8
+#define R9 9 * 8
+#define R10 10 * 8
+#define R11 11 * 8
+#define R12 12 * 8
+#define R13 13 * 8
+#define R14 14 * 8
+#define R15 15 * 8
+#define R16 16 * 8
+#define R17 17 * 8
+#define R18 18 * 8
+#define R19 19 * 8
+#define R20 20 * 8
+#define R21 21 * 8
+#define R22 22 * 8
+#define R23 23 * 8
+#define R24 24 * 8
+#define R25 25 * 8
+#define R26 26 * 8
+#define R27 27 * 8
+#define R28 28 * 8
+#define R29 29 * 8
+#define R30 30 * 8
+#define R31 31 * 8
+#define NIP 32 * 8
+#define CTR 35 * 8
+#define LINK 36 * 8
+#define XER 37 * 8
+
+.globl perf_regs_load
+perf_regs_load:
+ std 0, R0(3)
+ std 1, R1(3)
+ std 2, R2(3)
+ std 3, R3(3)
+ std 4, R4(3)
+ std 5, R5(3)
+ std 6, R6(3)
+ std 7, R7(3)
+ std 8, R8(3)
+ std 9, R9(3)
+ std 10, R10(3)
+ std 11, R11(3)
+ std 12, R12(3)
+ std 13, R13(3)
+ std 14, R14(3)
+ std 15, R15(3)
+ std 16, R16(3)
+ std 17, R17(3)
+ std 18, R18(3)
+ std 19, R19(3)
+ std 20, R20(3)
+ std 21, R21(3)
+ std 22, R22(3)
+ std 23, R23(3)
+ std 24, R24(3)
+ std 25, R25(3)
+ std 26, R26(3)
+ std 27, R27(3)
+ std 28, R28(3)
+ std 29, R29(3)
+ std 30, R30(3)
+ std 31, R31(3)
+
+ /* store NIP */
+ mflr 4
+ std 4, NIP(3)
+
+ /* Store LR */
+ std 4, LINK(3)
+
+ /* Store XER */
+ mfxer 4
+ std 4, XER(3)
+
+ /* Store CTR */
+ mfctr 4
+ std 4, CTR(3)
+
+ /* Restore original value of r4 */
+ ld 4, R4(3)
+
+ blr
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index dc51bc570e51..8a4ce492f7b2 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -71,7 +71,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@

-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif

diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 8f6eb853aaf7..1046491de4b2 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -11,7 +11,7 @@
#include "thread.h"
#include "callchain.h"

-#if defined (__x86_64__) || defined (__i386__)
+#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
#include "arch-tests.h"
#endif

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:05 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Two SDT debug messages can occur for every DSO which is too noisy.
Consequently, increase debug level of SDT messages.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Acked-by: Masami Hiramatsu <mhir...@kernel.org>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-9-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/build-id.c | 4 ++--
tools/perf/util/probe-file.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 5651f3c12f93..e528c40739cc 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -620,7 +620,7 @@ static int build_id_cache__add_sdt_cache(const char *sbuild_id,

ret = probe_cache__scan_sdt(cache, realname);
if (ret >= 0) {
- pr_debug("Found %d SDTs in %s\n", ret, realname);
+ pr_debug4("Found %d SDTs in %s\n", ret, realname);
if (probe_cache__commit(cache) < 0)
ret = -1;
}
@@ -691,7 +691,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,

/* Update SDT cache : error is just warned */
if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0)
- pr_debug("Failed to update/scan SDT cache for %s\n", realname);
+ pr_debug4("Failed to update/scan SDT cache for %s\n", realname);

out_free:
if (!is_kallsyms)
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 6f931e442f14..436b64731f65 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -699,7 +699,7 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
INIT_LIST_HEAD(&sdtlist);
ret = get_sdt_note_list(&sdtlist, pathname);
if (ret < 0) {
- pr_debug("Failed to get sdt note: %d\n", ret);
+ pr_debug4("Failed to get sdt note: %d\n", ret);
return ret;
}
list_for_each_entry(note, &sdtlist, note_list) {
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:05 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

When address filters are used, the decoder must detect the end of a
filter region (or a branch into a tracestop region) by matching Packet
Generation Disabled (TIP.PGD) packets against the object code using the
IP given in the packet. However, due to errata SKL014 "Intel PT TIP.PGD
May Not Have Target IP Payload", that IP may not be present.

Enable the decoder to handle that by adding a new callback function
'pgd_ip()' which indicates whether the IP is not traced, in which case
that is the point where the trace was disabled.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-16-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
.../perf/util/intel-pt-decoder/intel-pt-decoder.c | 30 ++++++++++++++++++++++
.../perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 +
2 files changed, 31 insertions(+)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8ff6c6a61291..7591a0c37473 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -80,6 +80,7 @@ struct intel_pt_decoder {
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
void *data;
struct intel_pt_state state;
const unsigned char *buf;
@@ -186,6 +187,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)

decoder->get_trace = params->get_trace;
decoder->walk_insn = params->walk_insn;
+ decoder->pgd_ip = params->pgd_ip;
decoder->data = params->data;
decoder->return_compression = params->return_compression;

@@ -1008,6 +1010,19 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
int err;

err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN &&
+ decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ (decoder->state.type & INTEL_PT_BRANCH) &&
+ decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+ /* Unconditional branch leaving filter region */
+ decoder->no_progress = 0;
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
if (err == INTEL_PT_RETURN)
return 0;
if (err)
@@ -1036,6 +1051,21 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
}

if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ uint64_t to_ip = decoder->ip + intel_pt_insn.length +
+ intel_pt_insn.rel;
+
+ if (decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ decoder->pgd_ip(to_ip, decoder->data)) {
+ /* Conditional branch leaving filter region */
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->ip = to_ip;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
decoder->ip);
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 02c38fec1c37..89399985fa4d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -83,6 +83,7 @@ struct intel_pt_params {
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
void *data;
bool return_compression;
uint64_t period;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:06 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Some error paths do not tidy up. Fix that.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-6-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-record.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index b32a880ec473..962adcfc43a5 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1573,23 +1573,23 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (!rec->itr) {
rec->itr = auxtrace_record__init(rec->evlist, &err);
if (err)
- return err;
+ goto out;
}

err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
rec->opts.auxtrace_snapshot_opts);
if (err)
- return err;
+ goto out;

if (dry_run)
- return 0;
+ goto out;

err = bpf__setup_stdout(rec->evlist);
if (err) {
bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
pr_err("ERROR: Setup BPF stdout failed: %s\n",
errbuf);
- return err;
+ goto out;
}

err = -ENOMEM;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:06 AM9/29/16
to
From: Mathieu Poirier <mathieu...@linaro.org>

This patch makes it possible to use the current filter framework with
address filters. That way address filters for HW tracers such as
CoreSight and Intel PT can be communicated to the kernel drivers.

Signed-off-by: Mathieu Poirier <mathieu...@linaro.org>
Acked-by: Adrian Hunter <adrian...@intel.com>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linux-ar...@lists.infradead.org
Link: http://lkml.kernel.org/r/1474037045-31730-4-git-s...@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/evsel.c | 5 +++++
tools/perf/util/evsel.h | 2 ++
tools/perf/util/parse-events.c | 39 ++++++++++++++++++++++++++++++++++-----
3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 3b4e7c452e43..380e84c3af3d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1067,6 +1067,11 @@ int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
}

+int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
+{
+ return perf_evsel__append_filter(evsel, "%s,%s", filter);
+}
+
int perf_evsel__enable(struct perf_evsel *evsel)
{
int nthreads = thread_map__nr(evsel->threads);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 1f8c48f87f7d..b1503b0ecdff 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -236,6 +236,8 @@ void perf_evsel__set_sample_id(struct perf_evsel *evsel,

int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel,
+ const char *filter);
int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
const char *filter);
int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 16bf09cc3e8d..33546c3ac1fe 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1760,20 +1760,49 @@ foreach_evsel_in_last_glob(struct perf_evlist *evlist,
static int set_filter(struct perf_evsel *evsel, const void *arg)
{
const char *str = arg;
+ bool found = false;
+ int nr_addr_filters = 0;
+ struct perf_pmu *pmu = NULL;

- if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
- fprintf(stderr,
- "--filter option should follow a -e tracepoint option\n");
- return -1;
+ if (evsel == NULL)
+ goto err;
+
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+ if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
}

- if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+ while ((pmu = perf_pmu__scan(pmu)) != NULL)
+ if (pmu->type == evsel->attr.type) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ perf_pmu__scan_file(pmu, "nr_addr_filters",
+ "%d", &nr_addr_filters);
+
+ if (!nr_addr_filters)
+ goto err;
+
+ if (perf_evsel__append_addr_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
}

return 0;
+
+err:
+ fprintf(stderr,
+ "--filter option should follow a -e tracepoint or HW tracer option\n");
+
+ return -1;
}

int parse_filter(const struct option *opt, const char *str,
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:07 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Add a helper function 'intel_pt_has()' to make it easier to determine
which members the AUXTRACE_INFO event contains.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-13-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/intel-pt.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 96519e801e53..f16b00f55a19 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2037,6 +2037,12 @@ static void intel_pt_print_info(u64 *arr, int start, int finish)
fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}

+static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
+{
+ return auxtrace_info->header.size >=
+ sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
+}
+
int intel_pt_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
@@ -2077,8 +2083,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
INTEL_PT_PER_CPU_MMAPS);

- if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
- (sizeof(u64) * INTEL_PT_CYC_BIT)) {
+ if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
@@ -2088,8 +2093,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
INTEL_PT_CYC_BIT);
}

- if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
- (sizeof(u64) * INTEL_PT_MAX_NONTURBO_RATIO)) {
+ if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
pt->max_non_turbo_ratio =
auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
intel_pt_print_info(&auxtrace_info->priv[0],
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:08 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Fix occasional decoder errors decoding trace data collected in snapshot
mode.

Snapshot mode can take successive snapshots of trace which might overlap.
The decoder checks whether there is an overlap but only looks at the
current and previous buffer. However buffers that do not contain
synchronization (i.e. PSB) packets cannot be decoded or used for overlap
checking. That means the decoder actually needs to check overlaps between
the current buffer and the previous buffer that contained usable data.
Make that change.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Cc: sta...@vger.kernel.org # v4.3+
Link: http://lkml.kernel.org/r/1474641528-18776-10-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/intel-pt.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index b9cc353cace2..b744ea812a2e 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -241,7 +241,7 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
}

queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-
+next:
buffer = auxtrace_buffer__next(queue, buffer);
if (!buffer) {
if (old_buffer)
@@ -264,9 +264,6 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
return -ENOMEM;

- if (old_buffer)
- auxtrace_buffer__drop_data(old_buffer);
-
if (buffer->use_data) {
b->len = buffer->use_size;
b->buf = buffer->use_data;
@@ -276,6 +273,16 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
}
b->ref_timestamp = buffer->reference;

+ /*
+ * If in snapshot mode and the buffer has no usable data, get next
+ * buffer and again check overlap against old_buffer.
+ */
+ if (ptq->pt->snapshot_mode && !b->len)
+ goto next;
+
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+
if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
!buffer->consecutive)) {
b->consecutive = false;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:12 AM9/29/16
to
From: Masami Hiramatsu <mhir...@kernel.org>

Cut off the characters which cannot be used in a uprobes group name
when deriving that name from the executable filename.

For example, if the exec name is libstdc++.so, without this fix
perf probe generates "probe_libstdc++" as the group name, but
setting it fails because '+' cannot be used in a group name.

With this fix perf accepts only letters, digits or '_' in the group
name, thus perf generates "probe_libstdc" as the group name.

E.g. with this fix, you can see the event name has no "+".
----
$ ./perf probe -x /usr/lib64/libstdc++.so.6 -D is_open
p:probe_libstdc/is_open /usr/lib64/libstdc++.so.6.0.22:0x8ca80
p:probe_libstdc/is_open_1 /usr/lib64/libstdc++.so.6.0.22:0x8ca70
p:probe_libstdc/is_open_2 /usr/lib64/libstdc++.so.6.0.22:0x8ca60
p:probe_libstdc/is_open_3 /usr/lib64/libstdc++.so.6.0.22:0xb0ad0
p:probe_libstdc/is_open_4 /usr/lib64/libstdc++.so.6.0.22:0xecca9
----

Committer note:

Before this fix:

# perf probe -x /usr/lib64/libstdc++.so.6 is_open
Failed to write event: Invalid argument
Error: Failed to add events.
#

After the fix:

# perf probe -x /usr/lib64/libstdc++.so.6 is_open
Added new events:
probe_libstdc:is_open (on is_open in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_1 (on is_open in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_2 (on is_open in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_3 (on is_open in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_4 (on is_open in /usr/lib64/libstdc++.so.6.0.22)

You can now use it in all perf tools, such as:

perf record -e probe_libstdc:is_open_4 -aR sleep 1

# perf probe -l probe_libstdc:*
probe_libstdc:is_open (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_1 (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_2 (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_3 (on is_open@src/c++98/basic_file.cc in /usr/lib64/libstdc++.so.6.0.22)
probe_libstdc:is_open_4 (on stdio_filebuf:5@include/ext/stdio_filebuf.h in /usr/lib64/libstdc++.so.6.0.22)
#

Signed-off-by: Masami Hiramatsu <mhir...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Tested-by: Jiri Olsa <jo...@kernel.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <tg...@linutronix.de>
Link: http://lkml.kernel.org/r/147464491667.29804.9553638175441827970.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/probe-event.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index bc60ce49720b..fcfbef07b92d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -213,9 +213,13 @@ static int convert_exec_to_group(const char *exec, char **result)
goto out;
}

- ptr2 = strpbrk(ptr1, "-._");
- if (ptr2)
- *ptr2 = '\0';
+ for (ptr2 = ptr1; ptr2 != '\0'; ptr2++) {
+ if (!isalnum(*ptr2) && *ptr2 != '_') {
+ *ptr2 = '\0';
+ break;
+ }
+ }
+
ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
if (ret < 0)
goto out;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Sep 29, 2016, 10:50:12 AM9/29/16
to
From: Adrian Hunter <adrian...@intel.com>

Add a function to find the last symbol in a DSO. This will be used when
parsing address filters to calculate a region that includes the entire
DSO.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Mathieu Poirier <mathieu...@linaro.org>
Link: http://lkml.kernel.org/r/1474641528-18776-7-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/symbol.c | 15 +++++++++++++++
tools/perf/util/symbol.h | 1 +
2 files changed, 16 insertions(+)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 19c9c558454f..aecff69a510d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -345,6 +345,16 @@ static struct symbol *symbols__first(struct rb_root *symbols)
return NULL;
}

+static struct symbol *symbols__last(struct rb_root *symbols)
+{
+ struct rb_node *n = rb_last(symbols);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
static struct symbol *symbols__next(struct symbol *sym)
{
struct rb_node *n = rb_next(&sym->rb_node);
@@ -466,6 +476,11 @@ struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
return symbols__first(&dso->symbols[type]);
}

+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type)
+{
+ return symbols__last(&dso->symbols[type]);
+}
+
struct symbol *dso__next_symbol(struct symbol *sym)
{
return symbols__next(sym);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0dacfb7d5b67..d964844eb314 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -259,6 +259,7 @@ struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
struct symbol *symbol__next_by_name(struct symbol *sym);

struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type);
struct symbol *dso__next_symbol(struct symbol *sym);

enum dso_type dso__type_fd(int fd);
--
2.7.4

Ingo Molnar

unread,
Sep 29, 2016, 1:20:05 PM9/29/16
to

* Arnaldo Carvalho de Melo <ac...@kernel.org> wrote:

tip-bot for Simon Que

unread,
Sep 29, 2016, 2:20:08 PM9/29/16
to
Commit-ID: 2acad19500c28ce0c4dc3f9bf1dcfc82040b6531
Gitweb: http://git.kernel.org/tip/2acad19500c28ce0c4dc3f9bf1dcfc82040b6531
Author: Simon Que <sq...@chromium.org>
AuthorDate: Wed, 28 Sep 2016 11:37:53 -0700
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Thu, 29 Sep 2016 11:16:44 -0300

perf tools: Update documentation info about quipper

The existing link is outdated. The most recent quipper code can be found at the
new URL.

Committer notes:

Quipper is a C++ parser that can be used to convert a perf.data
file to and from a protobuf, a Chromium OS facility.

Signed-off-by: Simon Que <sq...@chromium.org>
Acked-by: Andi Kleen <a...@linux.intel.com>
Cc: Adrian Hunter <adrian...@intel.com>
Cc: Chong Jiang <chong...@chromium.org>
Link: http://lkml.kernel.org/n/tip-4q1nm7jl3v...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf.data-file-format.txt | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index fdc99fe..b664b18 100644

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:40:06 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

Add a --no-desc flag to 'perf list' to not print the event descriptions
that were earlier added for JSON events. This may be useful to get a
less crowded listing.

Printing descriptions remains the default, as that is the more useful
behavior for most users.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: linuxp...@lists.ozlabs.org
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Link: http://lkml.kernel.org/r/1473978296-20712-9-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf-list.txt | 8 +++++++-
tools/perf/builtin-list.c | 14 +++++++++-----
tools/perf/util/parse-events.c | 4 ++--
tools/perf/util/parse-events.h | 2 +-
tools/perf/util/pmu.c | 4 ++--
tools/perf/util/pmu.h | 2 +-
6 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index a126e97a8114..72209bc0f523 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,13 +8,19 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
-'perf list' [hw|sw|cache|tracepoint|pmu|event_glob]
+'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob]

DESCRIPTION
-----------
This command displays the symbolic event types which can be selected in the
various perf commands with the -e option.

+OPTIONS
+-------
+--no-desc::
+Don't print descriptions.
+
+
[[EVENT_MODIFIERS]]
EVENT MODIFIERS
---------------
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 88ee419e5189..b14cb162f841 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -16,16 +16,20 @@
#include "util/pmu.h"
#include <subcmd/parse-options.h>

+static bool desc_flag = true;
+
int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
{
int i;
bool raw_dump = false;
struct option list_options[] = {
OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
+ OPT_BOOLEAN('d', "desc", &desc_flag,
+ "Print extra event descriptions. --no-desc to not print."),
OPT_END()
};
const char * const list_usage[] = {
- "perf list [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+ "perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
NULL
};

@@ -40,7 +44,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
printf("\nList of pre-defined events (to be used in -e):\n\n");

if (argc == 0) {
- print_events(NULL, raw_dump);
+ print_events(NULL, raw_dump, !desc_flag);
return 0;
}

@@ -61,14 +65,14 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
strcmp(argv[i], "hwcache") == 0)
print_hwcache_events(NULL, raw_dump);
else if (strcmp(argv[i], "pmu") == 0)
- print_pmu_events(NULL, raw_dump);
+ print_pmu_events(NULL, raw_dump, !desc_flag);
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump);
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;

if (sep == NULL) {
- print_events(argv[i], raw_dump);
+ print_events(argv[i], raw_dump, !desc_flag);
continue;
}
sep_idx = sep - argv[i];
@@ -90,7 +94,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
print_symbol_events(s, PERF_TYPE_SOFTWARE,
event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
print_hwcache_events(s, raw_dump);
- print_pmu_events(s, raw_dump);
+ print_pmu_events(s, raw_dump, !desc_flag);
print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump);
free(s);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 33546c3ac1fe..3966ad79ee8d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2263,7 +2263,7 @@ out_enomem:
/*
* Print the help text for the event symbols:
*/
-void print_events(const char *event_glob, bool name_only)
+void print_events(const char *event_glob, bool name_only, bool quiet_flag)
{
print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
@@ -2273,7 +2273,7 @@ void print_events(const char *event_glob, bool name_only)

print_hwcache_events(event_glob, name_only);

- print_pmu_events(event_glob, name_only);
+ print_pmu_events(event_glob, name_only, quiet_flag);

if (event_glob != NULL)
return;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8d09a976fca8..3bf376b42bf9 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -172,7 +172,7 @@ void parse_events_update_lists(struct list_head *list_event,
void parse_events_evlist_error(struct parse_events_evlist *data,
int idx, const char *str);

-void print_events(const char *event_glob, bool name_only);
+void print_events(const char *event_glob, bool name_only, bool quiet);

struct event_symbol {
const char *symbol;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7b46e772f5f9..9dc3506d23fc 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1084,7 +1084,7 @@ static void wordwrap(char *s, int start, int max, int corr)
}
}

-void print_pmu_events(const char *event_glob, bool name_only)
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag)
{
struct perf_pmu *pmu;
struct perf_pmu_alias *alias;
@@ -1151,7 +1151,7 @@ void print_pmu_events(const char *event_glob, bool name_only)
printf("%s ", aliases[j].name);
continue;
}
- if (aliases[j].desc) {
+ if (aliases[j].desc && !quiet_flag) {
if (numdesc++ == 0)
printf("\n");
printf(" %-50s\n", aliases[j].name);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 51d8d0d35e63..7b47192e03b5 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -72,7 +72,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);

struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);

-void print_pmu_events(const char *event_glob, bool name_only);
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet);
bool pmu_have_event(const char *pname, const char *name);

int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:40:06 PM10/3/16
to
From: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>

Implement support in jevents to parse long descriptions for events that
may have them in the JSON files. A follow-on patch will make this long
description available to the user through the 'perf list' command.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-11-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/pmu-events/jevents.c | 32 ++++++++++++++++++++++++--------
tools/perf/pmu-events/jevents.h | 3 ++-
tools/perf/pmu-events/pmu-events.h | 1 +
3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index c9bf9a7dc7b2..13f4284721d5 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -269,7 +269,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname)
}

static int print_events_table_entry(void *data, char *name, char *event,
- char *desc)
+ char *desc, char *long_desc)
{
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -285,6 +285,8 @@ static int print_events_table_entry(void *data, char *name, char *event,
fprintf(outfp, "\t.event = \"%s\",\n", event);
fprintf(outfp, "\t.desc = \"%s\",\n", desc);
fprintf(outfp, "\t.topic = \"%s\",\n", topic);
+ if (long_desc && long_desc[0])
+ fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);

fprintf(outfp, "},\n");

@@ -306,7 +308,8 @@ static void print_events_table_suffix(FILE *outfp)

/* Call func with each event in the json file */
int json_events(const char *fn,
- int (*func)(void *data, char *name, char *event, char *desc),
+ int (*func)(void *data, char *name, char *event, char *desc,
+ char *long_desc),
void *data)
{
int err = -EIO;
@@ -325,6 +328,8 @@ int json_events(const char *fn,
tok = tokens + 1;
for (i = 0; i < tokens->size; i++) {
char *event = NULL, *desc = NULL, *name = NULL;
+ char *long_desc = NULL;
+ char *extra_desc = NULL;
struct msrmap *msr = NULL;
jsmntok_t *msrval = NULL;
jsmntok_t *precise = NULL;
@@ -350,6 +355,10 @@ int json_events(const char *fn,
} else if (json_streq(map, field, "BriefDescription")) {
addfield(map, &desc, "", "", val);
fixdesc(desc);
+ } else if (json_streq(map, field,
+ "PublicDescription")) {
+ addfield(map, &long_desc, "", "", val);
+ fixdesc(long_desc);
} else if (json_streq(map, field, "PEBS") && nz) {
precise = val;
} else if (json_streq(map, field, "MSRIndex") && nz) {
@@ -358,10 +367,10 @@ int json_events(const char *fn,
msrval = val;
} else if (json_streq(map, field, "Errata") &&
!json_streq(map, val, "null")) {
- addfield(map, &desc, ". ",
+ addfield(map, &extra_desc, ". ",
" Spec update: ", val);
} else if (json_streq(map, field, "Data_LA") && nz) {
- addfield(map, &desc, ". ",
+ addfield(map, &extra_desc, ". ",
" Supports address when precise",
NULL);
}
@@ -369,19 +378,26 @@ int json_events(const char *fn,
}
if (precise && desc && !strstr(desc, "(Precise Event)")) {
if (json_streq(map, precise, "2"))
- addfield(map, &desc, " ", "(Must be precise)",
- NULL);
+ addfield(map, &extra_desc, " ",
+ "(Must be precise)", NULL);
else
- addfield(map, &desc, " ",
+ addfield(map, &extra_desc, " ",
"(Precise event)", NULL);
}
+ if (desc && extra_desc)
+ addfield(map, &desc, " ", extra_desc, NULL);
+ if (long_desc && extra_desc)
+ addfield(map, &long_desc, " ", extra_desc, NULL);
if (msr != NULL)
addfield(map, &event, ",", msr->pname, msrval);
fixname(name);
- err = func(data, name, event, desc);
+
+ err = func(data, name, event, desc, long_desc);
free(event);
free(desc);
free(name);
+ free(long_desc);
+ free(extra_desc);
if (err)
break;
tok += j;
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
index 996601f828b6..b0eb2744b498 100644
--- a/tools/perf/pmu-events/jevents.h
+++ b/tools/perf/pmu-events/jevents.h
@@ -2,7 +2,8 @@
#define JEVENTS_H 1

int json_events(const char *fn,
- int (*func)(void *data, char *name, char *event, char *desc),
+ int (*func)(void *data, char *name, char *event, char *desc,
+ char *long_desc),
void *data);
char *get_cpu_str(void);

diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index 70d54794e3cb..2eaef595d8a0 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -9,6 +9,7 @@ struct pmu_event {
const char *event;
const char *desc;
const char *topic;
+ const char *long_desc;
};

/*
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:40:07 PM10/3/16
to
From: Colin Ian King <colin...@canonical.com>

Static analysis with cppcheck[1] detected an incorrect comparison:
[tools/perf/util/probe-event.c:216]: (warning) Char literal compared
with pointer 'ptr2'. Did you intend to dereference it?

Dereference ptr2 for the comparison to fix this.

1: https://sourceforge.net/p/cppcheck/wiki/Home/

Signed-off-by: Colin King <colin...@canonical.com>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ravi Bangoria <ravi.b...@linux.vnet.ibm.com>
Cc: Wang Nan <wang...@huawei.com>
Fixes: 35726d3a4ca9 ("perf probe: Fix to cut off incompatible chars from group name")
Link: http://lkml.kernel.org/r/20161003103431.1...@canonical.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/probe-event.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index fcfbef07b92d..d281ae2b54e8 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -213,7 +213,7 @@ static int convert_exec_to_group(const char *exec, char **result)
goto out;
}

- for (ptr2 = ptr1; ptr2 != '\0'; ptr2++) {
+ for (ptr2 = ptr1; *ptr2 != '\0'; ptr2++) {
if (!isalnum(*ptr2) && *ptr2 != '_') {
*ptr2 = '\0';
break;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:40:08 PM10/3/16
to
Hi Ingo,

Please consider pulling,

- Arnaldo

Build and test stats at the end of the message.

The following changes since commit 41aad2a6d4fcdda8d73c9739daf7a9f3f49499d6:

Merge tag 'perf-core-for-mingo-20160929' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core (2016-09-29 19:09:58 +0200)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-20161003

for you to fetch changes up to b42c7369e3f451e22c2b0be5d193955498d37546:

perf pmu-events: Add Skylake frontend MSR support (2016-10-03 21:52:01 -0300)

----------------------------------------------------------------
perf/core improvements and fixes:

- Allow vendors to provide JSON files describing PMU events, that then
get parsed to generate C tables that are linked against perf, allowing
the use of the names found in their documentation, such as:

# perf list l1d

List of pre-defined events (to be used in -e):

Cache:
l1d.replacement
[L1D data line replacements]
l1d_pend_miss.fb_full
[Cycles a demand request was blocked due to Fill Buffers inavailability]
l1d_pend_miss.pending
[L1D miss oustandings duration in cycles]
l1d_pend_miss.pending_cycles
[Cycles with L1D load Misses outstanding]
l1d_pend_miss.pending_cycles_any
[Cycles with L1D load Misses outstanding from any thread on physical core]
l2_trans.l1d_wb
[L1D writebacks that access L2 cache]

Pipeline:
cycle_activity.cycles_l1d_miss
[Cycles while L1 cache miss demand load is outstanding]
cycle_activity.cycles_l1d_pending
[Cycles while L1 cache miss demand load is outstanding]
cycle_activity.stalls_l1d_miss
[Execution stalls while L1 cache miss demand load is outstanding]
cycle_activity.stalls_l1d_pending
[Execution stalls while L1 cache miss demand load is outstanding]

The above example was done on a Broadwell based ThinkPad t450s after
downloading and installing such JSON files which will be added to the
tools/perf/pmu-events/ directory in a subsequent patchkit.

Now one can use those names with -e/--event in all 'perf tools'.
(Andi Kleen, Sukadev Bhattiprolu)

- Add a missing pointer dereference in 'perf probe' (Colin Ian King)

- Add support for building host programs to be used in generating files
to be used in the build process, such as fixdep and jevents, fixing
the usage of these features in a cross compilation setup (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

----------------------------------------------------------------
Andi Kleen (12):
perf tools: Add jsmn `jasmine' JSON parser
perf jevents: Program to convert JSON file
perf tools: Support CPU id matching for x86 v2
perf jevents: Handle header line in mapfile
perf pmu: Support alias descriptions
perf tools: Query terminal width and use in perf list
perf list: Add a --no-desc flag
perf pmu: Add override support for event list CPUID
perf list jevents: Add support for event list topics
perf tools: Make alias matching case-insensitive
perf pmu-events: Fix fixed counters on Intel
perf pmu-events: Add Skylake frontend MSR support

Arnaldo Carvalho de Melo (1):
perf tools: Experiment with cppcheck

Colin Ian King (1):
perf probe: Check if *ptr2 is zero and not ptr2

Jiri Olsa (2):
tools build: Add support for host programs format
tools build: Make fixdep a hostprog

Sukadev Bhattiprolu (6):
perf pmu: Use pmu_events table to create aliases
perf powerpc: Support CPU ID matching for Powerpc
perf jevents: Add support for long descriptions
perf list: Support long jevents descriptions
perf tools: Add README for info on parsing JSON/map files
perf tools: Allow period= in perf stat CPU event descriptions.

tools/build/Build | 2 +
tools/build/Build.include | 5 +
tools/build/Makefile | 8 +-
tools/build/Makefile.build | 19 +-
tools/build/Makefile.include | 4 -
tools/lib/subcmd/pager.c | 16 +
tools/lib/subcmd/pager.h | 1 +
tools/perf/Documentation/perf-list.txt | 12 +-
tools/perf/Makefile.perf | 34 +-
tools/perf/arch/powerpc/util/header.c | 11 +
tools/perf/arch/x86/util/header.c | 24 +-
tools/perf/builtin-list.c | 20 +-
tools/perf/pmu-events/Build | 13 +
tools/perf/pmu-events/README | 147 ++++++
tools/perf/pmu-events/jevents.c | 812 +++++++++++++++++++++++++++++++++
tools/perf/pmu-events/jevents.h | 18 +
tools/perf/pmu-events/jsmn.c | 313 +++++++++++++
tools/perf/pmu-events/jsmn.h | 67 +++
tools/perf/pmu-events/json.c | 162 +++++++
tools/perf/pmu-events/json.h | 38 ++
tools/perf/pmu-events/pmu-events.h | 37 ++
tools/perf/util/evlist.c | 12 +-
tools/perf/util/evsel.c | 3 +-
tools/perf/util/header.h | 1 +
tools/perf/util/machine.c | 6 +-
tools/perf/util/parse-events.c | 8 +-
tools/perf/util/parse-events.h | 3 +-
tools/perf/util/pmu.c | 176 ++++++-
tools/perf/util/pmu.h | 6 +-
tools/perf/util/probe-event.c | 2 +-
tools/perf/util/strbuf.h | 3 +-
tools/perf/util/thread.c | 9 +-
32 files changed, 1926 insertions(+), 66 deletions(-)
create mode 100644 tools/perf/pmu-events/Build
create mode 100644 tools/perf/pmu-events/README
create mode 100644 tools/perf/pmu-events/jevents.c
create mode 100644 tools/perf/pmu-events/jevents.h
create mode 100644 tools/perf/pmu-events/jsmn.c
create mode 100644 tools/perf/pmu-events/jsmn.h
create mode 100644 tools/perf/pmu-events/json.c
create mode 100644 tools/perf/pmu-events/json.h
create mode 100644 tools/perf/pmu-events/pmu-events.h
real 33m23.855s
user 0m2.128s
sys 0m2.305s
- tarpkg: ./tests/perf-targz-src-pkg .
make_no_libperl_O: make NO_LIBPERL=1
make_no_newt_O: make NO_NEWT=1
make_no_slang_O: make NO_SLANG=1
make_no_libnuma_O: make NO_LIBNUMA=1
make_with_babeltrace_O: make LIBBABELTRACE=1
make_install_prefix_slash_O: make install prefix=/tmp/krava/
make_no_libbpf_O: make NO_LIBBPF=1
make_no_demangle_O: make NO_DEMANGLE=1
make_install_bin_O: make install-bin
make_install_O: make install
make_no_libbionic_O: make NO_LIBBIONIC=1
make_no_libunwind_O: make NO_LIBUNWIND=1
make_no_libaudit_O: make NO_LIBAUDIT=1
make_no_libpython_O: make NO_LIBPYTHON=1
make_tags_O: make tags
make_perf_o_O: make perf.o
make_static_O: make LDFLAGS=-static
make_clean_all_O: make clean all
make_install_prefix_O: make install prefix=/tmp/krava
make_help_O: make help
make_util_map_o_O: make util/map.o
make_no_ui_O: make NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
make_no_libdw_dwarf_unwind_O: make NO_LIBDW_DWARF_UNWIND=1
make_minimal_O: make NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 NO_LIBCRYPTO=1 NO_SDT=1
make_no_backtrace_O: make NO_BACKTRACE=1
make_debug_O: make DEBUG=1
make_no_libelf_O: make NO_LIBELF=1
make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1
make_no_auxtrace_O: make NO_AUXTRACE=1
make_doc_O: make doc
make_util_pmu_bison_o_O: make util/pmu-bison.o
make_no_gtk2_O: make NO_GTK2=1
OK

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:40:08 PM10/3/16
to
From: Jiri Olsa <jo...@redhat.com>

In some cases, like for fixdep and shortly for jevents, we need to build a tool
to run on the host that will be used in building a tool, such as perf, that is
being cross compiled, so do like the kernel and provide HOSTCC, HOSTLD and HOSTAR
to do that.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Requested-by: Andi Kleen <an...@firstfloor.org>
Requested-and-Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20160927141846.GA6589@krava
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/build/Build.include | 5 +++++
tools/build/Makefile | 6 ++++++
tools/build/Makefile.build | 19 +++++++++++++++----
3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/tools/build/Build.include b/tools/build/Build.include
index 4d000bc959b4..02489380d79b 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -90,3 +90,8 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
# - per object C flags
# - BUILD_STR macro to allow '-D"$(variable)"' constructs
c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+
+###
+## HOSTCC C flags
+
+host_c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CHOSTFLAGS) -D"BUILD_STR(s)=\#s" $(CHOSTFLAGS_$(basetarget).o) $(CHOSTFLAGS_$(obj))
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 0d5a0e3a8fa9..653faee2a055 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -14,6 +14,12 @@ endef
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)

+HOSTCC ?= gcc
+HOSTLD ?= ld
+HOSTAR ?= ar
+
+export HOSTCC HOSTLD HOSTAR
+
ifeq ($(V),1)
Q =
else
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 27f3583193e6..190519a94ce5 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -58,6 +58,9 @@ quiet_cmd_mkdir = MKDIR $(dir $@)
quiet_cmd_cc_o_c = CC $@
cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<

+quiet_cmd_host_cc_o_c = HOSTCC $@
+ cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
+
quiet_cmd_cpp_i_c = CPP $@
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<

@@ -70,16 +73,24 @@ quiet_cmd_gen = GEN $@
# If there's nothing to link, create empty $@ object.
quiet_cmd_ld_multi = LD $@
cmd_ld_multi = $(if $(strip $(obj-y)),\
- $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
+ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
+
+quiet_cmd_host_ld_multi = HOSTLD $@
+ cmd_host_ld_multi = $(if $(strip $(obj-y)),\
+ $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@)
+
+ifneq ($(filter $(obj),$(hostprogs)),)
+ host = host_
+endif

# Build rules
$(OUTPUT)%.o: %.c FORCE
$(call rule_mkdir)
- $(call if_changed_dep,cc_o_c)
+ $(call if_changed_dep,$(host)cc_o_c)

$(OUTPUT)%.o: %.S FORCE
$(call rule_mkdir)
- $(call if_changed_dep,cc_o_c)
+ $(call if_changed_dep,$(host)cc_o_c)

$(OUTPUT)%.i: %.c FORCE
$(call rule_mkdir)
@@ -119,7 +130,7 @@ $(sort $(subdir-obj-y)): $(subdir-y) ;

$(in-target): $(obj-y) FORCE
$(call rule_mkdir)
- $(call if_changed,ld_multi)
+ $(call if_changed,$(host)ld_multi)

__build: $(in-target)
@:
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:05 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

Add a PERF_CPUID variable to override the CPUID of the current CPU
(within the current architecture). This is useful for testing, so that
all event lists can be tested on a single system.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-10-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/pmu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 9dc3506d23fc..79242cf9bb79 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -501,10 +501,16 @@ static void pmu_add_cpu_aliases(struct list_head *head)
struct pmu_event *pe;
char *cpuid;

- cpuid = get_cpuid_str();
+ cpuid = getenv("PERF_CPUID");
+ if (cpuid)
+ cpuid = strdup(cpuid);
+ if (!cpuid)
+ cpuid = get_cpuid_str();
if (!cpuid)
return;

+ pr_debug("Using CPUID %s\n", cpuid);
+
i = 0;
while (1) {
map = &pmu_events_map[i++];
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:05 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

Add support to print alias descriptions in perf list, which are taken
from the generated event files.

The sorting code is changed to put the events with descriptions at the
end. The descriptions are printed word-wrapped, possibly spanning
multiple lines.

Example output:

% perf list
...
arith.fpu_div
[Divide operations executed]
arith.fpu_div_active
[Cycles when divider is busy executing divide operations]

Committer notes:

Further testing on a Broadwell machine (ThinkPad t450s), using these
files:

$ find tools/perf/pmu-events/arch/x86/
tools/perf/pmu-events/arch/x86/
tools/perf/pmu-events/arch/x86/Broadwell
tools/perf/pmu-events/arch/x86/Broadwell/Cache.json
tools/perf/pmu-events/arch/x86/Broadwell/Other.json
tools/perf/pmu-events/arch/x86/Broadwell/Frontend.json
tools/perf/pmu-events/arch/x86/Broadwell/Virtual-Memory.json
tools/perf/pmu-events/arch/x86/Broadwell/Pipeline.json
tools/perf/pmu-events/arch/x86/Broadwell/Floating-point.json
tools/perf/pmu-events/arch/x86/Broadwell/Memory.json
tools/perf/pmu-events/arch/x86/mapfile.csv
$

Taken from:

https://github.com/sukadev/linux/tree/json-code+data-v21/tools/perf/pmu-events/arch/x86/

to get this machinery to actually parse JSON files, generate
$(OUTPUT)pmu-events/pmu-events.c, compile it and link it with perf, which
will then use the table it contains. These files will be submitted right
after this patchkit.

[acme@jouet linux]$ perf list page_walker

List of pre-defined events (to be used in -e):

page_walker_loads.dtlb_l1
[Number of DTLB page walker hits in the L1+FB]
page_walker_loads.dtlb_l2
[Number of DTLB page walker hits in the L2]
page_walker_loads.dtlb_l3
[Number of DTLB page walker hits in the L3 + XSNP]
page_walker_loads.dtlb_memory
[Number of DTLB page walker hits in Memory]
page_walker_loads.itlb_l1
[Number of ITLB page walker hits in the L1+FB]
page_walker_loads.itlb_l2
[Number of ITLB page walker hits in the L2]
page_walker_loads.itlb_l3
[Number of ITLB page walker hits in the L3 + XSNP]

[acme@jouet linux]$

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-7-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/pmu.c | 83 +++++++++++++++++++++++++++++++++++++++++----------
tools/perf/util/pmu.h | 1 +
2 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 10668b7f5272..9857fb14ea86 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -222,7 +222,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
}

static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
- char *desc __maybe_unused, char *val)
+ char *desc, char *val)
{
struct perf_pmu_alias *alias;
int ret;
@@ -255,6 +255,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
perf_pmu__parse_snapshot(alias, dir, name);
}

+ alias->desc = desc ? strdup(desc) : NULL;
+
list_add_tail(&alias->list, list);

return 0;
@@ -1043,11 +1045,42 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
return buf;
}

-static int cmp_string(const void *a, const void *b)
+struct pair {
+ char *name;
+ char *desc;
+};
+
+static int cmp_pair(const void *a, const void *b)
+{
+ const struct pair *as = a;
+ const struct pair *bs = b;
+
+ /* Put extra events last */
+ if (!!as->desc != !!bs->desc)
+ return !!as->desc - !!bs->desc;
+ return strcmp(as->name, bs->name);
+}
+
+static void wordwrap(char *s, int start, int max, int corr)
{
- const char * const *as = a;
- const char * const *bs = b;
- return strcmp(*as, *bs);
+ int column = start;
+ int n;
+
+ while (*s) {
+ int wlen = strcspn(s, " \t");
+
+ if (column + wlen >= max && column > start) {
+ printf("\n%*s", start, "");
+ column = start + corr;
+ }
+ n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+ if (n <= 0)
+ break;
+ s += wlen;
+ column += n;
+ while (isspace(*s))
+ s++;
+ }
}

void print_pmu_events(const char *event_glob, bool name_only)
@@ -1057,7 +1090,9 @@ void print_pmu_events(const char *event_glob, bool name_only)
char buf[1024];
int printed = 0;
int len, j;
- char **aliases;
+ struct pair *aliases;
+ int numdesc = 0;
+ int columns = 78;

pmu = NULL;
len = 0;
@@ -1067,14 +1102,15 @@ void print_pmu_events(const char *event_glob, bool name_only)
if (pmu->selectable)
len++;
}
- aliases = zalloc(sizeof(char *) * len);
+ aliases = zalloc(sizeof(struct pair) * len);
if (!aliases)
goto out_enomem;
pmu = NULL;
j = 0;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
list_for_each_entry(alias, &pmu->aliases, list) {
- char *name = format_alias(buf, sizeof(buf), pmu, alias);
+ char *name = alias->desc ? alias->name :
+ format_alias(buf, sizeof(buf), pmu, alias);
bool is_cpu = !strcmp(pmu->name, "cpu");

if (event_glob != NULL &&
@@ -1083,12 +1119,19 @@ void print_pmu_events(const char *event_glob, bool name_only)
event_glob))))
continue;

- if (is_cpu && !name_only)
+ if (is_cpu && !name_only && !alias->desc)
name = format_alias_or(buf, sizeof(buf), pmu, alias);

- aliases[j] = strdup(name);
- if (aliases[j] == NULL)
+ aliases[j].name = name;
+ if (is_cpu && !name_only && !alias->desc)
+ aliases[j].name = format_alias_or(buf,
+ sizeof(buf),
+ pmu, alias);
+ aliases[j].name = strdup(aliases[j].name);
+ if (!aliases[j].name)
goto out_enomem;
+
+ aliases[j].desc = alias->desc;
j++;
}
if (pmu->selectable &&
@@ -1096,25 +1139,33 @@ void print_pmu_events(const char *event_glob, bool name_only)
char *s;
if (asprintf(&s, "%s//", pmu->name) < 0)
goto out_enomem;
- aliases[j] = s;
+ aliases[j].name = s;
j++;
}
}
len = j;
- qsort(aliases, len, sizeof(char *), cmp_string);
+ qsort(aliases, len, sizeof(struct pair), cmp_pair);
for (j = 0; j < len; j++) {
if (name_only) {
- printf("%s ", aliases[j]);
+ printf("%s ", aliases[j].name);
continue;
}
- printf(" %-50s [Kernel PMU event]\n", aliases[j]);
+ if (aliases[j].desc) {
+ if (numdesc++ == 0)
+ printf("\n");
+ printf(" %-50s\n", aliases[j].name);
+ printf("%*s", 8, "[");
+ wordwrap(aliases[j].desc, 8, columns, 0);
+ printf("]\n");
+ } else
+ printf(" %-50s [Kernel PMU event]\n", aliases[j].name);
printed++;
}
if (printed && pager_in_use())
printf("\n");
out_free:
for (j = 0; j < len; j++)
- zfree(&aliases[j]);
+ zfree(&aliases[j].name);
zfree(&aliases);
return;

diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 743422ad900b..51d8d0d35e63 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -40,6 +40,7 @@ struct perf_pmu_info {

struct perf_pmu_alias {
char *name;
+ char *desc;
struct list_head terms; /* HEAD struct parse_events_term -> list */
struct list_head list; /* ELEM */
char unit[UNIT_MAX_LEN+1];
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:05 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

Implement the code to match CPU types to mapfile types for x86 based on
CPUID. This extends an existing similar function, but changes it to use
the x86 mapfile cpu description. This allows resolving event lists
generated by jevents.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-6-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/arch/x86/util/header.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 146d12a1cec0..a74a48db26f5 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -19,8 +19,8 @@ cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
: "a" (op));
}

-int
-get_cpuid(char *buffer, size_t sz)
+static int
+__get_cpuid(char *buffer, size_t sz, const char *fmt)
{
unsigned int a, b, c, d, lvl;
int family = -1, model = -1, step = -1;
@@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz)
if (family >= 0x6)
model += ((a >> 16) & 0xf) << 4;
}
- nb = scnprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step);
+ nb = scnprintf(buffer, sz, fmt, vendor, family, model, step);

/* look for end marker to ensure the entire data fit */
if (strchr(buffer, '$')) {
@@ -57,3 +57,21 @@ get_cpuid(char *buffer, size_t sz)
}
return -1;
}
+
+int
+get_cpuid(char *buffer, size_t sz)
+{
+ return __get_cpuid(buffer, sz, "%s,%u,%u,%u$");
+}
+
+char *
+get_cpuid_str(void)
+{
+ char *buf = malloc(128);
+
+ if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
+ free(buf);
+ return NULL;
+ }
+ return buf;
+}
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:05 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

This is a modified version of an earlier patch by Andi Kleen.

We expect architectures to create JSON files describing the performance
monitoring (PMU) events that each CPU model/family of the architecture
supports.

Following is an example of the JSON file entry for an x86 event:

[
...
{
"EventCode": "0x00",
"UMask": "0x01",
"EventName": "INST_RETIRED.ANY",
"BriefDescription": "Instructions retired from execution.",
"PublicDescription": "Instructions retired from execution.",
"Counter": "Fixed counter 1",
"CounterHTOff": "Fixed counter 1",
"SampleAfterValue": "2000003",
"SampleAfterValue": "2000003",
"MSRIndex": "0",
"MSRValue": "0",
"TakenAlone": "0",
"CounterMask": "0",
"Invert": "0",
"AnyThread": "0",
"EdgeDetect": "0",
"PEBS": "0",
"PRECISE_STORE": "0",
"Errata": "null",
"Offcore": "0"
},
...

]

All the PMU events supported by a CPU model/family must be grouped into
"topics" such as "Pipelining", "Floating-point", "Virtual-memory" etc.

All events belonging to a topic must be placed in a separate JSON file
(eg: "Pipelining.json") and all the topic JSON files for a CPU model must
be in a separate directory.

Eg: for the CPU model "Silvermont_core":

$ ls tools/perf/pmu-events/arch/x86/Silvermont_core
Floating-point.json
Memory.json
Other.json
Pipelining.json
Virtualmemory.json

Finally, to allow multiple CPU models to share a single set of JSON files,
architectures must provide a mapping between a model and its set of events:

$ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
GenuineIntel-6-4D,V13,Silvermont_core,core
GenuineIntel-6-4C,V13,Silvermont_core,core

which maps each CPU, identified by [vendor, family, model, version, type]
to a directory of JSON files. Thus two (or more) CPU models support the
set of PMU events listed in the directory.

tools/perf/pmu-events/arch/x86/Silvermont_core/

Given this organization of files, the program, jevents:

- locates all JSON files for each CPU-model of the architecture,

- parses all JSON files for the CPU-model and generates a C-style
"PMU-events table" (pmu-events.c) for the model

- locates a mapfile for the architecture

- builds a global table, mapping each model of CPU to the corresponding
PMU-events table.

The 'pmu-events.c' is generated when building perf and added to libperf.a.
The global pmu_events_map[] table in this pmu-events.c will be used
in perf in a follow-on patch.

If the architecture does not have any JSON files or there is an error in
processing them, an empty mapping file is created. This would allow the
build of perf to proceed even if we are not able to provide aliases for
events.

The parser for JSON files allows parsing Intel style JSON event files. This
allows using an Intel event list directly with perf. The Intel event lists
can be quite large and are too big to store in unswappable kernel memory.

The conversion from JSON to C-style is straightforward. The parser knows
(very little) Intel specific information, and can be easily extended to
handle fields for other CPUs.

The parser code is partially shared with an independent parsing library,
which is 2-clause BSD licensed. To avoid any conflicts I marked those
files as BSD licensed too. As part of perf they become GPLv2.

Committer notes:

Fixes:

1) Limit maxfds to 512 to avoid nftw() segfaulting on alloca() with a
big rlim_max, as in docker containers - acme

2) Make jevents a hostprog, supporting cross compilation - jolsa

3) Use HOSTCC for jevents final step - acme

4) Define _GNU_SOURCE for asprintf, as we can't use CC's EXTRA_CFLAGS,
that has to have --sysroot on the Android NDK 24 - acme

5) Removed $(srctree)/tools/perf/pmu-events/pmu-events.c from the
'clean' target, it is generated on $(OUTPUT)pmu-events/pmu-events.c,
which is already taken care of in the original patch - acme

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Jiri Olsa <jo...@redhat.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-3-g...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20160927141846.GA6589@krava
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Makefile.perf | 34 +-
tools/perf/pmu-events/Build | 13 +
tools/perf/pmu-events/jevents.c | 767 +++++++++++++++++++++++++++++++++++++
tools/perf/pmu-events/jevents.h | 17 +
tools/perf/pmu-events/json.h | 6 +
tools/perf/pmu-events/pmu-events.h | 36 ++
6 files changed, 869 insertions(+), 4 deletions(-)
create mode 100644 tools/perf/pmu-events/Build
create mode 100644 tools/perf/pmu-events/jevents.c
create mode 100644 tools/perf/pmu-events/jevents.h
create mode 100644 tools/perf/pmu-events/pmu-events.h

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index d710db16b963..982d6439bb07 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -144,6 +144,10 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)

LD += $(EXTRA_LDFLAGS)

+HOSTCC ?= gcc
+HOSTLD ?= ld
+HOSTAR ?= ar
+
PKG_CONFIG = $(CROSS_COMPILE)pkg-config

RM = rm -f
@@ -345,8 +349,18 @@ strip: $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o

export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
+export HOSTCC HOSTLD HOSTAR
include $(srctree)/tools/build/Makefile.include

+JEVENTS := $(OUTPUT)pmu-events/jevents
+JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
+
+PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
+
+export JEVENTS
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
$(PERF_IN): prepare FORCE
@(test -f ../../include/uapi/linux/perf_event.h && ( \
(diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \
@@ -443,9 +457,18 @@ $(PERF_IN): prepare FORCE
|| echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true
$(Q)$(MAKE) $(build)=perf

-$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
+$(JEVENTS_IN): FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents
+
+$(JEVENTS): $(JEVENTS_IN)
+ $(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@
+
+$(PMU_EVENTS_IN): $(JEVENTS) FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
+
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
- $(PERF_IN) $(LIBS) -o $@
+ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@

$(GTK_IN): fixdep FORCE
$(Q)$(MAKE) $(build)=gtk
@@ -474,6 +497,8 @@ perf.spec $(SCRIPTS) \
ifneq ($(OUTPUT),)
%.o: $(OUTPUT)%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
+pmu-events/%.o: $(OUTPUT)pmu-events/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
util/%.o: $(OUTPUT)util/%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
bench/%.o: $(OUTPUT)bench/%.o
@@ -729,10 +754,11 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
- $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
+ $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
- $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c
+ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
+ $(OUTPUT)pmu-events/pmu-events.c
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean)

diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
new file mode 100644
index 000000000000..9213a1273697
--- /dev/null
+++ b/tools/perf/pmu-events/Build
@@ -0,0 +1,13 @@
+hostprogs := jevents
+
+jevents-y += json.o jsmn.o jevents.o
+pmu-events-y += pmu-events.o
+JDIR = pmu-events/arch/$(ARCH)
+JSON = $(shell [ -d $(JDIR) ] && \
+ find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
+#
+# Locate/process JSON files in pmu-events/arch/
+# directory and create tables in pmu-events.c.
+#
+$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
+ $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
new file mode 100644
index 000000000000..c4c074a49b6e
--- /dev/null
+++ b/tools/perf/pmu-events/jevents.c
@@ -0,0 +1,767 @@
+#define _XOPEN_SOURCE 500 /* needed for nftw() */
+#define _GNU_SOURCE /* needed for asprintf() */
+
+/* Parse event JSON files */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <dirent.h>
+#include <sys/time.h> /* getrlimit */
+#include <sys/resource.h> /* getrlimit */
+#include <ftw.h>
+#include <sys/stat.h>
+#include "jsmn.h"
+#include "json.h"
+#include "jevents.h"
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((unused))
+#endif
+
+int verbose;
+char *prog;
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+
+ int ret;
+ va_list args;
+
+ if (var < level)
+ return 0;
+
+ va_start(args, fmt);
+
+ ret = vfprintf(stderr, fmt, args);
+
+ va_end(args);
+
+ return ret;
+}
+
+__attribute__((weak)) char *get_cpu_str(void)
+{
+ return NULL;
+}
+
+static void addfield(char *map, char **dst, const char *sep,
+ const char *a, jsmntok_t *bt)
+{
+ unsigned int len = strlen(a) + 1 + strlen(sep);
+ int olen = *dst ? strlen(*dst) : 0;
+ int blen = bt ? json_len(bt) : 0;
+ char *out;
+
+ out = realloc(*dst, len + olen + blen);
+ if (!out) {
+ /* Don't add field in this case */
+ return;
+ }
+ *dst = out;
+
+ if (!olen)
+ *(*dst) = 0;
+ else
+ strcat(*dst, sep);
+ strcat(*dst, a);
+ if (bt)
+ strncat(*dst, map + bt->start, blen);
+}
+
+/*
+ * Lower-case an event name in place.
+ *
+ * @s may be NULL: json_events() calls this with the name it collected,
+ * which stays NULL when an entry has no "EventName" field. In that case
+ * nothing is done.
+ */
+static void fixname(char *s)
+{
+	if (!s)
+		return;
+
+	/* <ctype.h> functions need a value representable as unsigned char */
+	for (; *s; s++)
+		*s = tolower((unsigned char)*s);
+}
+
+/*
+ * Tidy an event description in place: if its last non-whitespace
+ * character is a '.', cut the string there (trailing dots look ugly in
+ * perf list).
+ *
+ * NULL and empty strings are left untouched, and the scan never moves
+ * in front of the first character of @s.
+ */
+static void fixdesc(char *s)
+{
+	char *e;
+
+	if (!s || !*s)
+		return;
+
+	/* Start at the last character and skip trailing whitespace */
+	e = s + strlen(s) - 1;
+	while (e > s && isspace((unsigned char)*e))
+		--e;
+	if (*e == '.')
+		*e = 0;
+}
+
+static struct msrmap {
+ const char *num;
+ const char *pname;
+} msrmap[] = {
+ { "0x3F6", "ldlat=" },
+ { "0x1A6", "offcore_rsp=" },
+ { "0x1A7", "offcore_rsp=" },
+ { NULL, NULL }
+};
+
+static struct field {
+ const char *field;
+ const char *kernel;
+} fields[] = {
+ { "EventCode", "event=" },
+ { "UMask", "umask=" },
+ { "CounterMask", "cmask=" },
+ { "Invert", "inv=" },
+ { "AnyThread", "any=" },
+ { "EdgeDetect", "edge=" },
+ { "SampleAfterValue", "period=" },
+ { NULL, NULL }
+};
+
+static void cut_comma(char *map, jsmntok_t *newval)
+{
+ int i;
+
+ /* Cut off everything after comma */
+ for (i = newval->start; i < newval->end; i++) {
+ if (map[i] == ',')
+ newval->end = i;
+ }
+}
+
+static int match_field(char *map, jsmntok_t *field, int nz,
+ char **event, jsmntok_t *val)
+{
+ struct field *f;
+ jsmntok_t newval = *val;
+
+ for (f = fields; f->field; f++)
+ if (json_streq(map, field, f->field) && nz) {
+ cut_comma(map, &newval);
+ addfield(map, event, ",", f->kernel, &newval);
+ return 1;
+ }
+ return 0;
+}
+
+static struct msrmap *lookup_msr(char *map, jsmntok_t *val)
+{
+ jsmntok_t newval = *val;
+ static bool warned;
+ int i;
+
+ cut_comma(map, &newval);
+ for (i = 0; msrmap[i].num; i++)
+ if (json_streq(map, &newval, msrmap[i].num))
+ return &msrmap[i];
+ if (!warned) {
+ warned = true;
+ pr_err("%s: Unknown MSR in event file %.*s\n", prog,
+ json_len(val), map + val->start);
+ }
+ return NULL;
+}
+
+#define EXPECT(e, t, m) do { if (!(e)) { \
+ jsmntok_t *loc = (t); \
+ if (!(t)->start && (t) > tokens) \
+ loc = (t) - 1; \
+ pr_err("%s:%d: " m ", got %s\n", fn, \
+ json_line(map, loc), \
+ json_name(t)); \
+ goto out_free; \
+} } while (0)
+
+#define TOPIC_DEPTH 256
+static char *topic_array[TOPIC_DEPTH];
+static int topic_level;
+
+/*
+ * Build the topic string for the file currently being processed by
+ * concatenating topic_array[0..topic_level], then replace every '-'
+ * with ' ' and cut the result at the first '.'.
+ *
+ * Returns a malloc()ed string that the caller must free(), or NULL if
+ * asprintf() fails.
+ */
+static char *get_topic(void)
+{
+	char *tp = NULL;
+	char *c;
+	int i;
+
+	for (i = 0; i < topic_level + 1; i++) {
+		char *next;
+
+		/*
+		 * Write into a separate pointer: the contents of the
+		 * target are undefined when asprintf() fails, and the
+		 * string built so far must still be freed in that case.
+		 */
+		if (asprintf(&next, "%s%s", tp ? tp : "",
+			     topic_array[i] ? topic_array[i] : "") < 0) {
+			pr_info("%s: asprintf() error %s\n", prog,
+				strerror(errno));
+			free(tp);
+			return NULL;
+		}
+		free(tp);
+		tp = next;
+	}
+
+	for (c = tp; c && *c; c++) {
+		if (*c == '-') {
+			*c = ' ';
+		} else if (*c == '.') {
+			*c = '\0';
+			break;
+		}
+	}
+
+	return tp;
+}
+
+/*
+ * Record @bname as the topic component for tree depth @level and make
+ * that depth the current topic_level.
+ *
+ * @level is the depth handed over by process_one_file(); it is shifted
+ * by two here, so entries at depth 2 use slot 0 of topic_array[].
+ *
+ * Returns 0 on success, -EINVAL if the shifted level falls outside
+ * topic_array[], or -ENOMEM if @bname cannot be duplicated.
+ */
+static int add_topic(int level, char *bname)
+{
+	char *topic;
+
+	level -= 2;
+
+	if (level < 0 || level >= TOPIC_DEPTH)
+		return -EINVAL;
+
+	topic = strdup(bname);
+	if (!topic) {
+		pr_info("%s: strdup() error %s for file %s\n", prog,
+			strerror(errno), bname);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Store into the slot for the new level, not the previous
+	 * topic_level, so that get_topic() finds one component per depth.
+	 */
+	free(topic_array[level]);
+	topic_array[level] = topic;
+	topic_level = level;
+	return 0;
+}
+
+struct perf_entry_data {
+ FILE *outfp;
+ char *topic;
+};
+
+static int close_table;
+
+static void print_events_table_prefix(FILE *fp, const char *tblname)
+{
+ fprintf(fp, "struct pmu_event %s[] = {\n", tblname);
+ close_table = 1;
+}
+
+static int print_events_table_entry(void *data, char *name, char *event,
+ char *desc)
+{
+ struct perf_entry_data *pd = data;
+ FILE *outfp = pd->outfp;
+ char *topic = pd->topic;
+
+ /*
+ * TODO: Remove formatting chars after debugging to reduce
+ * string lengths.
+ */
+ fprintf(outfp, "{\n");
+
+ fprintf(outfp, "\t.name = \"%s\",\n", name);
+ fprintf(outfp, "\t.event = \"%s\",\n", event);
+ fprintf(outfp, "\t.desc = \"%s\",\n", desc);
+ fprintf(outfp, "\t.topic = \"%s\",\n", topic);
+
+ fprintf(outfp, "},\n");
+
+ return 0;
+}
+
+static void print_events_table_suffix(FILE *outfp)
+{
+ fprintf(outfp, "{\n");
+
+ fprintf(outfp, "\t.name = 0,\n");
+ fprintf(outfp, "\t.event = 0,\n");
+ fprintf(outfp, "\t.desc = 0,\n");
+
+ fprintf(outfp, "},\n");
+ fprintf(outfp, "};\n");
+ close_table = 0;
+}
+
+/* Call func with each event in the json file */
+int json_events(const char *fn,
+ int (*func)(void *data, char *name, char *event, char *desc),
+ void *data)
+{
+ int err = -EIO;
+ size_t size;
+ jsmntok_t *tokens, *tok;
+ int i, j, len;
+ char *map;
+
+ if (!fn)
+ return -ENOENT;
+
+ tokens = parse_json(fn, &map, &size, &len);
+ if (!tokens)
+ return -EIO;
+ EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array");
+ tok = tokens + 1;
+ for (i = 0; i < tokens->size; i++) {
+ char *event = NULL, *desc = NULL, *name = NULL;
+ struct msrmap *msr = NULL;
+ jsmntok_t *msrval = NULL;
+ jsmntok_t *precise = NULL;
+ jsmntok_t *obj = tok++;
+
+ EXPECT(obj->type == JSMN_OBJECT, obj, "expected object");
+ for (j = 0; j < obj->size; j += 2) {
+ jsmntok_t *field, *val;
+ int nz;
+
+ field = tok + j;
+ EXPECT(field->type == JSMN_STRING, tok + j,
+ "Expected field name");
+ val = tok + j + 1;
+ EXPECT(val->type == JSMN_STRING, tok + j + 1,
+ "Expected string value");
+
+ nz = !json_streq(map, val, "0");
+ if (match_field(map, field, nz, &event, val)) {
+ /* ok */
+ } else if (json_streq(map, field, "EventName")) {
+ addfield(map, &name, "", "", val);
+ } else if (json_streq(map, field, "BriefDescription")) {
+ addfield(map, &desc, "", "", val);
+ fixdesc(desc);
+ } else if (json_streq(map, field, "PEBS") && nz) {
+ precise = val;
+ } else if (json_streq(map, field, "MSRIndex") && nz) {
+ msr = lookup_msr(map, val);
+ } else if (json_streq(map, field, "MSRValue")) {
+ msrval = val;
+ } else if (json_streq(map, field, "Errata") &&
+ !json_streq(map, val, "null")) {
+ addfield(map, &desc, ". ",
+ " Spec update: ", val);
+ } else if (json_streq(map, field, "Data_LA") && nz) {
+ addfield(map, &desc, ". ",
+ " Supports address when precise",
+ NULL);
+ }
+ /* ignore unknown fields */
+ }
+ if (precise && desc && !strstr(desc, "(Precise Event)")) {
+ if (json_streq(map, precise, "2"))
+ addfield(map, &desc, " ", "(Must be precise)",
+ NULL);
+ else
+ addfield(map, &desc, " ",
+ "(Precise event)", NULL);
+ }
+ if (msr != NULL)
+ addfield(map, &event, ",", msr->pname, msrval);
+ fixname(name);
+ err = func(data, name, event, desc);
+ free(event);
+ free(desc);
+ free(name);
+ if (err)
+ break;
+ tok += j;
+ }
+ EXPECT(tok - tokens == len, tok, "unexpected objects at end");
+ err = 0;
+out_free:
+ free_json(map, size, tokens);
+ return err;
+}
+
+static char *file_name_to_table_name(char *fname)
+{
+ unsigned int i;
+ int n;
+ int c;
+ char *tblname;
+
+ /*
+ * Ensure tablename starts with alphabetic character.
+ * Derive rest of table name from basename of the JSON file,
+ * replacing hyphens and stripping out .json suffix.
+ */
+ n = asprintf(&tblname, "pme_%s", basename(fname));
+ if (n < 0) {
+ pr_info("%s: asprintf() error %s for file %s\n", prog,
+ strerror(errno), fname);
+ return NULL;
+ }
+
+ for (i = 0; i < strlen(tblname); i++) {
+ c = tblname[i];
+
+ if (c == '-')
+ tblname[i] = '_';
+ else if (c == '.') {
+ tblname[i] = '\0';
+ break;
+ } else if (!isalnum(c) && c != '_') {
+ pr_err("%s: Invalid character '%c' in file name %s\n",
+ prog, c, basename(fname));
+ free(tblname);
+ tblname = NULL;
+ break;
+ }
+ }
+
+ return tblname;
+}
+
+static void print_mapping_table_prefix(FILE *outfp)
+{
+ fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n");
+}
+
+static void print_mapping_table_suffix(FILE *outfp)
+{
+ /*
+ * Print the terminating, NULL entry.
+ */
+ fprintf(outfp, "{\n");
+ fprintf(outfp, "\t.cpuid = 0,\n");
+ fprintf(outfp, "\t.version = 0,\n");
+ fprintf(outfp, "\t.type = 0,\n");
+ fprintf(outfp, "\t.table = 0,\n");
+ fprintf(outfp, "},\n");
+
+ /* and finally, the closing curly bracket for the struct */
+ fprintf(outfp, "};\n");
+}
+
+/*
+ * Read the mapfile at @fpath and write the pmu_events_map[] table to
+ * @outfp.
+ *
+ * Lines starting with '#' and empty lines are skipped; every other line
+ * must have the form "cpuid,version,filename,type".
+ *
+ * Returns 0 on success and -1 on error (allocation or open failure, an
+ * over-long or malformed line, or a file name that cannot be turned
+ * into a table name). On error the table already written to @outfp may
+ * be incomplete. The line buffer and the mapfile stream are released on
+ * every path.
+ */
+static int process_mapfile(FILE *outfp, char *fpath)
+{
+	int n = 16384;
+	FILE *mapfp;
+	char *save = NULL;
+	char *line, *p;
+	int line_num;
+	char *tblname;
+	int ret = -1;
+
+	pr_info("%s: Processing mapfile %s\n", prog, fpath);
+
+	line = malloc(n);
+	if (!line)
+		return -1;
+
+	mapfp = fopen(fpath, "r");
+	if (!mapfp) {
+		pr_info("%s: Error %s opening %s\n", prog, strerror(errno),
+			fpath);
+		goto out_free_line;
+	}
+
+	print_mapping_table_prefix(outfp);
+
+	line_num = 0;
+	while (1) {
+		char *cpuid, *version, *type, *fname;
+
+		line_num++;
+		p = fgets(line, n, mapfp);
+		if (!p)
+			break;
+
+		if (line[0] == '#' || line[0] == '\n')
+			continue;
+
+		if (line[strlen(line)-1] != '\n') {
+			/* TODO Deal with lines longer than 16K */
+			pr_info("%s: Mapfile %s: line %d too long, aborting\n",
+				prog, fpath, line_num);
+			goto out_close;
+		}
+		line[strlen(line)-1] = '\0';
+
+		cpuid = strtok_r(p, ",", &save);
+		version = strtok_r(NULL, ",", &save);
+		fname = strtok_r(NULL, ",", &save);
+		type = strtok_r(NULL, ",", &save);
+
+		/* A missing field would otherwise reach basename()/"%s" as NULL */
+		if (!cpuid || !version || !fname || !type) {
+			pr_info("%s: Mapfile %s: line %d malformed, aborting\n",
+				prog, fpath, line_num);
+			goto out_close;
+		}
+
+		tblname = file_name_to_table_name(fname);
+		if (!tblname) {
+			pr_info("%s: Mapfile %s: line %d: bad file name %s\n",
+				prog, fpath, line_num, fname);
+			goto out_close;
+		}
+
+		fprintf(outfp, "{\n");
+		fprintf(outfp, "\t.cpuid = \"%s\",\n", cpuid);
+		fprintf(outfp, "\t.version = \"%s\",\n", version);
+		fprintf(outfp, "\t.type = \"%s\",\n", type);
+
+		/*
+		 * CHECK: We can't use the type (eg "core") field in the
+		 * table name. For us to do that, we need to somehow tweak
+		 * the other caller of file_name_to_table(), process_json()
+		 * to determine the type. process_json() file has no way
+		 * of knowing these are "core" events unless file name has
+		 * core in it. If filename has core in it, we can safely
+		 * ignore the type field here also.
+		 */
+		fprintf(outfp, "\t.table = %s\n", tblname);
+		fprintf(outfp, "},\n");
+
+		free(tblname);
+	}
+
+	print_mapping_table_suffix(outfp);
+	ret = 0;
+
+out_close:
+	fclose(mapfp);
+out_free_line:
+	free(line);
+	return ret;
+}
+
+/*
+ * If we fail to locate/process JSON and map files, create a NULL mapping
+ * table. This would at least allow perf to build even if we can't find/use
+ * the aliases.
+ */
+static void create_empty_mapping(const char *output_file)
+{
+ FILE *outfp;
+
+ pr_info("%s: Creating empty pmu_events_map[] table\n", prog);
+
+ /* Truncate file to clear any partial writes to it */
+ outfp = fopen(output_file, "w");
+ if (!outfp) {
+ perror("fopen()");
+ _Exit(1);
+ }
+
+ fprintf(outfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+ print_mapping_table_prefix(outfp);
+ print_mapping_table_suffix(outfp);
+ fclose(outfp);
+}
+
+/*
+ * Return the number of file descriptors nftw() may keep open: half of
+ * the hard RLIMIT_NOFILE limit, capped at 512. Falls back to 512 when
+ * the limit cannot be queried.
+ *
+ * The comparison is done in rlim_t, before any narrowing to int, so a
+ * limit that does not fit in an int (for instance RLIM_INFINITY) yields
+ * the cap instead of a truncated value.
+ */
+static int get_maxfds(void)
+{
+	struct rlimit rlim;
+	const rlim_t cap = 512;
+
+	if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
+		rlim_t half = rlim.rlim_max / 2;
+
+		return half < cap ? (int)half : (int)cap;
+	}
+
+	return 512;
+}
+
+/*
+ * nftw() doesn't let us pass an argument to the processing function,
+ * so use global variables.
+ */
+static FILE *eventsfp;
+static char *mapfile;
+
+/*
+ * nftw() callback, invoked for every entry below the arch directory.
+ *
+ *  - level 0 (the start directory itself) is ignored;
+ *  - a level 1 directory closes any open events table and starts a new
+ *    one named after the directory;
+ *  - a level 1 file is remembered in 'mapfile' if it is named
+ *    "mapfile.csv" and ignored otherwise;
+ *  - deeper files ending in ".json" are parsed and their events are
+ *    written to eventsfp; other files are ignored.
+ *
+ * Returns 0 to continue the walk, non-zero on error.
+ */
+static int process_one_file(const char *fpath, const struct stat *sb,
+			    int typeflag, struct FTW *ftwbuf)
+{
+	char *tblname, *bname = (char *) fpath + ftwbuf->base;
+	int is_dir = typeflag == FTW_D;
+	int is_file = typeflag == FTW_F;
+	int level = ftwbuf->level;
+	int err = 0;
+
+	/* st_size is an off_t; cast it to match the conversion */
+	pr_debug("%s %d %7lld %-20s %s\n",
+		 is_file ? "f" : is_dir ? "d" : "x",
+		 level, (long long)sb->st_size, bname, fpath);
+
+	/* base dir */
+	if (level == 0)
+		return 0;
+
+	/* model directory, reset topic */
+	if (level == 1 && is_dir) {
+		if (close_table)
+			print_events_table_suffix(eventsfp);
+
+		/*
+		 * Drop file name suffix. Replace hyphens with underscores.
+		 * Fail if file name contains any characters other than
+		 * alphanumerics and underscores.
+		 */
+		tblname = file_name_to_table_name(bname);
+		if (!tblname) {
+			pr_info("%s: Error determining table name for %s\n", prog,
+				bname);
+			return -1;
+		}
+
+		print_events_table_prefix(eventsfp, tblname);
+		/* the name has been written out, the copy is no longer needed */
+		free(tblname);
+		return 0;
+	}
+
+	/*
+	 * Save the mapfile name for now. We will process mapfile
+	 * after processing all JSON files (so we can write out the
+	 * mapping table after all PMU events tables).
+	 *
+	 * TODO: Allow for multiple mapfiles? Punt for now.
+	 */
+	if (level == 1 && is_file) {
+		/* exact match, so that e.g. "mapfile.csv~" is not picked up */
+		if (!strcmp(bname, "mapfile.csv")) {
+			if (mapfile) {
+				pr_info("%s: Many mapfiles? Using %s, ignoring %s\n",
+					prog, mapfile, fpath);
+			} else {
+				mapfile = strdup(fpath);
+			}
+			return 0;
+		}
+
+		pr_info("%s: Ignoring file %s\n", prog, fpath);
+		return 0;
+	}
+
+	/*
+	 * If the file name does not have a .json extension,
+	 * ignore it. It could be a readme.txt for instance.
+	 */
+	if (is_file) {
+		size_t len = strlen(bname);
+
+		/* names shorter than the suffix cannot match */
+		if (len < 5 || strcmp(bname + len - 5, ".json")) {
+			pr_info("%s: Ignoring file without .json suffix %s\n", prog,
+				fpath);
+			return 0;
+		}
+	}
+
+	if (level > 1 && add_topic(level, bname))
+		return -ENOMEM;
+
+	/*
+	 * Assume all other files are JSON files.
+	 *
+	 * If mapfile refers to 'power7_core.json', we create a table
+	 * named 'power7_core'. Any inconsistencies between the mapfile
+	 * and directory tree could result in build failure due to table
+	 * names not being found.
+	 *
+	 * At least for now, be strict with processing JSON file names.
+	 * i.e. if JSON file name cannot be mapped to C-style table name,
+	 * fail.
+	 */
+	if (is_file) {
+		struct perf_entry_data data = {
+			.topic = get_topic(),
+			.outfp = eventsfp,
+		};
+
+		err = json_events(fpath, print_events_table_entry, &data);
+
+		free(data.topic);
+	}
+
+	return err;
+}
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+/*
+ * Starting in directory 'start_dirname', find the "mapfile.csv" and
+ * the set of JSON files for the architecture 'arch'.
+ *
+ * From each JSON file, create a C-style "PMU events table" from the
+ * JSON file (see struct pmu_event).
+ *
+ * From the mapfile, create a mapping between the CPU revisions and
+ * PMU event tables (see struct pmu_events_map).
+ *
+ * Write out the PMU events tables and the mapping table to the output
+ * file (pmu-events.c).
+ *
+ * If unable to process the JSON or arch files, create an empty mapping
+ * table so we can continue to build/use perf even if we cannot use the
+ * PMU event aliases.
+ *
+ * Usage: jevents <arch> <starting_dir> <output_file> [verbose]
+ *
+ * Returns 0 on success or after falling back to the empty table, 1 on
+ * a usage error and 2 if the output file cannot be created or written.
+ */
+int main(int argc, char *argv[])
+{
+	int rc;
+	int n;
+	int maxfds;
+	char ldirname[PATH_MAX];
+
+	const char *arch;
+	const char *output_file;
+	const char *start_dirname;
+
+	prog = basename(argv[0]);
+	if (argc < 4) {
+		pr_err("Usage: %s <arch> <starting_dir> <output_file>\n", prog);
+		return 1;
+	}
+
+	arch = argv[1];
+	start_dirname = argv[2];
+	output_file = argv[3];
+
+	if (argc > 4)
+		verbose = atoi(argv[4]);
+
+	eventsfp = fopen(output_file, "w");
+	if (!eventsfp) {
+		pr_err("%s Unable to create required file %s (%s)\n",
+		       prog, output_file, strerror(errno));
+		return 2;
+	}
+
+	/* Include pmu-events.h first */
+	fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+
+	/* Both path components come from the command line: bound the copy */
+	n = snprintf(ldirname, sizeof(ldirname), "%s/%s", start_dirname, arch);
+	if (n < 0 || (size_t)n >= sizeof(ldirname)) {
+		pr_info("%s: Path %s/%s is too long\n", prog, start_dirname,
+			arch);
+		goto empty_map;
+	}
+
+	/*
+	 * The mapfile allows multiple CPUids to point to the same JSON file,
+	 * so, not sure if there is a need for symlinks within the pmu-events
+	 * directory.
+	 *
+	 * For now, treat symlinks of JSON files as regular files and create
+	 * separate tables for each symlink (presumably, each symlink refers
+	 * to specific version of the CPU).
+	 */
+
+	maxfds = get_maxfds();
+	mapfile = NULL;
+	rc = nftw(ldirname, process_one_file, maxfds, 0);
+	if (rc) {
+		/* pr_info() only prints when verbose is set */
+		pr_info("%s: Error walking file tree %s\n", prog, ldirname);
+		goto empty_map;
+	}
+
+	if (close_table)
+		print_events_table_suffix(eventsfp);
+
+	if (!mapfile) {
+		pr_info("%s: No CPU->JSON mapping?\n", prog);
+		goto empty_map;
+	}
+
+	if (process_mapfile(eventsfp, mapfile)) {
+		pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
+		goto empty_map;
+	}
+
+	free(mapfile);
+
+	/* Flush explicitly so that a failed write is not mistaken for success */
+	if (fclose(eventsfp)) {
+		pr_err("%s Error writing file %s (%s)\n",
+		       prog, output_file, strerror(errno));
+		return 2;
+	}
+
+	return 0;
+
+empty_map:
+	fclose(eventsfp);
+	create_empty_mapping(output_file);
+	return 0;
+}
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
new file mode 100644
index 000000000000..996601f828b6
--- /dev/null
+++ b/tools/perf/pmu-events/jevents.h
@@ -0,0 +1,17 @@
+#ifndef JEVENTS_H
+#define JEVENTS_H 1
+
+int json_events(const char *fn,
+ int (*func)(void *data, char *name, char *event, char *desc),
+ void *data);
+char *get_cpu_str(void);
+
+#ifndef min
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#endif
diff --git a/tools/perf/pmu-events/json.h b/tools/perf/pmu-events/json.h
index f2745c7a87bc..278ebd32cfb6 100644
--- a/tools/perf/pmu-events/json.h
+++ b/tools/perf/pmu-events/json.h
@@ -20,6 +20,12 @@ extern int eprintf(int level, int var, const char *fmt, ...);
#define pr_err(fmt, ...) \
eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)

+#define pr_info(fmt, ...) \
+ eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_debug(fmt, ...) \
+ eprintf(2, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
#ifndef roundup
#define roundup(x, y) ( \
{ \
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
new file mode 100644
index 000000000000..70d54794e3cb
--- /dev/null
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -0,0 +1,36 @@
+#ifndef PMU_EVENTS_H
+#define PMU_EVENTS_H
+
+/*
+ * Describe each PMU event. Each CPU has a table of PMU events.
+ */
+struct pmu_event {
+ const char *name;
+ const char *event;
+ const char *desc;
+ const char *topic;
+};
+
+/*
+ *
+ * Map a CPU to its table of PMU events. The CPU is identified by the
+ * cpuid field, which is an arch-specific identifier for the CPU.
+ * (The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile.csv
+ * must match what get_cpuid_str() in tools/perf/arch/xxx/util/header.c returns.)
+ *
+ * The cpuid can contain any character other than the comma.
+ */
+struct pmu_events_map {
+ const char *cpuid;
+ const char *version;
+ const char *type; /* core, uncore etc */
+ struct pmu_event *table;
+};
+
+/*
+ * Global table mapping each known CPU for the architecture to its
+ * table of PMU events.
+ */
+extern struct pmu_events_map pmu_events_map[];
+
+#endif
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:06 PM10/3/16
to
From: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>

At run time (when 'perf' is starting up), locate the specific table of
PMU events that corresponds to the current CPU. Using that table, create
aliases for each of the PMU events in the CPU. Then use these aliases
to parse the user specified perf event.

In short this would allow the user to specify events using their aliases
rather than raw event codes.

Based on input and some earlier patches from Andi Kleen, Jiri Olsa.

Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1473978296-20712-4-g...@linux.vnet.ibm.com
[ Make pmu_add_cpu_aliases() return void, since it was returning just '0' and
furthermore, even that was being discarded via an explicit (void) cast ]
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/header.h | 1 +
tools/perf/util/pmu.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)

diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d306ca118449..d30109b421ee 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -151,4 +151,5 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned);
*/
int get_cpuid(char *buffer, size_t sz);

+char *get_cpuid_str(void);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 2babcdf62839..10668b7f5272 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -12,6 +12,8 @@
#include "pmu.h"
#include "parse-events.h"
#include "cpumap.h"
+#include "header.h"
+#include "pmu-events/pmu-events.h"

struct perf_pmu_format {
char *name;
@@ -473,6 +475,61 @@ static struct cpu_map *pmu_cpumask(const char *name)
return cpus;
}

+/*
+ * Return the CPU id as a raw string.
+ *
+ * Each architecture should provide a more precise id string that
+ * can be used to match the architecture's "mapfile".
+ */
+char * __weak get_cpuid_str(void)
+{
+ return NULL;
+}
+
+/*
+ * From the pmu_events_map, find the table of PMU events that corresponds
+ * to the current running CPU. Then, add all PMU events from that table
+ * as aliases.
+ */
+static void pmu_add_cpu_aliases(struct list_head *head)
+{
+ int i;
+ struct pmu_events_map *map;
+ struct pmu_event *pe;
+ char *cpuid;
+
+ cpuid = get_cpuid_str();
+ if (!cpuid)
+ return;
+
+ i = 0;
+ while (1) {
+ map = &pmu_events_map[i++];
+ if (!map->table)
+ goto out;
+
+ if (!strcmp(map->cpuid, cpuid))
+ break;
+ }
+
+ /*
+ * Found a matching PMU events table. Create aliases
+ */
+ i = 0;
+ while (1) {
+ pe = &map->table[i++];
+ if (!pe->name)
+ break;
+
+ /* need type casts to override 'const' */
+ __perf_pmu__new_alias(head, NULL, (char *)pe->name,
+ (char *)pe->desc, (char *)pe->event);
+ }
+
+out:
+ free(cpuid);
+}
+
struct perf_event_attr * __weak
perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
@@ -497,6 +554,9 @@ static struct perf_pmu *pmu_lookup(const char *name)
if (pmu_aliases(name, &aliases))
return NULL;

+ if (!strcmp(name, "cpu"))
+ pmu_add_cpu_aliases(&aliases);
+
if (pmu_type(name, &type))
return NULL;

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:06 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

Add support to group the output of perf list by the Topic field in the
JSON file.

Example output:

% perf list
...
Cache:
l1d.replacement
[L1D data line replacements]
l1d_pend_miss.pending
[L1D miss oustandings duration in cycles]
l1d_pend_miss.pending_cycles
[Cycles with L1D load Misses outstanding]
l2_l1d_wb_rqsts.all
[Not rejected writebacks from L1D to L2 cache lines in any state]
l2_l1d_wb_rqsts.hit_e
[Not rejected writebacks from L1D to L2 cache lines in E state]
l2_l1d_wb_rqsts.hit_m
[Not rejected writebacks from L1D to L2 cache lines in M state]

...
Pipeline:
arith.fpu_div
[Divide operations executed]
arith.fpu_div_active
[Cycles when divider is busy executing divide operations]
baclears.any
[Counts the total number when the front end is resteered, mainly
when the BPU cannot provide a correct prediction and this is
corrected by other branch handling mechanisms at the front end]
br_inst_exec.all_branches
[Speculative and retired branches]
br_inst_exec.all_conditional
[Speculative and retired macro-conditional branches]
br_inst_exec.all_direct_jmp
[Speculative and retired macro-unconditional branches excluding
calls and indirects]
br_inst_exec.all_direct_near_call
[Speculative and retired direct near calls]
br_inst_exec.all_indirect_jump_non_call_ret

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-14-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/pmu.c | 37 +++++++++++++++++++++++++++----------
tools/perf/util/pmu.h | 1 +
2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 8ff382cb161d..b1474dcadfa2 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -223,7 +223,8 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
}

static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
- char *desc, char *val, char *long_desc)
+ char *desc, char *val, char *long_desc,
+ char *topic)
{
struct perf_pmu_alias *alias;
int ret;
@@ -259,6 +260,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
alias->desc = desc ? strdup(desc) : NULL;
alias->long_desc = long_desc ? strdup(long_desc) :
desc ? strdup(desc) : NULL;
+ alias->topic = topic ? strdup(topic) : NULL;

list_add_tail(&alias->list, list);

@@ -276,7 +278,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI

buf[ret] = 0;

- return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL);
+ return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL);
}

static inline bool pmu_alias_info_file(char *name)
@@ -535,7 +537,7 @@ static void pmu_add_cpu_aliases(struct list_head *head)
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
(char *)pe->desc, (char *)pe->event,
- (char *)pe->long_desc);
+ (char *)pe->long_desc, (char *)pe->topic);
}

out:
@@ -1055,19 +1057,26 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
return buf;
}

-struct pair {
+struct sevent {
char *name;
char *desc;
+ char *topic;
};

-static int cmp_pair(const void *a, const void *b)
+static int cmp_sevent(const void *a, const void *b)
{
- const struct pair *as = a;
- const struct pair *bs = b;
+ const struct sevent *as = a;
+ const struct sevent *bs = b;

/* Put extra events last */
if (!!as->desc != !!bs->desc)
return !!as->desc - !!bs->desc;
+ if (as->topic && bs->topic) {
+ int n = strcmp(as->topic, bs->topic);
+
+ if (n)
+ return n;
+ }
return strcmp(as->name, bs->name);
}

@@ -1101,9 +1110,10 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
char buf[1024];
int printed = 0;
int len, j;
- struct pair *aliases;
+ struct sevent *aliases;
int numdesc = 0;
int columns = pager_get_columns();
+ char *topic = NULL;

pmu = NULL;
len = 0;
@@ -1113,7 +1123,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
if (pmu->selectable)
len++;
}
- aliases = zalloc(sizeof(struct pair) * len);
+ aliases = zalloc(sizeof(struct sevent) * len);
if (!aliases)
goto out_enomem;
pmu = NULL;
@@ -1144,6 +1154,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,

aliases[j].desc = long_desc ? alias->long_desc :
alias->desc;
+ aliases[j].topic = alias->topic;
j++;
}
if (pmu->selectable &&
@@ -1156,7 +1167,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
}
}
len = j;
- qsort(aliases, len, sizeof(struct pair), cmp_pair);
+ qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
for (j = 0; j < len; j++) {
if (name_only) {
printf("%s ", aliases[j].name);
@@ -1165,6 +1176,12 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
if (aliases[j].desc && !quiet_flag) {
if (numdesc++ == 0)
printf("\n");
+ if (aliases[j].topic && (!topic ||
+ strcmp(topic, aliases[j].topic))) {
+ printf("%s%s:\n", topic ? "\n" : "",
+ aliases[j].topic);
+ topic = aliases[j].topic;
+ }
printf(" %-50s\n", aliases[j].name);
printf("%*s", 8, "[");
wordwrap(aliases[j].desc, 8, columns, 0);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 2fb8aa0fb47f..25712034c815 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -42,6 +42,7 @@ struct perf_pmu_alias {
char *name;
char *desc;
char *long_desc;
+ char *topic;

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:06 PM10/3/16
to
From: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>

This avoids the JSON PMU events parser having to know whether its
aliases are for perf stat or perf record.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-20-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/parse-events.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8f88f63bf96c..d445b1144c87 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -924,6 +924,7 @@ config_term_avail(int term_type, struct parse_events_error *err)
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
case PARSE_EVENTS__TERM_TYPE_NAME:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
return true;
default:
if (!err)
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:06 PM10/3/16
to
From: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>

Implement code that returns the generic CPU ID string for Powerpc. This
will be used to identify the specific table of PMU events to
parse/compare user specified events against.

Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-5-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/arch/powerpc/util/header.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index f8ccee132867..9aaa6f5a9347 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -32,3 +32,14 @@ get_cpuid(char *buffer, size_t sz)
}
return -1;
}
+
+char *
+get_cpuid_str(void)
+{
+ char *bufp;
+
+ if (asprintf(&bufp, "%.8lx", mfspr(SPRN_PVR)) < 0)
+ bufp = NULL;
+
+ return bufp;
+}
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:09 PM10/3/16
to
From: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>

Previously we were dropping the useful longer descriptions that some
events have in the event list completely. This patch makes them appear with
perf list.

Old perf list:

baclears:
baclears.all
[Counts the number of baclears]

vs new:

perf list -v:
...
baclears:
baclears.all
[The BACLEARS event counts the number of times the front end is
resteered, mainly when the Branch Prediction Unit cannot provide
a correct prediction and this is corrected by the Branch Address
Calculator at the front end. The BACLEARS.ANY event counts the
number of baclears for any type of branch]

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-13-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf-list.txt | 6 +++++-
tools/perf/builtin-list.c | 16 +++++++++++-----
tools/perf/util/parse-events.c | 5 +++--
tools/perf/util/parse-events.h | 3 ++-
tools/perf/util/pmu.c | 15 ++++++++++-----
tools/perf/util/pmu.h | 4 +++-
6 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 72209bc0f523..41857cce5e86 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
-'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob]
+'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob]

DESCRIPTION
-----------
@@ -20,6 +20,10 @@ OPTIONS
--no-desc::
Don't print descriptions.

+-v::
+--long-desc::
+Print longer event descriptions.
+

[[EVENT_MODIFIERS]]
EVENT MODIFIERS
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index b14cb162f841..ba9322ff858b 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -22,14 +22,17 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
{
int i;
bool raw_dump = false;
+ bool long_desc_flag = false;
struct option list_options[] = {
OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
OPT_BOOLEAN('d', "desc", &desc_flag,
"Print extra event descriptions. --no-desc to not print."),
+ OPT_BOOLEAN('v', "long-desc", &long_desc_flag,
+ "Print longer event descriptions."),
OPT_END()
};
const char * const list_usage[] = {
- "perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+ "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
NULL
};

@@ -44,7 +47,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
printf("\nList of pre-defined events (to be used in -e):\n\n");

if (argc == 0) {
- print_events(NULL, raw_dump, !desc_flag);
+ print_events(NULL, raw_dump, !desc_flag, long_desc_flag);
return 0;
}

@@ -65,14 +68,16 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
strcmp(argv[i], "hwcache") == 0)
print_hwcache_events(NULL, raw_dump);
else if (strcmp(argv[i], "pmu") == 0)
- print_pmu_events(NULL, raw_dump, !desc_flag);
+ print_pmu_events(NULL, raw_dump, !desc_flag,
+ long_desc_flag);
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump);
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;

if (sep == NULL) {
- print_events(argv[i], raw_dump, !desc_flag);
+ print_events(argv[i], raw_dump, !desc_flag,
+ long_desc_flag);
continue;
}
sep_idx = sep - argv[i];
@@ -94,7 +99,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
print_symbol_events(s, PERF_TYPE_SOFTWARE,
event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
print_hwcache_events(s, raw_dump);
- print_pmu_events(s, raw_dump, !desc_flag);
+ print_pmu_events(s, raw_dump, !desc_flag,
+ long_desc_flag);
print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump);
free(s);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3966ad79ee8d..8f88f63bf96c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2263,7 +2263,8 @@ out_enomem:
/*
* Print the help text for the event symbols:
*/
-void print_events(const char *event_glob, bool name_only, bool quiet_flag)
+void print_events(const char *event_glob, bool name_only, bool quiet_flag,
+ bool long_desc)
{
print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
@@ -2273,7 +2274,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag)

print_hwcache_events(event_glob, name_only);

- print_pmu_events(event_glob, name_only, quiet_flag);
+ print_pmu_events(event_glob, name_only, quiet_flag, long_desc);

if (event_glob != NULL)
return;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 3bf376b42bf9..da246a3ddb69 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -172,7 +172,8 @@ void parse_events_update_lists(struct list_head *list_event,
void parse_events_evlist_error(struct parse_events_evlist *data,
int idx, const char *str);

-void print_events(const char *event_glob, bool name_only, bool quiet);
+void print_events(const char *event_glob, bool name_only, bool quiet,
+ bool long_desc);

struct event_symbol {
const char *symbol;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 79242cf9bb79..8ff382cb161d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -223,7 +223,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
}

static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
- char *desc, char *val)
+ char *desc, char *val, char *long_desc)
{
struct perf_pmu_alias *alias;
int ret;
@@ -257,6 +257,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
}

alias->desc = desc ? strdup(desc) : NULL;
+ alias->long_desc = long_desc ? strdup(long_desc) :
+ desc ? strdup(desc) : NULL;

list_add_tail(&alias->list, list);

@@ -274,7 +276,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI

buf[ret] = 0;

- return __perf_pmu__new_alias(list, dir, name, NULL, buf);
+ return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL);
}

static inline bool pmu_alias_info_file(char *name)
@@ -532,7 +534,8 @@ static void pmu_add_cpu_aliases(struct list_head *head)

/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
- (char *)pe->desc, (char *)pe->event);
+ (char *)pe->desc, (char *)pe->event,
+ (char *)pe->long_desc);
}

out:
@@ -1090,7 +1093,8 @@ static void wordwrap(char *s, int start, int max, int corr)
}
}

-void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag)
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
+ bool long_desc)
{
struct perf_pmu *pmu;
struct perf_pmu_alias *alias;
@@ -1138,7 +1142,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag)
if (!aliases[j].name)
goto out_enomem;

- aliases[j].desc = alias->desc;
+ aliases[j].desc = long_desc ? alias->long_desc :
+ alias->desc;
j++;
}
if (pmu->selectable &&
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 7b47192e03b5..2fb8aa0fb47f 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -41,6 +41,7 @@ struct perf_pmu_info {
struct perf_pmu_alias {
char *name;
char *desc;
+ char *long_desc;
struct list_head terms; /* HEAD struct parse_events_term -> list */
struct list_head list; /* ELEM */
char unit[UNIT_MAX_LEN+1];
@@ -72,7 +73,8 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);

struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);

-void print_pmu_events(const char *event_glob, bool name_only, bool quiet);
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
+ bool long_desc);

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:12 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

To work with existing mapfiles, assume that the first line in
'mapfile.csv' is a header line and skip over it.

Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Link: http://lkml.kernel.org/r/1473978296-20712-15-g...@linux.vnet.ibm.com
Cc: linuxp...@lists.ozlabs.org
Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/pmu-events/jevents.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index c4c074a49b6e..c9bf9a7dc7b2 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -477,7 +477,12 @@ static int process_mapfile(FILE *outfp, char *fpath)

print_mapping_table_prefix(outfp);

- line_num = 0;
+ /* Skip first line (header) */
+ p = fgets(line, n, mapfp);
+ if (!p)
+ goto out;
+
+ line_num = 1;
while (1) {
char *cpuid, *version, *type, *fname;

@@ -521,8 +526,8 @@ static int process_mapfile(FILE *outfp, char *fpath)
fprintf(outfp, "},\n");
}

+out:
print_mapping_table_suffix(outfp);
-
return 0;
}

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:12 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

Automatically adapt the now wider and word wrapped perf list output to
wider terminals. This requires querying the terminal before the auto
pager takes over, and exporting this information from the pager
subsystem.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mi...@kernel.org>
Acked-by: Jiri Olsa <jo...@redhat.com>
Acked-by: Namhyung Kim <namh...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-8-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/lib/subcmd/pager.c | 16 ++++++++++++++++
tools/lib/subcmd/pager.h | 1 +
tools/perf/util/pmu.c | 3 ++-
3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c
index d50f3b58606b..6518bea926d6 100644
--- a/tools/lib/subcmd/pager.c
+++ b/tools/lib/subcmd/pager.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <string.h>
#include <signal.h>
+#include <sys/ioctl.h>
#include "pager.h"
#include "run-command.h"
#include "sigchain.h"
@@ -14,6 +15,7 @@
*/

static int spawned_pager;
+static int pager_columns;

void pager_init(const char *pager_env)
{
@@ -58,9 +60,12 @@ static void wait_for_pager_signal(int signo)
void setup_pager(void)
{
const char *pager = getenv(subcmd_config.pager_env);
+ struct winsize sz;

if (!isatty(1))
return;
+ if (ioctl(1, TIOCGWINSZ, &sz) == 0)
+ pager_columns = sz.ws_col;
if (!pager)
pager = getenv("PAGER");
if (!(pager || access("/usr/bin/pager", X_OK)))
@@ -98,3 +103,14 @@ int pager_in_use(void)
{
return spawned_pager;
}
+
+int pager_get_columns(void)
+{
+ char *s;
+
+ s = getenv("COLUMNS");
+ if (s)
+ return atoi(s);
+
+ return (pager_columns ? pager_columns : 80) - 2;
+}
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
index 8b83714ecf73..623f5542d05d 100644
--- a/tools/lib/subcmd/pager.h
+++ b/tools/lib/subcmd/pager.h
@@ -5,5 +5,6 @@ extern void pager_init(const char *pager_env);

extern void setup_pager(void);
extern int pager_in_use(void);
+extern int pager_get_columns(void);

#endif /* __SUBCMD_PAGER_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 9857fb14ea86..7b46e772f5f9 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -14,6 +14,7 @@
#include "cpumap.h"
#include "header.h"
#include "pmu-events/pmu-events.h"
+#include "cache.h"

struct perf_pmu_format {
char *name;
@@ -1092,7 +1093,7 @@ void print_pmu_events(const char *event_glob, bool name_only)
int len, j;
struct pair *aliases;
int numdesc = 0;
- int columns = 78;
+ int columns = pager_get_columns();

pmu = NULL;
len = 0;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 3, 2016, 10:50:12 PM10/3/16
to
From: Andi Kleen <a...@linux.intel.com>

The JSON event lists use a different encoding for fixed counters than
perf for instructions and cycles (ref-cycles is ok).

This led to some common events like inst_retired.any or
cpu_clk_unhalted.thread not counting, when specified with their JSON
name.

Special case these events in the jevents conversion process. I prefer
to not touch the JSON files for this, as it's intended that standard
JSON files can be just dropped into the perf build without changes.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
[Fix minor compile error]
Acked-by: Ingo Molnar <mi...@kernel.org>
Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: linuxp...@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1473978296-20712-18-g...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/pmu-events/jevents.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 13f4284721d5..04e106e799ac 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -306,6 +306,29 @@ static void print_events_table_suffix(FILE *outfp)
close_table = 0;
}

+static struct fixed {
+ const char *name;
+ const char *event;
+} fixed[] = {
+ { "inst_retired.any", "event=0xc0" },
+ { "cpu_clk_unhalted.thread", "event=0x3c" },
+ { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
+ { NULL, NULL},
+};
+
+/*
+ * Handle different fixed counter encodings between JSON and perf.
+ */
+static char *real_event(const char *name, char *event)
+{
+ int i;
+
+ for (i = 0; fixed[i].name; i++)
+ if (!strcasecmp(name, fixed[i].name))
+ return (char *)fixed[i].event;
+ return event;
+}
+
/* Call func with each event in the json file */
int json_events(const char *fn,
int (*func)(void *data, char *name, char *event, char *desc,
@@ -392,7 +415,7 @@ int json_events(const char *fn,
addfield(map, &event, ",", msr->pname, msrval);
fixname(name);

- err = func(data, name, event, desc, long_desc);
+ err = func(data, name, real_event(name, event), desc, long_desc);
free(event);
free(desc);
free(name);
--
2.7.4

Ingo Molnar

unread,
Oct 4, 2016, 4:10:06 AM10/4/16
to

* Arnaldo Carvalho de Melo <ac...@kernel.org> wrote:

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:06 PM10/6/16
to
Hi Ingo,

Please consider pulling,

- Arnaldo

Build and test stats at the end of the message.

The following changes since commit 8657355f5b5f657407efc12a2223e8a3a6d658de:

Merge tag 'perf-core-for-mingo-20161003' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent (2016-10-04 10:04:47 +0200)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-20161005

for you to fetch changes up to 87095f7ddeff3038a0cf8e6574922f9c11688619:

tools build: Add feature detection for g++ (2016-10-05 19:59:35 -0300)

----------------------------------------------------------------
perf/core improvements and fixes:

- Intel PT timestamp fixes (Adrian Hunter)

- Fix Intel JSON fixed counter conversions (Andi Kleen)

- Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo)

- Add some more tool tips (Donghyun Kim, Kim SeonYoung, Nambong Ha)

- Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or
equal to the first event (Namhyung Kim)

- Fix uretprobe probe placement on ppc64le (Ravi Bangoria)

- Support building C++ source files and add feature detection for g++,
prep work for supporting a builtin clang/llvm, to remove the need for having
that toolchain installed to automagically build BPF scriptlets that then
get uploaded to the kernel via sys_bpf() (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

----------------------------------------------------------------
Adrian Hunter (2):
perf intel-pt: Fix estimated timestamps for cycle-accurate mode
perf intel-pt: Fix MTC timestamp calculation for large MTC periods

Andi Kleen (1):
perf jevents: Fix Intel JSON fixed counter conversions

Arnaldo Carvalho de Melo (3):
perf bench mem: Sync memcpy assembly sources with the kernel
tools: Synchronize tools/arch/x86/include/asm/cpufeatures.h
tools: Synchronize tools/include/uapi/linux/bpf.h

Donghyun Kim (1):
perf report/top: Add a tip about system-wide collection from all CPUs

Kim SeonYoung (1):
perf report/top: Add a tip about source line numbers with overhead

Nambong Ha (1):
perf top/report: Add tips about a list option

Namhyung Kim (1):
tools lib traceevent: Fix kbuffer_read_at_offset()

Ravi Bangoria (1):
perf uretprobe ppc64le: Fix probe location

Wang Nan (2):
tools build: Support compiling C++ source file
tools build: Add feature detection for g++

tools/arch/x86/include/asm/cpufeatures.h | 1 -
tools/arch/x86/lib/memcpy_64.S | 6 ++--
tools/build/Build.include | 1 +
tools/build/Makefile.build | 7 ++++
tools/build/Makefile.feature | 2 +-
tools/build/feature/Makefile | 10 +++++-
tools/build/feature/test-cxx.cpp | 15 +++++++++
tools/include/uapi/linux/bpf.h | 4 +--
tools/lib/traceevent/kbuffer-parse.c | 1 +
tools/perf/Documentation/tips.txt | 4 +++
tools/perf/arch/powerpc/util/sym-handling.c | 3 +-
tools/perf/pmu-events/jevents.c | 2 ++
.../perf/util/intel-pt-decoder/intel-pt-decoder.c | 38 ++++++++++++++++++++++
13 files changed, 85 insertions(+), 9 deletions(-)
create mode 100644 tools/build/feature/test-cxx.cpp

[root@jouet ~]# time dm
real 44m58.202s
user 0m2.864s
sys 0m1.975s
[root@jouet ~]#

[acme@jouet linux]$ make -C tools/perf build-test
make: Entering directory '/home/acme/git/linux/tools/perf'
tarpkg: ./tests/perf-targz-src-pkg .
make_static_O: make LDFLAGS=-static
make_no_libdw_dwarf_unwind_O: make NO_LIBDW_DWARF_UNWIND=1
make_install_prefix_slash_O: make install prefix=/tmp/krava/
make_no_ui_O: make NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
make_help_O: make help
make_no_libpython_O: make NO_LIBPYTHON=1
make_install_O: make install
make_install_prefix_O: make install prefix=/tmp/krava
make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1
make_no_libperl_O: make NO_LIBPERL=1
make_debug_O: make DEBUG=1
make_perf_o_O: make perf.o
make_clean_all_O: make clean all
make_util_pmu_bison_o_O: make util/pmu-bison.o
make_minimal_O: make NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 NO_LIBCRYPTO=1 NO_SDT=1
make_no_demangle_O: make NO_DEMANGLE=1
make_no_libunwind_O: make NO_LIBUNWIND=1
make_with_babeltrace_O: make LIBBABELTRACE=1
make_no_libnuma_O: make NO_LIBNUMA=1
make_no_libbpf_O: make NO_LIBBPF=1
make_install_bin_O: make install-bin
make_util_map_o_O: make util/map.o
make_no_libelf_O: make NO_LIBELF=1
make_no_newt_O: make NO_NEWT=1
make_no_auxtrace_O: make NO_AUXTRACE=1
make_no_gtk2_O: make NO_GTK2=1
make_no_slang_O: make NO_SLANG=1
make_doc_O: make doc
make_pure_O: make
make_no_libbionic_O: make NO_LIBBIONIC=1
make_no_libaudit_O: make NO_LIBAUDIT=1
make_tags_O: make tags
make_no_backtrace_O: make NO_BACKTRACE=1
OK

[root@jouet ~]# perf test
[root@jouet ~]#

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:06 PM10/6/16
to
From: Andi Kleen <a...@linux.intel.com>

Intel fixed counters are special cases in the JSON conversion process
because their decoding differs between perf and the event files. Add
some missing entries in the conversion table.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1475696832-9188-4-...@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/pmu-events/jevents.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 79c2133bc534..41611d7f9873 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -312,6 +312,8 @@ static struct fixed {
const char *event;
} fixed[] = {
{ "inst_retired.any", "event=0xc0" },
+ { "inst_retired.any_p", "event=0xc0" },
+ { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
{ "cpu_clk_unhalted.thread", "event=0x3c" },
{ "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
{ NULL, NULL},
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:06 PM10/6/16
to
From: Ravi Bangoria <ravi.b...@linux.vnet.ibm.com>

Perf uretprobe probes on GEP (Global Entry Point), which fails to record
all function calls via LEP(Local Entry Point). Fix that by probing on LEP.

Objdump:

00000000100005f0 <doit>:
100005f0: 02 10 40 3c lis r2,4098
100005f4: 00 7f 42 38 addi r2,r2,32512
100005f8: a6 02 08 7c mflr r0
100005fc: 10 00 01 f8 std r0,16(r1)
10000600: f8 ff e1 fb std r31,-8(r1)

Before applying patch:

$ cat /sys/kernel/debug/tracing/uprobe_events
r:probe_uprobe_test/doit /home/ravi/uprobe_test:0x00000000000005f0

After applying patch:

$ cat /sys/kernel/debug/tracing/uprobe_events
r:probe_uprobe_test/doit /home/ravi/uprobe_test:0x00000000000005f8

This is not the case with kretprobes because the kernel itself finds LEP
and probes on it.

Signed-off-by: Ravi Bangoria <ravi.b...@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander...@linux.intel.com>
Cc: Balbir Singh <bsing...@gmail.com>
Cc: Masami Hiramatsu <mhir...@kernel.org>
Cc: Naveen N. Rao <naveen...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Link: http://lkml.kernel.org/r/1475576865-6562-1-git-s...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/arch/powerpc/util/sym-handling.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index ed9d5d15d5b6..1030a6e504bb 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -82,7 +82,8 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
*
* In addition, we shouldn't specify an offset for kretprobes.
*/
- if (pev->point.offset || pev->point.retprobe || !map || !sym)
+ if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
+ !map || !sym)
return;

lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:07 PM10/6/16
to
From: Nambong Ha <over...@gmail.com>

Add two tips that describe --list option of config sub-command and
explain how to choose particular config file location.

Signed-off-by: Nambong Ha <over...@gmail.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Taeung Song <tae...@kosslab.kr>
Link: http://lkml.kernel.org/r/1475191562-3240-1-gi...@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/tips.txt | 2 ++
1 file changed, 2 insertions(+)

diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index 9d3e1ee99e83..8a6479c0eac9 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -30,3 +30,5 @@ If you prefer Intel style assembly, try: perf annotate -M intel
For hierarchical output, try: perf report --hierarchy
Order by the overhead of source file name and line number: perf report -s srcline
System-wide collection from all CPUs: perf record -a
+Show current config key-value pairs: perf config --list
+Show user configuration overrides: perf config --user --list
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:07 PM10/6/16
to
From: Wang Nan <wang...@huawei.com>

Check if g++ is available. The result will be used by builtin clang and
LLVM support. Since LLVM requires C++11, this feature detector checks
std::move().

Signed-off-by: Wang Nan <wang...@huawei.com>
Cc: Alexei Starovoitov <a...@fb.com>
Cc: He Kuang <hek...@huawei.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Zefan Li <liz...@huawei.com>
Cc: pi3o...@163.com
Link: http://lkml.kernel.org/r/1474874832-134786-3-g...@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/build/Makefile.feature | 2 +-
tools/build/feature/Makefile | 10 +++++++++-
tools/build/feature/test-cxx.cpp | 15 +++++++++++++++
3 files changed, 25 insertions(+), 2 deletions(-)
create mode 100644 tools/build/feature/test-cxx.cpp

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index a120c6b755a9..ae52e029dd22 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -7,7 +7,7 @@ endif

feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef

feature_set = $(eval $(feature_set_code))
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index a0b29a311816..ac9c477a2a48 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -46,11 +46,13 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-get_cpuid.bin \
- test-sdt.bin
+ test-sdt.bin \
+ test-cxx.bin

FILES := $(addprefix $(OUTPUT),$(FILES))

CC := $(CROSS_COMPILE)gcc -MD
+CXX := $(CROSS_COMPILE)g++ -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config

all: $(FILES)
@@ -58,6 +60,9 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1

+__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
+ BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
+
###############################

$(OUTPUT)test-all.bin:
@@ -217,6 +222,9 @@ $(OUTPUT)test-bpf.bin:
$(OUTPUT)test-sdt.bin:
$(BUILD)

+$(OUTPUT)test-cxx.bin:
+ $(BUILDXX) -std=gnu++11
+
-include $(OUTPUT)*.d

###############################
diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp
new file mode 100644
index 000000000000..b1dee9a31d6c
--- /dev/null
+++ b/tools/build/feature/test-cxx.cpp
@@ -0,0 +1,15 @@
+#include <iostream>
+#include <memory>
+
+static void print_str(std::string s)
+{
+ std::cout << s << std::endl;
+}
+
+int main()
+{
+ std::string s("Hello World!");
+ print_str(std::move(s));
+ std::cout << "|" << s << "|" << std::endl;
+ return 0;
+}
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:08 PM10/6/16
to
From: Wang Nan <wang...@huawei.com>

Add a new rule to compile .cpp files to .o using g++. C++ support is required
for built-in clang and LLVM support.

Linker side support will be introduced by following commits.

Signed-off-by: Wang Nan <wang...@huawei.com>
Cc: Alexei Starovoitov <a...@fb.com>
Cc: He Kuang <hek...@huawei.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Zefan Li <liz...@huawei.com>
Cc: pi3o...@163.com
Link: http://lkml.kernel.org/r/1474874832-134786-2-g...@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/build/Build.include | 1 +
tools/build/Makefile.build | 7 +++++++
2 files changed, 8 insertions(+)

diff --git a/tools/build/Build.include b/tools/build/Build.include
index 02489380d79b..1dcb95e76f70 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -90,6 +90,7 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
# - per object C flags
# - BUILD_STR macro to allow '-D"$(variable)"' constructs
c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))

###
## HOSTCC C flags
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 190519a94ce5..99c0ccd2f176 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -61,6 +61,9 @@ quiet_cmd_cc_o_c = CC $@
quiet_cmd_host_cc_o_c = HOSTCC $@
cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<

+quiet_cmd_cxx_o_c = CXX $@
+ cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
+
quiet_cmd_cpp_i_c = CPP $@
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<

@@ -88,6 +91,10 @@ $(OUTPUT)%.o: %.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,$(host)cc_o_c)

+$(OUTPUT)%.o: %.cpp FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cxx_o_c)
+
$(OUTPUT)%.o: %.S FORCE
$(call rule_mkdir)
$(call if_changed_dep,$(host)cc_o_c)
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:13 PM10/6/16
to
From: Arnaldo Carvalho de Melo <ac...@redhat.com>

Commit 9a6fb28a355d ("x86/mce: Improve memcpy_mcsafe()") renames
memcpy_mcsafe() to memcpy_mcsafe_unrolled(), making
tools/arch/x86/lib/memcpy_64.S drift from its kernel counterpart,
triggering this warning in the perf build:

Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel

Sync that copy to acknowledge that, no changes to 'perf bench' are
needed, as this function is not used there.

Cc: Adrian Hunter <adrian...@intel.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Tony Luck <tony...@intel.com>
Cc: Wang Nan <wang...@huawei.com>
Link: http://lkml.kernel.org/n/tip-xfwc1raw8o...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/arch/x86/lib/memcpy_64.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 2ec0b0abbfaa..49e6ebac7e73 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)

#ifndef CONFIG_UML
/*
- * memcpy_mcsafe - memory copy with machine check exception handling
+ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(memcpy_mcsafe)
+ENTRY(memcpy_mcsafe_unrolled)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
.L_done_memcpy_trap:
xorq %rax, %rax
ret
-ENDPROC(memcpy_mcsafe)
+ENDPROC(memcpy_mcsafe_unrolled)

.section .fixup, "ax"
/* Return -EFAULT for any failure */
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 12:20:14 PM10/6/16
to
From: Adrian Hunter <adrian...@intel.com>

In cycle-accurate mode, timestamps can be calculated from CYC packets.
The decoder also estimates timestamps based on the number of
instructions since the last timestamp. For that to work in
cycle-accurate mode, the instruction count needs to be reset to zero
when a timestamp is calculated from a CYC packet, but that wasn't
happening, so fix it.

Signed-off-by: Adrian Hunter <adrian...@intel.com>
Cc: Jiri Olsa <jo...@redhat.com>
Cc: sta...@vger.kernel.org # v4.3+
Link: http://lkml.kernel.org/r/1475062896-22274-1-git-...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 7591a0c37473..3d1d446f037f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1353,6 +1353,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
timestamp, decoder->timestamp);
else
decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
}

/* Walk PSB+ packets when already in sync. */
--
2.7.4

Ingo Molnar

unread,
Oct 6, 2016, 6:40:06 PM10/6/16
to

* Arnaldo Carvalho de Melo <ac...@kernel.org> wrote:

tip-bot for Arnaldo Carvalho de Melo

unread,
Oct 6, 2016, 6:50:05 PM10/6/16
to
Commit-ID: d23e354fe58aada6d1cdeeb7e8463b75d44bc687
Gitweb: http://git.kernel.org/tip/d23e354fe58aada6d1cdeeb7e8463b75d44bc687
Author: Arnaldo Carvalho de Melo <ac...@redhat.com>
AuthorDate: Wed, 5 Oct 2016 19:12:46 -0300
Committer: Arnaldo Carvalho de Melo <ac...@redhat.com>
CommitDate: Wed, 5 Oct 2016 19:12:46 -0300

perf bench mem: Sync memcpy assembly sources with the kernel

Commit 9a6fb28a355d ("x86/mce: Improve memcpy_mcsafe()") renames
memcpy_mcsafe() to memcpy_mcsafe_unrolled(), making
tools/arch/x86/lib/memcpy_64.S drift from its kernel counterpart,
triggering this warning in the perf build:

Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel

Sync that copy to acknowledge that, no changes to 'perf bench' are
needed, as this function is not used there.

Cc: Adrian Hunter <adrian...@intel.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Jiri Olsa <jo...@kernel.org>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Tony Luck <tony...@intel.com>
Cc: Wang Nan <wang...@huawei.com>
Link: http://lkml.kernel.org/n/tip-xfwc1raw8o...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/arch/x86/lib/memcpy_64.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 2ec0b0abb..49e6eba 100644

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:06 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

It is to be displayed in the main cachelines overall output:

percent_hitm

It displays HITMs percentage for cacheline.

It counts remote HITMs at the moment, but it is changed later to support
local as well, based on the sort configuration.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-czd17qsh5u...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 91 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 2411fe025bc7..dd356d88285c 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -565,6 +565,87 @@ tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return tot_recs_left - tot_recs_right;
}

+typedef double (get_percent_cb)(struct c2c_hist_entry *);
+
+static int
+percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, get_percent_cb get_percent)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ per = get_percent(c2c_he);
+
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per);
+}
+
+static double percent_hitm(struct c2c_hist_entry *c2c_he)
+{
+ struct c2c_hists *hists;
+ struct c2c_stats *stats;
+ struct c2c_stats *total;
+ int tot, st;
+ double p;
+
+ hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
+ stats = &c2c_he->stats;
+ total = &hists->stats;
+
+ st = stats->rmt_hitm;
+ tot = total->rmt_hitm;
+
+ p = tot ? (double) st / tot : 0;
+
+ return 100 * p;
+}
+
+#define PERC_STR(__s, __v) \
+({ \
+ scnprintf(__s, sizeof(__s), "%.2F%%", __v); \
+ __s; \
+})
+
+static int
+percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+ double per;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ per = percent_hitm(c2c_he);
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_hitm);
+}
+
+static int64_t
+percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ double per_left;
+ double per_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ per_left = percent_hitm(c2c_left);
+ per_right = percent_hitm(c2c_right);
+
+ return per_left - per_right;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -768,6 +849,15 @@ static struct c2c_dimension dim_tot_loads = {
.width = 7,
};

+static struct c2c_dimension dim_percent_hitm = {
+ .header = HEADER_LOW("%hitm"),
+ .name = "percent_hitm",
+ .cmp = percent_hitm_cmp,
+ .entry = percent_hitm_entry,
+ .color = percent_hitm_color,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -790,6 +880,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_llcmiss,
&dim_tot_recs,
&dim_tot_loads,
+ &dim_percent_hitm,
NULL,
};

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:06 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

They are to be displayed in the single cacheline output:

percent_rmt_hitm, percent_lcl_hitm, percent_stores_l1hit, percent_stores_l1miss

They display percentage of HITMs/stores for specific offset in the
cacheline.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-t365aosxtd...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 202 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index dd356d88285c..bf4859fecc19 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -646,6 +646,167 @@ percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return per_left - per_right;
}

+static struct c2c_stats *he_stats(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ return &c2c_he->stats;
+}
+
+static struct c2c_stats *total_stats(struct hist_entry *he)
+{
+ struct c2c_hists *hists;
+
+ hists = container_of(he->hists, struct c2c_hists, hists);
+ return &hists->stats;
+}
+
+static double percent(int st, int tot)
+{
+ return tot ? 100. * (double) st / (double) tot : 0;
+}
+
+#define PERCENT(__h, __f) percent(he_stats(__h)->__f, total_stats(__h)->__f)
+
+#define PERCENT_FN(__f) \
+static double percent_ ## __f(struct c2c_hist_entry *c2c_he) \
+{ \
+ struct c2c_hists *hists; \
+ \
+ hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); \
+ return percent(c2c_he->stats.__f, hists->stats.__f); \
+}
+
+PERCENT_FN(rmt_hitm)
+PERCENT_FN(lcl_hitm)
+PERCENT_FN(st_l1hit)
+PERCENT_FN(st_l1miss)
+
+static int
+percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, rmt_hitm);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_rmt_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_rmt_hitm);
+}
+
+static int64_t
+percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, lcl_hitm);
+ per_right = PERCENT(right, lcl_hitm);
+
+ return per_left - per_right;
+}
+
+static int
+percent_lcl_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, lcl_hitm);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_lcl_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_lcl_hitm);
+}
+
+static int64_t
+percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, lcl_hitm);
+ per_right = PERCENT(right, lcl_hitm);
+
+ return per_left - per_right;
+}
+
+static int
+percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, st_l1hit);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_stores_l1hit_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_st_l1hit);
+}
+
+static int64_t
+percent_stores_l1hit_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, st_l1hit);
+ per_right = PERCENT(right, st_l1hit);
+
+ return per_left - per_right;
+}
+
+static int
+percent_stores_l1miss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, st_l1miss);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_stores_l1miss_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_st_l1miss);
+}
+
+static int64_t
+percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, st_l1miss);
+ per_right = PERCENT(right, st_l1miss);
+
+ return per_left - per_right;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -858,6 +1019,42 @@ static struct c2c_dimension dim_percent_hitm = {
.width = 7,
};

+static struct c2c_dimension dim_percent_rmt_hitm = {
+ .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
+ .name = "percent_rmt_hitm",
+ .cmp = percent_rmt_hitm_cmp,
+ .entry = percent_rmt_hitm_entry,
+ .color = percent_rmt_hitm_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_lcl_hitm = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "percent_lcl_hitm",
+ .cmp = percent_lcl_hitm_cmp,
+ .entry = percent_lcl_hitm_entry,
+ .color = percent_lcl_hitm_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1hit = {
+ .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
+ .name = "percent_stores_l1hit",
+ .cmp = percent_stores_l1hit_cmp,
+ .entry = percent_stores_l1hit_entry,
+ .color = percent_stores_l1hit_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1miss = {
+ .header = HEADER_SPAN_LOW("L1 Miss"),
+ .name = "percent_stores_l1miss",
+ .cmp = percent_stores_l1miss_cmp,
+ .entry = percent_stores_l1miss_entry,
+ .color = percent_stores_l1miss_color,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -881,6 +1078,10 @@ static struct c2c_dimension *dimensions[] = {
&dim_tot_recs,
&dim_tot_loads,
&dim_percent_hitm,
+ &dim_percent_rmt_hitm,
+ &dim_percent_lcl_hitm,
+ &dim_percent_stores_l1hit,
+ &dim_percent_stores_l1miss,
NULL,
};

@@ -968,6 +1169,7 @@ static struct c2c_fmt *get_format(const char *name)

fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp;
fmt->sort = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->color = dim->se ? NULL : dim->color;
fmt->entry = dim->se ? c2c_se_entry : dim->entry;
fmt->header = c2c_header;
fmt->width = c2c_width;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:07 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

It is to be displayed in the main cachelines overall output:

ld_llcmiss

It displays bare number of LLC misses for cacheline.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-wojujik7zz...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 6b601836b031..f525384dbbad 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -421,6 +421,44 @@ STAT_FN(ld_l2hit)
STAT_FN(ld_llchit)
STAT_FN(rmt_hit)

+static uint64_t llc_miss(struct c2c_stats *stats)
+{
+ uint64_t llcmiss;
+
+ llcmiss = stats->lcl_dram +
+ stats->rmt_dram +
+ stats->rmt_hitm +
+ stats->rmt_hit;
+
+ return llcmiss;
+}
+
+static int
+ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ return scnprintf(hpp->buf, hpp->size, "%*lu", width,
+ llc_miss(&c2c_he->stats));
+}
+
+static int64_t
+ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats);
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -600,6 +638,14 @@ static struct c2c_dimension dim_ld_rmthit = {
.width = 8,
};

+static struct c2c_dimension dim_ld_llcmiss = {
+ .header = HEADER_BOTH("LLC", "Ld Miss"),
+ .name = "ld_llcmiss",
+ .cmp = ld_llcmiss_cmp,
+ .entry = ld_llcmiss_entry,
+ .width = 7,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -619,6 +665,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_l2hit,
&dim_ld_llchit,
&dim_ld_rmthit,
+ &dim_ld_llcmiss,
NULL,
};

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:07 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

The width of symbol and source line entries can get really long,
which is not convenient to display. Add support for displaying only
part of such strings, with the possibility to switch to full length
by using the --full-symbols option or the 's' key in the TUI browser.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-yxf5hfteyf...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 430360e618b0..00d1620dc2bf 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -62,6 +62,7 @@ struct perf_c2c {
bool show_src;
bool use_stdio;
bool stats_only;
+ bool symbol_full;

/* HITM shared clines stats */
struct c2c_stats hitm_stats;
@@ -336,6 +337,21 @@ struct c2c_fmt {
struct c2c_dimension *dim;
};

+#define SYMBOL_WIDTH 30
+
+static struct c2c_dimension dim_symbol;
+static struct c2c_dimension dim_srcline;
+
+static int symbol_width(struct hists *hists, struct sort_entry *se)
+{
+ int width = hists__col_len(hists, se->se_width_idx);
+
+ if (!c2c.symbol_full)
+ width = MIN(width, SYMBOL_WIDTH);
+
+ return width;
+}
+
static int c2c_width(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp __maybe_unused,
struct hists *hists __maybe_unused)
@@ -346,6 +362,9 @@ static int c2c_width(struct perf_hpp_fmt *fmt,
c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
dim = c2c_fmt->dim;

+ if (dim == &dim_symbol || dim == &dim_srcline)
+ return symbol_width(hists, dim->se);
+
return dim->se ? hists__col_len(hists, dim->se->se_width_idx) :
c2c_fmt->dim->width;
}
@@ -1563,9 +1582,13 @@ static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct c2c_dimension *dim = c2c_fmt->dim;
size_t len = fmt->user_len;

- if (!len)
+ if (!len) {
len = hists__col_len(he->hists, dim->se->se_width_idx);

+ if (dim == &dim_symbol || dim == &dim_srcline)
+ len = symbol_width(he->hists, dim->se);
+ }
+
return dim->se->se_snprintf(he, hpp->buf, hpp->size, len);
}

@@ -2159,6 +2182,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
struct hist_browser *browser;
int key = -1;

+ /* Display compact version first. */
+ c2c.symbol_full = false;
+
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_hists = c2c_he->hists;

@@ -2178,6 +2204,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
key = hist_browser__run(browser, "help");

switch (key) {
+ case 's':
+ c2c.symbol_full = !c2c.symbol_full;
+ break;
case 'q':
goto out;
default:
@@ -2449,6 +2478,8 @@ static int perf_c2c__report(int argc, const char **argv)
#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Use the stdio interface"),
+ OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
+ "Display full length of symbols"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
callchain_help, &parse_callchain_opt,
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:07 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Display global stats table as part of the stdio output
or when the --stats option is specified:

$ perf c2c report --stats
=================================================
Trace Event Information
=================================================
Total records : 41237
Locked Load/Store Operations : 4075
Load Operations : 20526
Loads - uncacheable : 0
Loads - IO : 0
Loads - Miss : 552
Loads - no mapping : 31
Load Fill Buffer Hit : 7333
Load L1D hit : 6398
Load L2D hit : 144
Load LLC hit : 4889
Load Local HITM : 1185
Load Remote HITM : 838
Load Remote HIT : 52
Load Local DRAM : 183
Load Remote DRAM : 106
Load MESI State Exclusive : 289
Load MESI State Shared : 0
Load LLC Misses : 1179
LLC Misses to Local DRAM : 15.5%
LLC Misses to Remote DRAM : 9.0%
LLC Misses to Remote cache (HIT) : 4.4%
LLC Misses to Remote cache (HITM) : 71.1%
Store Operations : 20711
Store - uncacheable : 0
Store - no mapping : 1
Store L1D Hit : 20158
Store L1D Miss : 552
No Page Map Rejects : 7
Unable to parse data source : 0

Original-patch-by: Dick Fowles <rfo...@redhat.com>
Original-patch-by: Don Zickus <dzi...@redhat.com>
Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-qkyvao3qsr...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 2b99d1273024..d365902acf61 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -55,6 +55,7 @@ struct perf_c2c {

bool show_src;
bool use_stdio;
+ bool stats_only;
};

static struct perf_c2c c2c;
@@ -1731,6 +1732,51 @@ static int setup_nodes(struct perf_session *session)
return 0;
}

+static void print_c2c__display_stats(FILE *out)
+{
+ int llc_misses;
+ struct c2c_stats *stats = &c2c.hists.stats;
+
+ llc_misses = stats->lcl_dram +
+ stats->rmt_dram +
+ stats->rmt_hit +
+ stats->rmt_hitm;
+
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Trace Event Information \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Total records : %10d\n", stats->nr_entries);
+ fprintf(out, " Locked Load/Store Operations : %10d\n", stats->locks);
+ fprintf(out, " Load Operations : %10d\n", stats->load);
+ fprintf(out, " Loads - uncacheable : %10d\n", stats->ld_uncache);
+ fprintf(out, " Loads - IO : %10d\n", stats->ld_io);
+ fprintf(out, " Loads - Miss : %10d\n", stats->ld_miss);
+ fprintf(out, " Loads - no mapping : %10d\n", stats->ld_noadrs);
+ fprintf(out, " Load Fill Buffer Hit : %10d\n", stats->ld_fbhit);
+ fprintf(out, " Load L1D hit : %10d\n", stats->ld_l1hit);
+ fprintf(out, " Load L2D hit : %10d\n", stats->ld_l2hit);
+ fprintf(out, " Load LLC hit : %10d\n", stats->ld_llchit + stats->lcl_hitm);
+ fprintf(out, " Load Local HITM : %10d\n", stats->lcl_hitm);
+ fprintf(out, " Load Remote HITM : %10d\n", stats->rmt_hitm);
+ fprintf(out, " Load Remote HIT : %10d\n", stats->rmt_hit);
+ fprintf(out, " Load Local DRAM : %10d\n", stats->lcl_dram);
+ fprintf(out, " Load Remote DRAM : %10d\n", stats->rmt_dram);
+ fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl);
+ fprintf(out, " Load MESI State Shared : %10d\n", stats->ld_shared);
+ fprintf(out, " Load LLC Misses : %10d\n", llc_misses);
+ fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.);
+ fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.);
+ fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.);
+ fprintf(out, " LLC Misses to Remote cache (HITM) : %10.1f%%\n", ((double)stats->rmt_hitm/(double)llc_misses) * 100.);
+ fprintf(out, " Store Operations : %10d\n", stats->store);
+ fprintf(out, " Store - uncacheable : %10d\n", stats->st_uncache);
+ fprintf(out, " Store - no mapping : %10d\n", stats->st_noadrs);
+ fprintf(out, " Store L1D Hit : %10d\n", stats->st_l1hit);
+ fprintf(out, " Store L1D Miss : %10d\n", stats->st_l1miss);
+ fprintf(out, " No Page Map Rejects : %10d\n", stats->nomap);
+ fprintf(out, " Unable to parse data source : %10d\n", stats->noparse);
+}
+
static void print_cacheline(struct c2c_hists *c2c_hists,
struct hist_entry *he_cl,
struct perf_hpp_list *hpp_list,
@@ -1794,6 +1840,11 @@ static void perf_c2c__hists_fprintf(FILE *out)
{
setup_pager();

+ print_c2c__display_stats(out);
+
+ if (c2c.stats_only)
+ return;
+
fprintf(out, "\n");
fprintf(out, "=================================================\n");
fprintf(out, " Shared Data Cache Line Table \n");
@@ -2005,6 +2056,8 @@ static int perf_c2c__report(int argc, const char **argv)
#ifdef HAVE_SLANG_SUPPORT
OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
#endif
+ OPT_BOOLEAN(0, "stats", &c2c.stats_only,
+ "Use the stdio interface"),
OPT_END()
};
int err = 0;
@@ -2014,6 +2067,9 @@ static int perf_c2c__report(int argc, const char **argv)
if (argc)
usage_with_options(report_c2c_usage, c2c_options);

+ if (c2c.stats_only)
+ c2c.use_stdio = true;
+
if (c2c.use_stdio)
use_browser = 0;
else
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:07 PM10/11/16
to
From: Andi Kleen <a...@linux.intel.com>

Vendor events are often specified in upper case. perf list outputs them
in lower case. Handle this case in perf-completion.sh so that completion
on the upper case events still works.

v2: Use locale aware check for upper case
v3: Use perf list json

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Jiri Olsa <jo...@kernel.org>
Link: http://lkml.kernel.org/r/1475781807-6034-2-...@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/perf-completion.sh | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh
index 3ba80b2359cc..3b5a258a4b7b 100644
--- a/tools/perf/perf-completion.sh
+++ b/tools/perf/perf-completion.sh
@@ -161,7 +161,11 @@ __perf_main ()
# List possible events for -e option
elif [[ $prev == @("-e"|"--event") &&
$prev_skip_opts == @(record|stat|top) ]]; then
- evts=$($cmd list --raw-dump)
+ # handle upper case events
+ case "$cur" in
+ [[:upper:]]*) evts=$($cmd list --raw-dump json | tr a-z A-Z) ;;
+ *) evts=$($cmd list --raw-dump) ;;
+ esac
__perfcomp_colon "$evts" "$cur"
else
# List subcommands for perf commands
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:07 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Sukadev reported a segfault on releasing perf env's numa data. It's due
to nr_numa_nodes being set regardless of whether the numa data gets parsed
properly. perf_env__exit then crashes on releasing non-existent data.

Set nr_numa_nodes only when the data is parsed out properly.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Reported-by: Sukadev Bhattiprolu <suk...@linux.vnet.ibm.com>
Cc: Adrian Hunter <adrian...@intel.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Wang Nan <wang...@huawei.com>
Link: http://lkml.kernel.org/n/tip-dt9c0zgkt4...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/header.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 43ded20f1edf..d89c9c7ef4e5 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1895,7 +1895,6 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
if (ph->needs_swap)
nr = bswap_32(nr);

- ph->env.nr_numa_nodes = nr;
nodes = zalloc(sizeof(*nodes) * nr);
if (!nodes)
return -ENOMEM;
@@ -1932,6 +1931,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse

free(str);
}
+ ph->env.nr_numa_nodes = nr;
ph->env.numa_nodes = nodes;
return 0;

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:07 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

It is to be displayed in the single cacheline output:

median, mean_rmt, mean_lcl, mean_load, stddev

It displays statistics hits related to cacheline accesses.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-m1r4uc9lcy...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index ca2f37479e6d..043344a720bf 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -20,11 +20,20 @@ struct c2c_hists {
struct c2c_stats stats;
};

+struct compute_stats {
+ struct stats lcl_hitm;
+ struct stats rmt_hitm;
+ struct stats load;
+};
+
struct c2c_hist_entry {
struct c2c_hists *hists;
struct c2c_stats stats;
unsigned long *cpuset;
struct c2c_stats *node_stats;
+
+ struct compute_stats cstats;
+
/*
* must be at the end,
* because of its callchain dynamic entry
@@ -61,6 +70,10 @@ static void *c2c_he_zalloc(size_t size)
if (!c2c_he->node_stats)
return NULL;

+ init_stats(&c2c_he->cstats.lcl_hitm);
+ init_stats(&c2c_he->cstats.rmt_hitm);
+ init_stats(&c2c_he->cstats.load);
+
return &c2c_he->he;
}

@@ -122,6 +135,20 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
set_bit(sample->cpu, c2c_he->cpuset);
}

+static void compute_stats(struct c2c_hist_entry *c2c_he,
+ struct c2c_stats *stats,
+ u64 weight)
+{
+ struct compute_stats *cstats = &c2c_he->cstats;
+
+ if (stats->rmt_hitm)
+ update_stats(&cstats->rmt_hitm, weight);
+ else if (stats->lcl_hitm)
+ update_stats(&cstats->lcl_hitm, weight);
+ else if (stats->load)
+ update_stats(&cstats->load, weight);
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -200,6 +227,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
c2c_add_stats(&c2c_hists->stats, &stats);
c2c_add_stats(&c2c_he->node_stats[node], &stats);

+ compute_stats(c2c_he, &stats, sample->weight);
+
c2c_he__set_cpu(c2c_he, sample);

hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
@@ -962,6 +991,30 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
return 0;
}

+static int
+mean_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, double mean)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+
+ scnprintf(buf, 10, "%6.0f", mean);
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+#define MEAN_ENTRY(__func, __val) \
+static int \
+__func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ struct c2c_hist_entry *c2c_he; \
+ c2c_he = container_of(he, struct c2c_hist_entry, he); \
+ return mean_entry(fmt, hpp, he, avg_stats(&c2c_he->cstats.__val)); \
+}
+
+MEAN_ENTRY(mean_rmt_entry, rmt_hitm);
+MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
+MEAN_ENTRY(mean_load_entry, load);
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1264,6 +1317,30 @@ static struct c2c_dimension dim_node = {
.width = 4,
};

+static struct c2c_dimension dim_mean_rmt = {
+ .header = HEADER_SPAN("---------- cycles ----------", "rmt hitm", 2),
+ .name = "mean_rmt",
+ .cmp = empty_cmp,
+ .entry = mean_rmt_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_mean_lcl = {
+ .header = HEADER_SPAN_LOW("lcl hitm"),
+ .name = "mean_lcl",
+ .cmp = empty_cmp,
+ .entry = mean_lcl_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_mean_load = {
+ .header = HEADER_SPAN_LOW("load"),
+ .name = "mean_load",
+ .cmp = empty_cmp,
+ .entry = mean_load_entry,
+ .width = 8,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1298,6 +1375,9 @@ static struct c2c_dimension *dimensions[] = {
&dim_symbol,
&dim_dso,
&dim_node,
+ &dim_mean_rmt,
+ &dim_mean_lcl,
+ &dim_mean_load,
NULL,
};

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:08 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Adding c2c command base wirings. Its implementation is going to be added
gradually in following patches.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-11-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Build | 1 +
tools/perf/builtin-c2c.c | 23 +++++++++++++++++++++++
tools/perf/builtin.h | 1 +
tools/perf/perf.c | 1 +
4 files changed, 26 insertions(+)
create mode 100644 tools/perf/builtin-c2c.c

diff --git a/tools/perf/Build b/tools/perf/Build
index a43fae7f439a..b12d5d1666e3 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -21,6 +21,7 @@ perf-y += builtin-inject.o
perf-y += builtin-mem.o
perf-y += builtin-data.o
perf-y += builtin-version.o
+perf-y += builtin-c2c.o

perf-$(CONFIG_AUDIT) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
new file mode 100644
index 000000000000..8252ed0ba5d0
--- /dev/null
+++ b/tools/perf/builtin-c2c.c
@@ -0,0 +1,23 @@
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "util.h"
+#include "debug.h"
+#include "builtin.h"
+#include <subcmd/parse-options.h>
+
+static const char * const c2c_usage[] = {
+ "perf c2c",
+ NULL
+};
+
+int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ const struct option c2c_options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_END()
+ };
+
+ argc = parse_options(argc, argv, c2c_options, c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 41c24010ab43..0bcf68e98ccc 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -18,6 +18,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix);
int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
int cmd_buildid_list(int argc, const char **argv, const char *prefix);
int cmd_config(int argc, const char **argv, const char *prefix);
+int cmd_c2c(int argc, const char **argv, const char *prefix);
int cmd_diff(int argc, const char **argv, const char *prefix);
int cmd_evlist(int argc, const char **argv, const char *prefix);
int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 64c06961bfe4..aa23b3347d6b 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -43,6 +43,7 @@ static struct cmd_struct commands[] = {
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
{ "config", cmd_config, 0 },
+ { "c2c", cmd_c2c, 0 },
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:08 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

It is to be displayed in the single cacheline output:

node

It displays node hits related to cacheline accesses.

The node field comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in the following format:
Node{cpus %hitms %stores}
- node IDs with list of affected CPUs, in the following format:
Node{cpu list}

The user can switch the flavor with the -N option (-NN, -NNN).
In the TUI it will be possible to switch this with the 'n' key.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-6742e6g0r7...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 219 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 219 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index ffd41744886e..ca2f37479e6d 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1,6 +1,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/stringify.h>
+#include <asm/bug.h>
#include "util.h"
#include "debug.h"
#include "builtin.h"
@@ -22,6 +23,8 @@ struct c2c_hists {
struct c2c_hist_entry {
struct c2c_hists *hists;
struct c2c_stats stats;
+ unsigned long *cpuset;
+ struct c2c_stats *node_stats;
/*
* must be at the end,
* because of its callchain dynamic entry
@@ -32,6 +35,12 @@ struct c2c_hist_entry {
struct perf_c2c {
struct perf_tool tool;
struct c2c_hists hists;
+
+ unsigned long **nodes;
+ int nodes_cnt;
+ int cpus_cnt;
+ int *cpu2node;
+ int node_info;
};

static struct perf_c2c c2c;
@@ -44,6 +53,14 @@ static void *c2c_he_zalloc(size_t size)
if (!c2c_he)
return NULL;

+ c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
+ if (!c2c_he->cpuset)
+ return NULL;
+
+ c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
+ if (!c2c_he->node_stats)
+ return NULL;
+
return &c2c_he->he;
}

@@ -57,6 +74,8 @@ static void c2c_he_free(void *he)
free(c2c_he->hists);
}

+ free(c2c_he->cpuset);
+ free(c2c_he->node_stats);
free(c2c_he);
}

@@ -93,6 +112,16 @@ he__get_c2c_hists(struct hist_entry *he,
return hists;
}

+static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
+ struct perf_sample *sample)
+{
+ if (WARN_ONCE(sample->cpu == (unsigned int) -1,
+ "WARNING: no sample cpu value"))
+ return;
+
+ set_bit(sample->cpu, c2c_he->cpuset);
+}
+
static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -133,10 +162,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
c2c_add_stats(&c2c_he->stats, &stats);
c2c_add_stats(&c2c_hists->stats, &stats);

+ c2c_he__set_cpu(c2c_he, sample);
+
hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);

if (!ret) {
+ /*
+ * There's already been warning about missing
+ * sample's cpu value. Let's account all to
+ * node 0 in this case, without any further
+ * warning.
+ *
+ * Doing node stats only for single callchain data.
+ */
+ int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
+ int node = c2c.cpu2node[cpu];
+
mi = mi_dup;

mi_dup = memdup(mi, sizeof(*mi));
@@ -156,6 +198,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
c2c_add_stats(&c2c_hists->stats, &stats);
+ c2c_add_stats(&c2c_he->node_stats[node], &stats);
+
+ c2c_he__set_cpu(c2c_he, sample);

hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);
@@ -826,6 +871,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return left->thread->pid_ - right->thread->pid_;
}

+static int64_t
+empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left __maybe_unused,
+ struct hist_entry *right __maybe_unused)
+{
+ return 0;
+}
+
+static int
+node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ bool first = true;
+ int node;
+ int ret = 0;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ for (node = 0; node < c2c.nodes_cnt; node++) {
+ DECLARE_BITMAP(set, c2c.cpus_cnt);
+
+ bitmap_zero(set, c2c.cpus_cnt);
+ bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt);
+
+ if (!bitmap_weight(set, c2c.cpus_cnt)) {
+ if (c2c.node_info == 1) {
+ ret = scnprintf(hpp->buf, hpp->size, "%21s", " ");
+ advance_hpp(hpp, ret);
+ }
+ continue;
+ }
+
+ if (!first) {
+ ret = scnprintf(hpp->buf, hpp->size, " ");
+ advance_hpp(hpp, ret);
+ }
+
+ switch (c2c.node_info) {
+ case 0:
+ ret = scnprintf(hpp->buf, hpp->size, "%2d", node);
+ advance_hpp(hpp, ret);
+ break;
+ case 1:
+ {
+ int num = bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt);
+ struct c2c_stats *stats = &c2c_he->node_stats[node];
+
+ ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
+ advance_hpp(hpp, ret);
+
+
+ if (c2c_he->stats.rmt_hitm > 0) {
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
+ percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm));
+ } else {
+ ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+ }
+
+ advance_hpp(hpp, ret);
+
+ if (c2c_he->stats.store > 0) {
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}",
+ percent(stats->store, c2c_he->stats.store));
+ } else {
+ ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a");
+ }
+
+ advance_hpp(hpp, ret);
+ break;
+ }
+ case 2:
+ ret = scnprintf(hpp->buf, hpp->size, "%2d{", node);
+ advance_hpp(hpp, ret);
+
+ ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size);
+ advance_hpp(hpp, ret);
+
+ ret = scnprintf(hpp->buf, hpp->size, "}");
+ advance_hpp(hpp, ret);
+ break;
+ default:
+ break;
+ }
+
+ first = false;
+ }
+
+ return 0;
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1115,6 +1251,19 @@ static struct c2c_dimension dim_dso = {
.se = &sort_dso,
};

+static struct c2c_header header_node[3] = {
+ HEADER_LOW("Node"),
+ HEADER_LOW("Node{cpus %hitms %stores}"),
+ HEADER_LOW("Node{cpu list}"),
+};
+
+static struct c2c_dimension dim_node = {
+ .name = "node",
+ .cmp = empty_cmp,
+ .entry = node_entry,
+ .width = 4,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1148,6 +1297,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_tid,
&dim_symbol,
&dim_dso,
+ &dim_node,
NULL,
};

@@ -1374,6 +1524,68 @@ static int resort_cl_cb(struct hist_entry *he)
return 0;
}

+static void setup_nodes_header(void)
+{
+ dim_node.header = header_node[c2c.node_info];
+}
+
+static int setup_nodes(struct perf_session *session)
+{
+ struct numa_node *n;
+ unsigned long **nodes;
+ int node, cpu;
+ int *cpu2node;
+
+ if (c2c.node_info > 2)
+ c2c.node_info = 2;
+
+ c2c.nodes_cnt = session->header.env.nr_numa_nodes;
+ c2c.cpus_cnt = session->header.env.nr_cpus_online;
+
+ n = session->header.env.numa_nodes;
+ if (!n)
+ return -EINVAL;
+
+ nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt);
+ if (!nodes)
+ return -ENOMEM;
+
+ c2c.nodes = nodes;
+
+ cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt);
+ if (!cpu2node)
+ return -ENOMEM;
+
+ for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
+ cpu2node[cpu] = -1;
+
+ c2c.cpu2node = cpu2node;
+
+ for (node = 0; node < c2c.nodes_cnt; node++) {
+ struct cpu_map *map = n[node].map;
+ unsigned long *set;
+
+ set = bitmap_alloc(c2c.cpus_cnt);
+ if (!set)
+ return -ENOMEM;
+
+ for (cpu = 0; cpu < map->nr; cpu++) {
+ set_bit(map->map[cpu], set);
+
+ if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
+ return -EINVAL;
+
+ cpu2node[map->map[cpu]] = node;
+ }
+
+ nodes[node] = set;
+ }
+
+ setup_nodes_header();
+ return 0;
+}
+
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -1388,6 +1600,8 @@ static int perf_c2c__report(int argc, const char **argv)
"be more verbose (show counter open errors, etc)"),
OPT_STRING('i', "input", &input_name, "file",
"the input file to process"),
+ OPT_INCR('N', "node-info", &c2c.node_info,
+ "show extra node info in report (repeat for more info)"),
OPT_END()
};
int err = 0;
@@ -1413,6 +1627,11 @@ static int perf_c2c__report(int argc, const char **argv)
pr_debug("No memory for session\n");
goto out;
}
+ err = setup_nodes(session);
+ if (err) {
+ pr_err("Failed setup nodes\n");
+ goto out;
+ }

if (symbol__init(&session->header.env) < 0)
goto out_session;
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:08 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Add a simple TUI cacheline browser. It is triggered when you press 'd' in
the main browser on a specific cacheline.

It allows navigating through a cacheline's offsets and displaying callchains
(implemented in the following patches).

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-fovjwgyusv...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c42991664703..2b99d1273024 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1829,6 +1829,84 @@ static void c2c_browser__update_nr_entries(struct hist_browser *hb)
hb->nr_non_filtered_entries = nr_entries;
}

+struct c2c_cacheline_browser {
+ struct hist_browser hb;
+ struct hist_entry *he;
+};
+
+static int
+perf_c2c_cacheline_browser__title(struct hist_browser *browser,
+ char *bf, size_t size)
+{
+ struct c2c_cacheline_browser *cl_browser;
+ struct hist_entry *he;
+ uint64_t addr = 0;
+
+ cl_browser = container_of(browser, struct c2c_cacheline_browser, hb);
+ he = cl_browser->he;
+
+ if (he->mem_info)
+ addr = cl_address(he->mem_info->daddr.addr);
+
+ scnprintf(bf, size, "Cacheline 0x%lx", addr);
+ return 0;
+}
+
+static struct c2c_cacheline_browser*
+c2c_cacheline_browser__new(struct hists *hists, struct hist_entry *he)
+{
+ struct c2c_cacheline_browser *browser;
+
+ browser = zalloc(sizeof(*browser));
+ if (browser) {
+ hist_browser__init(&browser->hb, hists);
+ browser->hb.c2c_filter = true;
+ browser->hb.title = perf_c2c_cacheline_browser__title;
+ browser->he = he;
+ }
+
+ return browser;
+}
+
+static int perf_c2c__browse_cacheline(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+ struct c2c_cacheline_browser *cl_browser;
+ struct hist_browser *browser;
+ int key = -1;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ cl_browser = c2c_cacheline_browser__new(&c2c_hists->hists, he);
+ if (cl_browser == NULL)
+ return -1;
+
+ browser = &cl_browser->hb;
+
+ /* reset abort key so that it can get Ctrl-C as a key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ c2c_browser__update_nr_entries(browser);
+
+ while (1) {
+ key = hist_browser__run(browser, "help");
+
+ switch (key) {
+ case 'q':
+ goto out;
+ default:
+ break;
+ }
+ }
+
+out:
+ free(cl_browser);
+ return 0;
+}
+
static int perf_c2c_browser__title(struct hist_browser *browser,
char *bf, size_t size)
{
@@ -1872,6 +1950,9 @@ static int perf_c2c__hists_browse(struct hists *hists)
switch (key) {
case 'q':
goto out;
+ case 'd':
+ perf_c2c__browse_cacheline(browser->he_selection);
+ break;
default:
break;
}
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:09 PM10/11/16
to
From: Arnaldo Carvalho de Melo <ac...@redhat.com>

Hi Ingo,

Please consider pulling,

- Arnaldo

Build and test stats at the end of the message.

The following changes since commit c68306ce20ad03ce655a367fc33ad06e12bb87a6:

Merge tag 'perf-core-for-mingo-20161005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent (2016-10-07 00:36:49 +0200)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-20161011

for you to fetch changes up to 193b29e31a5cfec42790a59fc453359bb6ee0ea1:

perf jevents: Handle events including .c and .o (2016-10-11 12:34:39 -0300)

----------------------------------------------------------------
perf/core improvements and fixes:

New features:

- The 'perf c2c' tool provides means for Shared Data C2C/HITM analysis.
It allows you to track down cacheline contention. The tool is based
on x86's load latency and precise store facility events provided by
Intel CPUs.

It was tested by Joe Mario and has proven to be useful, finding some
cacheline contentions. Joe also wrote a blog about c2c tool with
examples:

https://joemario.github.io/blog/2016/09/01/c2c-blog/

There one finds extensive details on using the tool, with tips on
reducing the volume of samples while still capturing enough to do
its job. (Dick Fowles, Joe Mario, Don Zickus, Jiri Olsa)

- Add support in 'perf list' to show only events in vendor notation,
built from JSON (Andi Kleen)

- Handle completion of upper case events, as users of the JSON events
are used to. Using it as lowercase also works. (Andi Kleen)

- Report Intel-PT/BTS instruction bytes in 'perf script' (Andi Kleen)

Fixes:

- Fix handling of numa nodes in perf.data files (Jiri Olsa)

- Fix scrolling when refreshing 'perf top --tui --hierarchy' entries (Namhyung Kim)

- Fix handling of events including .c and .o, that were being treated as
BPF scripts instead of JSON ones (Wang Nan)

Infrastructure:

- Sync copy of x86's syscall table (Arnaldo Carvalho de Melo)

- prep work for making libtraceevent more widely used (Jiri Olsa)

- Show list of features not present in a perf.data file when using
'perf report --header-only', to help with debugging (Jiri Olsa)

- When failing to process a record, show its name, not its number (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>

----------------------------------------------------------------
Adrian Hunter (1):
perf intel-pt/bts: Tidy instruction buffer size usage

Andi Kleen (3):
perf list: Add support for listing only json events
perf tools: Handle completion of upper case events
perf intel-pt/bts: Report instruction bytes and length in sample

Arnaldo Carvalho de Melo (1):
perf tools: Sync copy of x86's syscall table

Jiri Olsa (61):
perf c2c: Introduce c2c_decode_stats function
perf c2c: Introduce c2c_add_stats function
perf c2c: Add c2c command
perf c2c: Add record subcommand
perf c2c: Add report subcommand
perf c2c report: Add dimension support
perf c2c report: Add sort_entry dimension support
perf c2c report: Fallback to standard dimensions
perf c2c report: Add sample processing
perf c2c report: Add cacheline hists processing
perf c2c report: Decode c2c_stats for hist entries
perf c2c report: Add header macros
perf c2c report: Add 'dcacheline' dimension key
perf c2c report: Add 'offset' dimension key
perf c2c report: Add 'iaddr' dimension key
perf c2c report: Add hitm related dimension keys
perf c2c report: Add stores related dimension keys
perf c2c report: Add loads related dimension keys
perf c2c report: Add llc and remote loads related dimension keys
perf c2c report: Add llc load miss dimension key
perf c2c report: Add total record sort key
perf c2c report: Add total loads sort key
perf c2c report: Add hitm percent sort key
perf c2c report: Add hitm/store percent related sort keys
perf c2c report: Add dram related sort keys
perf c2c report: Add 'pid' sort key
perf c2c report: Add 'tid' sort key
perf c2c report: Add 'symbol' and 'dso' sort keys
perf c2c report: Add 'node' sort key
perf c2c report: Add stats related sort keys
perf c2c report: Add 'cpucnt' sort key
perf c2c report: Add src line sort key
perf c2c report: Setup number of header lines for hists
perf c2c report: Set final resort fields
perf c2c report: Add stdio output support
perf c2c report: Add main TUI browser
perf c2c report: Add TUI cacheline browser
perf c2c report: Add global stats stdio output
perf c2c report: Add shared cachelines stats stdio output
perf c2c report: Add c2c related stats stdio output
perf c2c report: Allow to report callchains
perf c2c report: Limit the cachelines table entries
perf c2c report: Add support to choose local HITMs
perf c2c report: Allow to set cacheline sort fields
perf c2c report: Recalc width of global sort entries
perf c2c report: Add cacheline index entry
perf c2c report: Add support to manage symbol name length
perf c2c report: Iterate node display in browser
perf c2c report: Add help windows
perf c2c: Add man page and credits
tools lib traceevent: Add install_headers target
tools lib traceevent: Add do_install_mkdir Makefile function
tools lib traceevent: Rename LIB_FILE to LIB_TARGET
tools lib traceevent: Add version for traceevent shared object
tools lib: Add for_each_clear_bit macro
perf report: Move captured info to generic header info
perf header: Display missing features
perf header: Display feature name on write failure
perf header: Set nr_numa_nodes only when we parsed all the data
perf c2c report: Add --no-source option
perf c2c report: Add --show-all option

Namhyung Kim (1):
perf top: Fix refreshing hierarchy entries on TUI

Wang Nan (1):
perf jevents: Handle events including .c and .o

tools/include/asm-generic/bitops.h | 1 +
tools/include/asm-generic/bitops/__ffz.h | 12 +
tools/include/asm-generic/bitops/find.h | 28 +
tools/include/linux/bitops.h | 5 +
tools/lib/find_bit.c | 25 +
tools/lib/traceevent/Makefile | 40 +-
tools/perf/Build | 1 +
tools/perf/Documentation/perf-c2c.txt | 282 ++
tools/perf/Documentation/perf-list.txt | 2 +-
tools/perf/MANIFEST | 1 +
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 4 +-
tools/perf/builtin-c2c.c | 2754 ++++++++++++++++++++
tools/perf/builtin-list.c | 9 +-
tools/perf/builtin.h | 1 +
tools/perf/perf-completion.sh | 6 +-
tools/perf/perf.c | 1 +
tools/perf/ui/browsers/hists.c | 5 +-
tools/perf/ui/browsers/hists.h | 1 +
tools/perf/util/event.h | 3 +
tools/perf/util/header.c | 21 +-
tools/perf/util/hist.c | 1 +
tools/perf/util/hist.h | 1 +
tools/perf/util/intel-bts.c | 9 +-
.../perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 +
.../perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 +
.../util/intel-pt-decoder/intel-pt-insn-decoder.c | 13 +-
.../util/intel-pt-decoder/intel-pt-insn-decoder.h | 6 +-
tools/perf/util/intel-pt-decoder/intel-pt-log.c | 4 +-
tools/perf/util/intel-pt.c | 19 +-
tools/perf/util/mem-events.c | 128 +
tools/perf/util/mem-events.h | 37 +
tools/perf/util/parse-events.c | 2 +-
tools/perf/util/parse-events.l | 4 +-
tools/perf/util/pmu.c | 14 +-
tools/perf/util/pmu.h | 3 +-
tools/perf/util/session.c | 10 -
tools/perf/util/sort.c | 2 +-
tools/perf/util/sort.h | 1 +
38 files changed, 3393 insertions(+), 66 deletions(-)
create mode 100644 tools/include/asm-generic/bitops/__ffz.h
create mode 100644 tools/perf/Documentation/perf-c2c.txt
create mode 100644 tools/perf/builtin-c2c.c

[root@jouet ~]# time dm
1 66.368836810 alpine:3.4: Ok
2 26.154146190 android-ndk:r12b-arm: Ok
3 69.746739126 archlinux:latest: Ok
4 39.624220291 centos:5: Ok
5 58.689782208 centos:6: Ok
6 69.851635081 centos:7: Ok
7 63.079827869 debian:7: Ok
8 68.955435266 debian:8: Ok
9 38.571431258 debian:experimental: Ok
10 69.558879497 fedora:20: Ok
11 73.092759654 fedora:21: Ok
12 72.443082285 fedora:22: Ok
13 72.305159323 fedora:23: Ok
14 77.316048256 fedora:24: Ok
15 32.774333511 fedora:24-x-ARC-uClibc: Ok
16 80.985293289 fedora:rawhide: Ok
17 79.388121697 mageia:5: Ok
18 72.485900821 opensuse:13.2: Ok
19 73.519405793 opensuse:42.1: Ok
20 81.367665352 opensuse:tumbleweed: Ok
21 56.263699207 ubuntu:12.04.5: Ok
22 38.300297066 ubuntu:14.04: Ok
23 68.467777551 ubuntu:14.04.4: Ok
24 70.120014470 ubuntu:15.10: Ok
25 69.392704717 ubuntu:16.04: Ok
26 68.643732518 ubuntu:16.04-x-arm: Ok
27 58.529762081 ubuntu:16.04-x-arm64: Ok
28 57.908570394 ubuntu:16.04-x-powerpc: Ok
29 58.354897750 ubuntu:16.04-x-powerpc64: Ok
30 60.598809333 ubuntu:16.04-x-powerpc64el: Ok
31 58.995355673 ubuntu:16.04-x-s390: Ok
32 74.705277358 ubuntu:16.10: Ok

real 33m47.198s
user 0m2.009s
sys 0m2.429s
[root@jouet ~]#

[acme@jouet linux]$ perf stat make -C tools/perf build-test
make: Entering directory '/home/acme/git/linux/tools/perf'
- tarpkg: ./tests/perf-targz-src-pkg .
make_util_pmu_bison_o_O: make util/pmu-bison.o
make_no_libelf_O: make NO_LIBELF=1
make_pure_O: make
make_no_libbionic_O: make NO_LIBBIONIC=1
make_no_libperl_O: make NO_LIBPERL=1
make_no_demangle_O: make NO_DEMANGLE=1
make_no_gtk2_O: make NO_GTK2=1
make_clean_all_O: make clean all
make_no_slang_O: make NO_SLANG=1
make_no_libnuma_O: make NO_LIBNUMA=1
make_debug_O: make DEBUG=1
make_util_map_o_O: make util/map.o
make_no_libpython_O: make NO_LIBPYTHON=1
make_no_newt_O: make NO_NEWT=1
make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1
make_perf_o_O: make perf.o
make_install_prefix_O: make install prefix=/tmp/krava
make_install_bin_O: make install-bin
make_no_libdw_dwarf_unwind_O: make NO_LIBDW_DWARF_UNWIND=1
make_install_prefix_slash_O: make install prefix=/tmp/krava/
make_help_O: make help
make_no_backtrace_O: make NO_BACKTRACE=1
- /home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP_STATIC: cd . && make FEATURE_DUMP_COPY=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP_STATIC LDFLAGS='-static' feature-dump
cd . && make FEATURE_DUMP_COPY=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP_STATIC LDFLAGS='-static' feature-dump
make_static_O: make LDFLAGS=-static
make_doc_O: make doc
make_no_auxtrace_O: make NO_AUXTRACE=1
make_no_ui_O: make NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
make_tags_O: make tags
make_with_babeltrace_O: make LIBBABELTRACE=1
make_minimal_O: make NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 NO_LIBCRYPTO=1 NO_SDT=1
make_install_O: make install
make_no_libunwind_O: make NO_LIBUNWIND=1
make_no_libbpf_O: make NO_LIBBPF=1
make_no_libaudit_O: make NO_LIBAUDIT=1
OK
make: Leaving directory '/home/acme/git/linux/tools/perf'
[acme@jouet linux]$

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:09 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Normally we limit the main list to contain only entries with HITM %
value > 0.0005, but it might be useful to display all captured entries.
Adding --show-all option for that.

Requested-and-Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-nokgjdwikb...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf-c2c.txt | 3 +++
tools/perf/builtin-c2c.c | 7 +++++--
2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 33ed4564a8c0..21810d711f5f 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -97,6 +97,9 @@ REPORT OPTIONS
--no-source::
Do not display Source:Line column.

+--show-all::
+ Show all captured HITM lines, with no regard to HITM % 0.0005 limit.
+
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 6656fcbe8d85..dc4f0636dfa1 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -71,6 +71,7 @@ struct perf_c2c {
int node_info;

bool show_src;
+ bool show_all;
bool use_stdio;
bool stats_only;
bool symbol_full;
@@ -1773,8 +1774,8 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
struct c2c_hist_entry *c2c_he;
double ld_dist;

- /* XXX Disabled for now, till we get a command line switch to control this */
- return true;
+ if (c2c.show_all)
+ return true;

c2c_he = container_of(he, struct c2c_hist_entry, he);

@@ -2513,6 +2514,8 @@ static int perf_c2c__report(int argc, const char **argv)
"Display full length of symbols"),
OPT_BOOLEAN(0, "no-source", &no_source,
"Do not display Source Line column"),
+ OPT_BOOLEAN(0, "show-all", &c2c.show_all,
+ "Show all captured HITM lines."),

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:09 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

It displays cacheline address as hex number.

Use a c2c wrapper around the standard 'dcacheline' object to define its own
header and a simple (address-only) cacheline output.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-21-...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 78addc42c9e5..3a3e67f6e772 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1,5 +1,6 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/stringify.h>
#include "util.h"
#include "debug.h"
#include "builtin.h"
@@ -7,6 +8,7 @@
#include "mem-events.h"
#include "session.h"
#include "hist.h"
+#include "sort.h"
#include "tool.h"
#include "data.h"
#include "sort.h"
@@ -273,6 +275,32 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
}

+#define HEX_STR(__s, __v) \
+({ \
+ scnprintf(__s, sizeof(__s), "0x%" PRIx64, __v); \
+ __s; \
+})
+
+static int64_t
+dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__dcacheline_cmp(left, right);
+}
+
+static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ uint64_t addr = 0;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[20];
+
+ if (he->mem_info)
+ addr = cl_address(he->mem_info->daddr.addr);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -308,7 +336,16 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
}, \
}

+static struct c2c_dimension dim_dcacheline = {
+ .header = HEADER_LOW("Cacheline"),
+ .name = "dcacheline",
+ .cmp = dcacheline_cmp,
+ .entry = dcacheline_entry,
+ .width = 18,
+};
+
static struct c2c_dimension *dimensions[] = {
+ &dim_dcacheline,
NULL,
};

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:09 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

It is to be displayed in the single cacheline output:

tid

It's a wrapper for the global sort_thread sort entry with a c2c-specific
header.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-fr0socae5s...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 4795713ad211..77810a30a4cc 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1098,6 +1098,12 @@ static struct c2c_dimension dim_pid = {
.width = 7,
};

+static struct c2c_dimension dim_tid = {
+ .header = HEADER_LOW("Tid"),
+ .name = "tid",
+ .se = &sort_thread,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1128,6 +1134,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_dram_lcl,
&dim_dram_rmt,
&dim_pid,
+ &dim_tid,
NULL,
};

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:11 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Currently we sort and limit displayed data based on the remote HITMs
count. Add support for switching to local HITMs via the --display option:

--display ... lcl,rmt

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-inykbom2f1...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 114 +++++++++++++++++++++++++++++++++++++++--------
1 file changed, 95 insertions(+), 19 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 0bae8c965554..aad450861045 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -63,6 +63,13 @@ struct perf_c2c {
/* HITM shared clines stats */
struct c2c_stats hitm_stats;
int shared_clines;
+
+ int display;
+};
+
+enum {
+ DISPLAY_LCL,
+ DISPLAY_RMT,
};

static struct perf_c2c c2c;
@@ -684,15 +691,24 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
struct c2c_hists *hists;
struct c2c_stats *stats;
struct c2c_stats *total;
- int tot, st;
+ int tot = 0, st = 0;
double p;

hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
stats = &c2c_he->stats;
total = &hists->stats;

- st = stats->rmt_hitm;
- tot = total->rmt_hitm;
+ switch (c2c.display) {
+ case DISPLAY_RMT:
+ st = stats->rmt_hitm;
+ tot = total->rmt_hitm;
+ break;
+ case DISPLAY_LCL:
+ st = stats->lcl_hitm;
+ tot = total->lcl_hitm;
+ default:
+ break;
+ }

p = tot ? (double) st / tot : 0;

@@ -975,14 +991,26 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
advance_hpp(hpp, ret);

+ #define DISPLAY_HITM(__h) \
+ if (c2c_he->stats.__h> 0) { \
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \
+ percent(stats->__h, c2c_he->stats.__h));\
+ } else { \
+ ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \
+ }

- if (c2c_he->stats.rmt_hitm > 0) {
- ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
- percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm));
- } else {
- ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+ switch (c2c.display) {
+ case DISPLAY_RMT:
+ DISPLAY_HITM(rmt_hitm);
+ break;
+ case DISPLAY_LCL:
+ DISPLAY_HITM(lcl_hitm);
+ default:
+ break;
}

+ #undef DISPLAY_HITM
+
advance_hpp(hpp, ret);

if (c2c_he->stats.store > 0) {
@@ -1258,8 +1286,12 @@ static struct c2c_dimension dim_tot_loads = {
.width = 7,
};

+static struct c2c_header percent_hitm_header[] = {
+ [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
+ [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
+};
+
static struct c2c_dimension dim_percent_hitm = {
- .header = HEADER_LOW("%hitm"),
.name = "percent_hitm",
.cmp = percent_hitm_cmp,
.entry = percent_hitm_entry,
@@ -1654,23 +1686,39 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)

c2c_he = container_of(he, struct c2c_hist_entry, he);

- if (stats->rmt_hitm) {
- ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm);
- if (ld_dist < DISPLAY_LINE_LIMIT)
- he->filtered = HIST_FILTER__C2C;
- } else {
- he->filtered = HIST_FILTER__C2C;
+#define FILTER_HITM(__h) \
+ if (stats->__h) { \
+ ld_dist = ((double)c2c_he->stats.__h / stats->__h); \
+ if (ld_dist < DISPLAY_LINE_LIMIT) \
+ he->filtered = HIST_FILTER__C2C; \
+ } else { \
+ he->filtered = HIST_FILTER__C2C; \
}

+ switch (c2c.display) {
+ case DISPLAY_LCL:
+ FILTER_HITM(lcl_hitm);
+ break;
+ case DISPLAY_RMT:
+ FILTER_HITM(rmt_hitm);
+ default:
+ break;
+ };
+
+#undef FILTER_HITM
+
return he->filtered == 0;
}

static inline int valid_hitm_or_store(struct hist_entry *he)
{
struct c2c_hist_entry *c2c_he;
+ bool has_hitm;

c2c_he = container_of(he, struct c2c_hist_entry, he);
- return c2c_he->stats.rmt_hitm || c2c_he->stats.store;
+ has_hitm = c2c.display == DISPLAY_LCL ?
+ c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm;
+ return has_hitm || c2c_he->stats.store;
}

static int filter_cb(struct hist_entry *he)
@@ -1951,6 +1999,8 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
perf_evsel__name(evsel));
first = false;
}
+ fprintf(out, " Cachelines sort on : %s HITMs\n",
+ c2c.display == DISPLAY_LCL ? "Local" : "Remote");
}

static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
@@ -2083,8 +2133,10 @@ static int perf_c2c_browser__title(struct hist_browser *browser,
char *bf, size_t size)
{
scnprintf(bf, size,
- "Shared Data Cache Line Table "
- "(%lu entries)", browser->nr_non_filtered_entries);
+ "Shared Data Cache Line Table "
+ "(%lu entries, sorted on %s HITMs)",
+ browser->nr_non_filtered_entries,
+ c2c.display == DISPLAY_LCL ? "local" : "remote");
return 0;
}

@@ -2156,6 +2208,8 @@ static void ui_quirks(void)
dim_offset.width = 5;
dim_offset.header = header_offset_tui;
}
+
+ dim_percent_hitm.header = percent_hitm_header[c2c.display];
}

#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
@@ -2210,6 +2264,22 @@ static int setup_callchain(struct perf_evlist *evlist)
return 0;
}

+static int setup_display(const char *str)
+{
+ const char *display = str ?: "rmt";
+
+ if (!strcmp(display, "rmt"))
+ c2c.display = DISPLAY_RMT;
+ else if (!strcmp(display, "lcl"))
+ c2c.display = DISPLAY_LCL;
+ else {
+ pr_err("failed: unknown display type: %s\n", str);
+ return -1;
+ }
+
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -2218,6 +2288,7 @@ static int perf_c2c__report(int argc, const char **argv)
.mode = PERF_DATA_MODE_READ,
};
char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
+ const char *display = NULL;
const struct option c2c_options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -2236,6 +2307,7 @@ static int perf_c2c__report(int argc, const char **argv)
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
callchain_help, &parse_callchain_opt,
callchain_default_opt),
+ OPT_STRING('d', "display", &display, NULL, "lcl,rmt"),
OPT_END()
};
int err = 0;
@@ -2260,6 +2332,10 @@ static int perf_c2c__report(int argc, const char **argv)

file.path = input_name;

+ err = setup_display(display);
+ if (err)
+ goto out;
+
err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
@@ -2307,7 +2383,7 @@ static int perf_c2c__report(int argc, const char **argv)
"tot_loads,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,ld_rmthit",
- "rmt_hitm"
+ c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
);

ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:13 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Display name of feature instead of just the number
during recording data.

Before:
failed to write feature 13

Now:
failed to write feature HEADER_CPU_TOPOLOGY

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Adrian Hunter <adrian...@intel.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Wang Nan <wang...@huawei.com>
Link: http://lkml.kernel.org/n/tip-k9d9trozi5...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/header.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 59b67aefa8b2..43ded20f1edf 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2290,7 +2290,7 @@ static int do_write_feat(int fd, struct perf_header *h, int type,

err = feat_ops[type].write(fd, h, evlist);
if (err < 0) {
- pr_debug("failed to write feature %d\n", type);
+ pr_debug("failed to write feature %s\n", feat_ops[type].name);

/* undo anything written */
lseek(fd, (*p)->offset, SEEK_SET);
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:14 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Display c2c related configuration options/setup.
So far it's output of monitored events:

$ perf c2c report --stats
...

=================================================
c2c details
=================================================
Events : cpu/mem-loads,ldlat=50/pp
: cpu/mem-stores/pp

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-ypz84f3a9f...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 33 +++++++++++++++++++++++++++------
1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 7bbb969f284a..c36d1dc668ac 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -13,6 +13,8 @@
#include "tool.h"
#include "data.h"
#include "sort.h"
+#include "evlist.h"
+#include "evsel.h"
#include <asm/bug.h>
#include "ui/browsers/hists.h"

@@ -1893,13 +1895,32 @@ static void print_pareto(FILE *out)
}
}

-static void perf_c2c__hists_fprintf(FILE *out)
+static void print_c2c_info(FILE *out, struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ bool first = true;
+
+ fprintf(out, "=================================================\n");
+ fprintf(out, " c2c details \n");
+ fprintf(out, "=================================================\n");
+
+ evlist__for_each_entry(evlist, evsel) {
+ fprintf(out, "%-36s: %s\n", first ? " Events" : "",
+ perf_evsel__name(evsel));
+ first = false;
+ }
+}
+
+static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
{
setup_pager();

print_c2c__display_stats(out);
fprintf(out, "\n");
print_shared_cacheline_info(out);
+ fprintf(out, "\n");
+ print_c2c_info(out, session);

if (c2c.stats_only)
return;
@@ -2073,18 +2094,18 @@ out:
return 0;
}

-static void perf_c2c_display(void)
+static void perf_c2c_display(struct perf_session *session)
{
if (c2c.use_stdio)
- perf_c2c__hists_fprintf(stdout);
+ perf_c2c__hists_fprintf(stdout, session);
else
perf_c2c__hists_browse(&c2c.hists.hists);
}
#else
-static void perf_c2c_display(void)
+static void perf_c2c_display(struct perf_session *session)
{
use_browser = 0;
- perf_c2c__hists_fprintf(stdout);
+ perf_c2c__hists_fprintf(stdout, session);
}
#endif /* HAVE_SLANG_SUPPORT */

@@ -2197,7 +2218,7 @@ static int perf_c2c__report(int argc, const char **argv)

ui_quirks();

- perf_c2c_display();
+ perf_c2c_display(session);

out_session:
perf_session__delete(session);
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:15 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Allowing user to configure the way the single cacheline
data are sorted after being sorted by offset.

Adding 'c' option to specify sorting fields for single cacheline:

-c, --coalesce <coalesce fields>
coalesce fields: pid,tid,iaddr,dso

It's allowed to use following combination of fields:
pid - process pid
tid - process tid
iaddr - code address
dso - shared object

Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-aka8z31umx...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 119 ++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 102 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index aad450861045..2f3e430ddde0 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -46,6 +46,8 @@ struct c2c_hist_entry {
struct hist_entry he;
};

+static char const *coalesce_default = "pid,tid,iaddr";
+
struct perf_c2c {
struct perf_tool tool;
struct c2c_hists hists;
@@ -65,6 +67,11 @@ struct perf_c2c {
int shared_clines;

int display;
+
+ const char *coalesce;
+ char *cl_sort;
+ char *cl_resort;
+ char *cl_output;
};

enum {
@@ -239,7 +246,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (!mi_dup)
goto free_mi;

- c2c_hists = he__get_c2c_hists(he, "offset", 2);
+ c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
if (!c2c_hists)
goto free_mi_dup;

@@ -1742,22 +1749,7 @@ static int resort_cl_cb(struct hist_entry *he)
c2c_hists = c2c_he->hists;

if (display && c2c_hists) {
- c2c_hists__reinit(c2c_hists,
- "percent_rmt_hitm,"
- "percent_lcl_hitm,"
- "percent_stores_l1hit,"
- "percent_stores_l1miss,"
- "offset,"
- "pid,"
- "tid,"
- "mean_rmt,"
- "mean_lcl,"
- "mean_load,"
- "cpucnt,"
- "symbol,"
- "dso,"
- "node",
- "offset,rmt_hitm,lcl_hitm");
+ c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);

hists__collapse_resort(&c2c_hists->hists, NULL);
hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
@@ -2001,6 +1993,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
}
fprintf(out, " Cachelines sort on : %s HITMs\n",
c2c.display == DISPLAY_LCL ? "Local" : "Remote");
+ fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
}

static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
@@ -2280,6 +2273,89 @@ static int setup_display(const char *str)
return 0;
}

+#define for_each_token(__tok, __buf, __sep, __tmp) \
+ for (__tok = strtok_r(__buf, __sep, &__tmp); __tok; \
+ __tok = strtok_r(NULL, __sep, &__tmp))
+
+static int build_cl_output(char *cl_sort)
+{
+ char *tok, *tmp, *buf = strdup(cl_sort);
+ bool add_pid = false;
+ bool add_tid = false;
+ bool add_iaddr = false;
+ bool add_sym = false;
+ bool add_dso = false;
+ bool add_src = false;
+
+ if (!buf)
+ return -ENOMEM;
+
+ for_each_token(tok, buf, ",", tmp) {
+ if (!strcmp(tok, "tid")) {
+ add_tid = true;
+ } else if (!strcmp(tok, "pid")) {
+ add_pid = true;
+ } else if (!strcmp(tok, "iaddr")) {
+ add_iaddr = true;
+ add_sym = true;
+ add_dso = true;
+ add_src = true;
+ } else if (!strcmp(tok, "dso")) {
+ add_dso = true;
+ } else if (strcmp(tok, "offset")) {
+ pr_err("unrecognized sort token: %s\n", tok);
+ return -EINVAL;
+ }
+ }
+
+ if (asprintf(&c2c.cl_output,
+ "%s%s%s%s%s%s%s%s%s",
+ "percent_rmt_hitm,"
+ "percent_lcl_hitm,"
+ "percent_stores_l1hit,"
+ "percent_stores_l1miss,"
+ "offset,",
+ add_pid ? "pid," : "",
+ add_tid ? "tid," : "",
+ add_iaddr ? "iaddr," : "",
+ "mean_rmt,"
+ "mean_lcl,"
+ "mean_load,"
+ "cpucnt,",
+ add_sym ? "symbol," : "",
+ add_dso ? "dso," : "",
+ add_src ? "cl_srcline," : "",
+ "node") < 0)
+ return -ENOMEM;
+
+ c2c.show_src = add_src;
+
+ free(buf);
+ return 0;
+}
+
+static int setup_coalesce(const char *coalesce)
+{
+ const char *c = coalesce ?: coalesce_default;
+
+ if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0)
+ return -ENOMEM;
+
+ if (build_cl_output(c2c.cl_sort))
+ return -1;
+
+ if (asprintf(&c2c.cl_resort, "offset,%s",
+ c2c.display == DISPLAY_RMT ?
+ "rmt_hitm,lcl_hitm" :
+ "lcl_hitm,rmt_hitm") < 0)
+ return -ENOMEM;
+
+ pr_debug("coalesce sort fields: %s\n", c2c.cl_sort);
+ pr_debug("coalesce resort fields: %s\n", c2c.cl_resort);
+ pr_debug("coalesce output fields: %s\n", c2c.cl_output);
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -2289,6 +2365,7 @@ static int perf_c2c__report(int argc, const char **argv)
};
char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
const char *display = NULL;
+ const char *coalesce = NULL;
const struct option c2c_options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -2308,6 +2385,8 @@ static int perf_c2c__report(int argc, const char **argv)
callchain_help, &parse_callchain_opt,
callchain_default_opt),
OPT_STRING('d', "display", &display, NULL, "lcl,rmt"),
+ OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
+ "coalesce fields: pid,tid,iaddr,dso"),
OPT_END()
};
int err = 0;
@@ -2336,6 +2415,12 @@ static int perf_c2c__report(int argc, const char **argv)
if (err)
goto out;

+ err = setup_coalesce(coalesce);
+ if (err) {
+ pr_debug("Failed to initialize hists\n");
+ goto out;
+ }
+
err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:15 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

It's convenient to have an index for each cacheline to help discussions
about results over the phone.

Add new 'Index' and 'Num' fields in main and single cacheline tables.

$ perf c2c report
=================================================
Shared Data Cache Line Table
=================================================
#
# Total Lcl ----- LLC Load Hitm -----
# Index Cacheline records Hitm Total Lcl Rmt ...
# ..... .................. ....... ....... ....... ....... .......
#
0 0xffff880036233b40 1 11.11% 1 1 0
1 0xffff88009ccb2900 1 11.11% 1 1 0
2 0xffff8800b5b3bc40 7 11.11% 1 1 0
...

=================================================
Shared Cache Line Distribution Pareto
=================================================
#
# ----- HITM ----- -- Store Refs -- Data address
# Num Rmt Lcl L1 Hit L1 Miss Offset Pid ...
# ..... ....... ....... ....... ....... .................. .......
#
-------------------------------------------------------------
0 0 1 0 0 0xffff880036233b40
-------------------------------------------------------------
0.00% 100.00% 0.00% 0.00% 0x30 0

-------------------------------------------------------------
1 0 1 0 0 0xffff88009ccb2900
-------------------------------------------------------------
0.00% 100.00% 0.00% 0.00% 0x28 549
...

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-4dhfagaz57...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 64 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 473dae1baed8..430360e618b0 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -36,6 +36,7 @@ struct c2c_hist_entry {
struct c2c_stats stats;
unsigned long *cpuset;
struct c2c_stats *node_stats;
+ unsigned int cacheline_idx;

struct compute_stats cstats;

@@ -1088,6 +1089,29 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
}

+static int
+cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ scnprintf(buf, 10, "%u", c2c_he->cacheline_idx);
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, "");
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1433,6 +1457,30 @@ static struct c2c_dimension dim_srcline = {
.se = &sort_srcline,
};

+static struct c2c_dimension dim_dcacheline_idx = {
+ .header = HEADER_LOW("Index"),
+ .name = "cl_idx",
+ .cmp = empty_cmp,
+ .entry = cl_idx_entry,
+ .width = 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num = {
+ .header = HEADER_LOW("Num"),
+ .name = "cl_num",
+ .cmp = empty_cmp,
+ .entry = cl_idx_entry,
+ .width = 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num_empty = {
+ .header = HEADER_LOW("Num"),
+ .name = "cl_num_empty",
+ .cmp = empty_cmp,
+ .entry = cl_idx_empty_entry,
+ .width = 5,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_offset,
@@ -1472,6 +1520,9 @@ static struct c2c_dimension *dimensions[] = {
&dim_mean_load,
&dim_cpucnt,
&dim_srcline,
+ &dim_dcacheline_idx,
+ &dim_dcacheline_num,
+ &dim_dcacheline_num_empty,
NULL,
};

@@ -1761,6 +1812,10 @@ static int resort_cl_cb(struct hist_entry *he)
calc_width(he);

if (display && c2c_hists) {
+ static unsigned int idx;
+
+ c2c_he->cacheline_idx = idx++;
+
c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);

hists__collapse_resort(&c2c_hists->hists, NULL);
@@ -1948,10 +2003,10 @@ static void print_cacheline(struct c2c_hists *c2c_hists,
fprintf(out, "\n");
}

- fprintf(out, " ------------------------------------------------------\n");
+ fprintf(out, " -------------------------------------------------------------\n");
__hist_entry__snprintf(he_cl, &hpp, hpp_list);
fprintf(out, "%s\n", bf);
- fprintf(out, " ------------------------------------------------------\n");
+ fprintf(out, " -------------------------------------------------------------\n");

hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true);
}
@@ -1964,6 +2019,7 @@ static void print_pareto(FILE *out)

perf_hpp_list__init(&hpp_list);
ret = hpp_list__parse(&hpp_list,
+ "cl_num,"
"cl_rmt_hitm,"
"cl_lcl_hitm,"
"cl_stores_l1hit,"
@@ -2321,7 +2377,8 @@ static int build_cl_output(char *cl_sort)
}

if (asprintf(&c2c.cl_output,
- "%s%s%s%s%s%s%s%s%s",
+ "%s%s%s%s%s%s%s%s%s%s",
+ c2c.use_stdio ? "cl_num_empty," : "",
"percent_rmt_hitm,"
"percent_lcl_hitm,"
"percent_stores_l1hit,"
@@ -2470,6 +2527,7 @@ static int perf_c2c__report(int argc, const char **argv)
}

c2c_hists__reinit(&c2c.hists,
+ "cl_idx,"
"dcacheline,"
"tot_recs,"
"percent_hitm,"
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:16 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Adding TUI support to switch between Node entry versions
in real time with 'n' key.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-xqbw4h4dxi...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 00d1620dc2bf..27eef205a774 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2207,6 +2207,10 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
case 's':
c2c.symbol_full = !c2c.symbol_full;
break;
+ case 'n':
+ c2c.node_info = (c2c.node_info + 1) % 3;
+ setup_nodes_header();
+ break;
case 'q':
goto out;
default:
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:16 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Using resort callbacks to compute the columns' width.

Computing only the global ones, c2c entries have fixed width only.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-zyayvq2u3d...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 2f3e430ddde0..473dae1baed8 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1728,11 +1728,21 @@ static inline int valid_hitm_or_store(struct hist_entry *he)
return has_hitm || c2c_he->stats.store;
}

+static void calc_width(struct hist_entry *he)
+{
+ struct c2c_hists *c2c_hists;
+
+ c2c_hists = container_of(he->hists, struct c2c_hists, hists);
+ hists__calc_col_len(&c2c_hists->hists, he);
+}
+
static int filter_cb(struct hist_entry *he)
{
if (c2c.show_src && !he->srcline)
he->srcline = hist_entry__get_srcline(he);

+ calc_width(he);
+
if (!valid_hitm_or_store(he))
he->filtered = HIST_FILTER__C2C;

@@ -1748,6 +1758,8 @@ static int resort_cl_cb(struct hist_entry *he)
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_hists = c2c_he->hists;

+ calc_width(he);
+
if (display && c2c_hists) {
c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:16 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Add man page for c2c command and credits to builtin-c2c.c file.

Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-twbp391v8v...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/Documentation/perf-c2c.txt | 276 ++++++++++++++++++++++++++++++++++
tools/perf/builtin-c2c.c | 11 ++
2 files changed, 287 insertions(+)
create mode 100644 tools/perf/Documentation/perf-c2c.txt

diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
new file mode 100644
index 000000000000..ba2f4de399c3
--- /dev/null
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -0,0 +1,276 @@
+perf-c2c(1)
+===========
+
+NAME
+----
+perf-c2c - Shared Data C2C/HITM Analyzer.
+
+SYNOPSIS
+--------
+[verse]
+'perf c2c record' [<options>] <command>
+'perf c2c record' [<options>] -- [<record command options>] <command>
+'perf c2c report' [<options>]
+
+DESCRIPTION
+-----------
+C2C stands for Cache To Cache.
+
+The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
+you to track down the cacheline contentions.
+
+The tool is based on x86's load latency and precise store facility events
+provided by Intel CPUs. These events provide:
+ - memory address of the access
+ - type of the access (load and store details)
+ - latency (in cycles) of the load access
+
+The c2c tool provides means to record this data and report back access details
+for cachelines with highest contention - highest number of HITM accesses.
+
+The basic workflow with this tool follows the standard record/report phase.
+User uses the record command to record events data and report command to
+display it.
+
+
+RECORD OPTIONS
+--------------
+-e::
+--event=::
+ Select the PMU event. Use 'perf mem record -e list'
+ to list available events.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-l::
+--ldlat::
+ Configure mem-loads latency.
+
+-k::
+--all-kernel::
+ Configure all used events to run in kernel space.
+
+-u::
+--all-user::
+ Configure all used events to run in user space.
+
+REPORT OPTIONS
+--------------
+-k::
+--vmlinux=<file>::
+ vmlinux pathname
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-i::
+--input::
+ Specify the input file to process.
+
+-N::
+--node-info::
+ Show extra node info in report (see NODE INFO section)
+
+-c::
+--coalesce::
+ Specify sorting fields for single cacheline display.
+ Following fields are available: tid,pid,iaddr,dso
+ (see COALESCE)
+
+-g::
+--call-graph::
+ Setup callchains parameters.
+ Please refer to perf-report man page for details.
+
+--stdio::
+ Force the stdio output (see STDIO OUTPUT)
+
+--stats::
+ Display only statistic tables and force stdio mode.
+
+--full-symbols::
+ Display full length of symbols.
+
+C2C RECORD
+----------
+The perf c2c record command sets up options related to HITM cacheline analysis
+and calls standard perf record command.
+
+Following perf record options are configured by default:
+(check perf record man page for details)
+
+ -W,-d,--sample-cpu
+
+Unless specified otherwise with '-e' option, following events are monitored by
+default:
+
+ cpu/mem-loads,ldlat=30/P
+ cpu/mem-stores/P
+
+User can pass any 'perf record' option behind '--' mark, like (to enable
+callchains and system wide monitoring):
+
+ $ perf c2c record -- -g -a
+
+Please check RECORD OPTIONS section for specific c2c record options.
+
+C2C REPORT
+----------
+The perf c2c report command displays shared data analysis. It comes in two
+display modes: stdio and tui (default).
+
+The report command workflow is following:
+ - sort all the data based on the cacheline address
+ - store access details for each cacheline
+ - sort all cachelines based on user settings
+ - display data
+
+In general perf report output consists of 2 basic views:
+ 1) most expensive cachelines list
+ 2) offsets details for each cacheline
+
+For each cacheline in the 1) list we display following data:
+(Both stdio and TUI modes follow the same fields output)
+
+ Index
+ - zero based index to identify the cacheline
+
+ Cacheline
+ - cacheline address (hex number)
+
+ Total records
+ - sum of all cachelines accesses
+
+ Rmt/Lcl Hitm
+ - cacheline percentage of all Remote/Local HITM accesses
+
+ LLC Load Hitm - Total, Lcl, Rmt
+ - count of Total/Local/Remote load HITMs
+
+ Store Reference - Total, L1Hit, L1Miss
+ Total - all store accesses
+ L1Hit - store accesses that hit L1
+ L1Miss - store accesses that missed L1
+
+ Load Dram
+ - count of local and remote DRAM accesses
+
+ LLC Ld Miss
+ - count of all accesses that missed LLC
+
+ Total Loads
+ - sum of all load accesses
+
+ Core Load Hit - FB, L1, L2
+ - count of load hits in FB (Fill Buffer), L1 and L2 cache
+
+ LLC Load Hit - Llc, Rmt
+ - count of LLC and Remote load hits
+
+For each offset in the 2) list we display following data:
+
+ HITM - Rmt, Lcl
+ - % of Remote/Local HITM accesses for given offset within cacheline
+
+ Store Refs - L1 Hit, L1 Miss
+ - % of store accesses that hit/missed L1 for given offset within cacheline
+
+ Data address - Offset
+ - offset address
+
+ Pid
+ - pid of the process responsible for the accesses
+
+ Tid
+ - tid of the process responsible for the accesses
+
+ Code address
+ - code address responsible for the accesses
+
+ cycles - rmt hitm, lcl hitm, load
+ - sum of cycles for given accesses - Remote/Local HITM and generic load
+
+ cpu cnt
+ - number of cpus that participated on the access
+
+ Symbol
+ - code symbol related to the 'Code address' value
+
+ Shared Object
+ - shared object name related to the 'Code address' value
+
+ Source:Line
+ - source information related to the 'Code address' value
+
+ Node
+ - nodes participating on the access (see NODE INFO section)
+
+NODE INFO
+---------
+The 'Node' field displays nodes that access given cacheline
+offset. Its output comes in 3 flavors:
+ - node IDs separated by ','
+ - node IDs with stats for each ID, in following format:
+ Node{cpus %hitms %stores}
+ - node IDs with list of affected CPUs in following format:
+ Node{cpu list}
+
+User can switch between above flavors with -N option or
+use 'n' key to interactively switch in TUI mode.
+
+COALESCE
+--------
+User can specify how to sort offsets for cacheline.
+
+Following fields are available and govern the final
+output fields set for cacheline offsets output:
+
+ tid - coalesced by process TIDs
+ pid - coalesced by process PIDs
+ iaddr - coalesced by code address, following fields are displayed:
+ Code address, Code symbol, Shared Object, Source line
+ dso - coalesced by shared object
+
+By default the coalescing is setup with 'pid,tid,iaddr'.
+
+STDIO OUTPUT
+------------
+The stdio output displays data on standard output.
+
+Following tables are displayed:
+ Trace Event Information
+ - overall statistics of memory accesses
+
+ Global Shared Cache Line Event Information
+ - overall statistics on shared cachelines
+
+ Shared Data Cache Line Table
+ - list of most expensive cachelines
+
+ Shared Cache Line Distribution Pareto
+ - list of all accessed offsets for each cacheline
+
+TUI OUTPUT
+----------
+The TUI output provides interactive interface to navigate
+through cachelines list and to display offset details.
+
+For details please refer to the help window by pressing '?' key.
+
+CREDITS
+-------
+Although Don Zickus, Dick Fowles and Joe Mario worked together
+to get this implemented, we got lots of early help from Arnaldo
+Carvalho de Melo, Stephane Eranian, Jiri Olsa and Andi Kleen.
+
+C2C BLOG
+--------
+Check Joe's blog on c2c tool for detailed use case explanation:
+ https://joemario.github.io/blog/2016/09/01/c2c-blog/
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-mem[1]
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 3fc18264d4d1..73973da00b31 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1,3 +1,14 @@
+/*
+ * This is rewrite of original c2c tool introduced in here:
+ * http://lwn.net/Articles/588866/
+ *
+ * The original tool was changed to fit in current perf state.
+ *
+ * Original authors:
+ * Don Zickus <dzi...@redhat.com>
+ * Dick Fowles <fow...@inreach.com>
+ * Joe Mario <jma...@redhat.com>
+ */
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/stringify.h>
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:16 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Display missing features in header info, like:

$ perf report --header-only
# ========
# captured on: Mon Oct 10 09:39:47 2016
...
# missing features: HEADER_TRACING_DATA HEADER_CPU_TOPOLOGY ...

To help in diagnosing problems.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Adrian Hunter <adrian...@intel.com>
Cc: David Ahern <dsa...@gmail.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Wang Nan <wang...@huawei.com>
Link: http://lkml.kernel.org/n/tip-bh5gp84gob...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/util/header.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b227dfab56c3..59b67aefa8b2 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2251,7 +2251,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
struct perf_header *header = &session->header;
int fd = perf_data_file__fd(session->file);
struct stat st;
- int ret;
+ int ret, bit;

hd.fp = fp;
hd.full = full;
@@ -2264,6 +2264,14 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)

perf_header__process_sections(header, fd, &hd,
perf_file_section__fprintf_info);
+
+ fprintf(fp, "# missing features: ");
+ for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) {
+ if (bit)
+ fprintf(fp, "%s ", feat_ops[bit].name);
+ }
+
+ fprintf(fp, "\n");
return 0;
}

--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:16 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Add the --call-graph option to properly set up the callchain code. Add
default settings to display callchains whenever they are stored in
perf.data.

Committer Notes:

Testing it:

[root@jouet ~]# perf c2c record -a -g sleep 5
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.331 MB perf.data (4263 samples) ]
[root@jouet ~]# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
[root@jouet ~]# perf c2c report --stats
=================================================
Trace Event Information
=================================================
Total records : 4263
Locked Load/Store Operations : 220
Load Operations : 2130
Loads - uncacheable : 1
Loads - IO : 7
Loads - Miss : 86
Loads - no mapping : 5
Load Fill Buffer Hit : 609
Load L1D hit : 612
=================================================
Trace Event Information
=================================================
Total records : 4263
Locked Load/Store Operations : 220
Load Operations : 2130
Loads - uncacheable : 1
Loads - IO : 7
Loads - Miss : 86
Loads - no mapping : 5
Load Fill Buffer Hit : 609
Load L1D hit : 612
Load L2D hit : 27
Load LLC hit : 607
Load Local HITM : 15
Load Remote HITM : 0
Load Remote HIT : 0
Load Local DRAM : 176
Load Remote DRAM : 0
Load MESI State Exclusive : 176
Load MESI State Shared : 0
Load LLC Misses : 176
LLC Misses to Local DRAM : 100.0%
LLC Misses to Remote DRAM : 0.0%
LLC Misses to Remote cache (HIT) : 0.0%
LLC Misses to Remote cache (HITM) : 0.0%
Store Operations : 2133
Store - uncacheable : 0
Store - no mapping : 1
Store L1D Hit : 1967
Store L1D Miss : 165
No Page Map Rejects : 145
Unable to parse data source : 0

=================================================
Global Shared Cache Line Event Information
=================================================
Total Shared Cache Lines : 15
Load HITs on shared lines : 26
Fill Buffer Hits on shared lines : 7
L1D hits on shared lines : 3
L2D hits on shared lines : 0
LLC hits on shared lines : 16
Locked Access on shared lines : 2
Store HITs on shared lines : 8
Store L1D hits on shared lines : 7
Total Merged records : 23

=================================================
c2c details
=================================================
Events : cpu/mem-loads,ldlat=30/P
: cpu/mem-stores/P
[root@jouet ~]#

[root@jouet ~]# perf c2c report
Shared Data Cache Line Table (2378 entries)
Total --- LLC Load Hitm -- -- Store Reference - - Load Dram - LLC Total - Core Load Hit -
Cacheline records %hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1 L2
- 0xffff880024380c00 10 0.00% 0 0 0 6 6 0 0 0 0 4 1 3 0
- 0.13% _raw_spin_lock_irqsave
- 0.07% ep_poll
sys_epoll_wait
do_syscall_64
return_from_SYSCALL_64
+ 0x103573
- 0.05% ep_poll_callback
__wake_up_common
- __wake_up_sync_key
- 0.02% pipe_read
__vfs_read
vfs_read
sys_read
do_syscall_64
return_from_SYSCALL_64
0xfdad
+ 0.02% sock_def_readable
+ 0.02% ep_scan_ready_list.constprop.12
+ 0.00% mutex_lock
+ 0.00% __wake_up_common
+ 0xffff880024380c40 1 0.00% 0 0 0 1 1 0 0 0 0 0 0 0 0
+ 0xffff880024380c80 1 0.00% 0 0 0 0 0 0 0 0 0 1 0 0 0
- 0xffff8800243e9f00 1 0.00% 0 0 0 1 1 0 0 0 0 0 0 0 0
enqueue_entity
enqueue_task_fair
activate_task
ttwu_do_activate
try_to_wake_up
wake_up_process
hrtimer_wakeup
__hrtimer_run_queues
hrtimer_interrupt
local_apic_timer_interrupt
smp_apic_timer_interrupt
apic_timer_interrupt
cpuidle_enter
call_cpuidle
help

-------------

And when pressing 'd' to see the cacheline details:

Cacheline 0xffff880024380c00
----- HITM ----- -- Store Refs -- --------- cycles ----- cpu
Rmt Lcl L1 Hit L1 Miss Off Pid Tid rmt hitm lcl hitm load cnt Symbol
- 0.00% 0.00% 100.00% 0.00% 0x0 1473 1474:Chrome_ChildIOT 0 0 41 2 [k] _raw_spin_lock_irqsave [kernel]
- _raw_spin_lock_irqsave
- 51.52% ep_poll
sys_epoll_wait
do_syscall_64
return_from_SYSCALL_64
- 0x103573
47.19% 0
4.33% 0xc30bd
- 35.93% ep_poll_callback
__wake_up_common
- __wake_up_sync_key
- 18.20% pipe_read
__vfs_read
vfs_read
sys_read
do_syscall_64
return_from_SYSCALL_64
0xfdad
- 17.73% sock_def_readable
unix_stream_sendmsg
sock_sendmsg
___sys_sendmsg
__sys_sendmsg
sys_sendmsg
do_syscall_64
return_from_SYSCALL_64
__GI___libc_sendmsg
0x12c036af1fc0
0x16a4050
0x894928ec83485354
+ 12.45% ep_scan_ready_list.constprop.12
+ 0.00% 0.00% 0.00% 0.00% 0x8 1473 1474:Chrome_ChildIOT 0 0 102 1 [k] mutex_lock [kernel]
+ 0.00% 0.00% 0.00% 0.00% 0x38 1473 1473:chrome 0 0 88 1 [k] __wake_up_common [kernel]

help

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-inykbom2f1...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c36d1dc668ac..d51a6c3515cb 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -17,6 +17,7 @@
#include "evsel.h"
#include <asm/bug.h>
#include "ui/browsers/hists.h"
+#include "evlist.h"

struct c2c_hists {
struct hists hists;
@@ -183,6 +184,11 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}

+ ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
+ evsel, &al, sysctl_perf_event_max_stack);
+ if (ret)
+ goto out;
+
mi = sample__resolve_mem(sample, &al);
if (mi == NULL)
return -ENOMEM;
@@ -2117,6 +2123,58 @@ static void ui_quirks(void)
}
}

+#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
+
+const char callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+ CALLCHAIN_REPORT_HELP
+ "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
+
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+ struct callchain_param *callchain = opt->value;
+
+ callchain->enabled = !unset;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ symbol_conf.use_callchain = false;
+ callchain->mode = CHAIN_NONE;
+ return 0;
+ }
+
+ return parse_callchain_report_opt(arg);
+}
+
+static int setup_callchain(struct perf_evlist *evlist)
+{
+ u64 sample_type = perf_evlist__combined_sample_type(evlist);
+ enum perf_call_graph_mode mode = CALLCHAIN_NONE;
+
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER))
+ mode = CALLCHAIN_DWARF;
+ else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ mode = CALLCHAIN_LBR;
+ else if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ mode = CALLCHAIN_FP;
+
+ if (!callchain_param.enabled &&
+ callchain_param.mode != CHAIN_NONE &&
+ mode != CALLCHAIN_NONE) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ ui__error("Can't register callchain params.\n");
+ return -EINVAL;
+ }
+ }
+
+ callchain_param.record_mode = mode;
+ callchain_param.min_percent = 0;
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -2124,6 +2182,7 @@ static int perf_c2c__report(int argc, const char **argv)
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
+ char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
const struct option c2c_options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -2138,6 +2197,10 @@ static int perf_c2c__report(int argc, const char **argv)
#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Use the stdio interface"),
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
+ "print_type,threshold[,print_limit],order,sort_key[,branch],value",
+ callchain_help, &parse_callchain_opt,
+ callchain_default_opt),
OPT_END()
};
int err = 0;
@@ -2179,6 +2242,10 @@ static int perf_c2c__report(int argc, const char **argv)
goto out;
}

+ err = setup_callchain(session->evlist);
+ if (err)
+ goto out_session;

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:16 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

Add the main cachelines TUI browser. It allows navigating through
cachelines and displaying their details and callchains (implemented in the
following patches).

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Andi Kleen <an...@firstfloor.org>
Cc: David Ahern <dsa...@gmail.com>
Cc: Don Zickus <dzi...@redhat.com>
Cc: Joe Mario <jma...@redhat.com>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Peter Zijlstra <a.p.zi...@chello.nl>
Link: http://lkml.kernel.org/n/tip-inykbom2f1...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/perf/builtin-c2c.c | 117 ++++++++++++++++++++++++++++++++++++++++-
tools/perf/ui/browsers/hists.c | 2 +-
tools/perf/ui/browsers/hists.h | 1 +
3 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 33db26c6ca63..c42991664703 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -14,6 +14,7 @@
#include "data.h"
#include "sort.h"
#include <asm/bug.h>
+#include "ui/browsers/hists.h"

struct c2c_hists {
struct hists hists;
@@ -53,6 +54,7 @@ struct perf_c2c {
int node_info;

bool show_src;
+ bool use_stdio;
};

static struct perf_c2c c2c;
@@ -657,6 +659,10 @@ percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
c2c_he = container_of(he, struct c2c_hist_entry, he);
per = get_percent(c2c_he);

+#ifdef HAVE_SLANG_SUPPORT
+ if (use_browser)
+ return __hpp__slsmg_color_printf(hpp, "%*.2f%%", width - 1, per);
+#endif
return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per);
}

@@ -1077,6 +1083,8 @@ static struct c2c_dimension dim_dcacheline = {
.width = 18,
};

+static struct c2c_header header_offset_tui = HEADER_LOW("Off");
+
static struct c2c_dimension dim_offset = {
.header = HEADER_BOTH("Data address", "Offset"),
.name = "offset",
@@ -1803,6 +1811,100 @@ static void perf_c2c__hists_fprintf(FILE *out)
print_pareto(out);
}

+#ifdef HAVE_SLANG_SUPPORT
+static void c2c_browser__update_nr_entries(struct hist_browser *hb)
+{
+ u64 nr_entries = 0;
+ struct rb_node *nd = rb_first(&hb->hists->entries);
+
+ do {
+ struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+
+ if (!he->filtered)
+ nr_entries++;
+
+ nd = rb_next(nd);
+ } while (nd);
+
+ hb->nr_non_filtered_entries = nr_entries;
+}
+
+static int perf_c2c_browser__title(struct hist_browser *browser,
+ char *bf, size_t size)
+{
+ scnprintf(bf, size,
+ "Shared Data Cache Line Table "
+ "(%lu entries)", browser->nr_non_filtered_entries);
+ return 0;
+}
+
+static struct hist_browser*
+perf_c2c_browser__new(struct hists *hists)
+{
+ struct hist_browser *browser = hist_browser__new(hists);
+
+ if (browser) {
+ browser->title = perf_c2c_browser__title;
+ browser->c2c_filter = true;
+ }
+
+ return browser;
+}
+
+static int perf_c2c__hists_browse(struct hists *hists)
+{
+ struct hist_browser *browser;
+ int key = -1;
+
+ browser = perf_c2c_browser__new(hists);
+ if (browser == NULL)
+ return -1;
+
+ /* reset abort key so that it can get Ctrl-C as a key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ c2c_browser__update_nr_entries(browser);
+
+ while (1) {
+ key = hist_browser__run(browser, "help");
+
+ switch (key) {
+ case 'q':
+ goto out;
+ default:
+ break;
+ }
+ }
+
+out:
+ hist_browser__delete(browser);
+ return 0;
+}
+
+static void perf_c2c_display(void)
+{
+ if (c2c.use_stdio)
+ perf_c2c__hists_fprintf(stdout);
+ else
+ perf_c2c__hists_browse(&c2c.hists.hists);
+}
+#else
+static void perf_c2c_display(void)
+{
+ use_browser = 0;
+ perf_c2c__hists_fprintf(stdout);
+}
+#endif /* HAVE_SLANG_SUPPORT */
+
+static void ui_quirks(void)
+{
+ if (!c2c.use_stdio) {
+ dim_offset.width = 5;
+ dim_offset.header = header_offset_tui;
+ }
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -1819,6 +1921,9 @@ static int perf_c2c__report(int argc, const char **argv)
"the input file to process"),
OPT_INCR('N', "node-info", &c2c.node_info,
"show extra node info in report (repeat for more info)"),
+#ifdef HAVE_SLANG_SUPPORT
+ OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
+#endif
OPT_END()
};
int err = 0;
@@ -1828,6 +1933,13 @@ static int perf_c2c__report(int argc, const char **argv)
if (argc)
usage_with_options(report_c2c_usage, c2c_options);

+ if (c2c.use_stdio)
+ use_browser = 0;
+ else
+ use_browser = 1;
+
+ setup_browser(false);
+
if (!input_name || !strlen(input_name))
input_name = "perf.data";

@@ -1886,8 +1998,9 @@ static int perf_c2c__report(int argc, const char **argv)

ui_progress__finish();

- use_browser = 0;
- perf_c2c__hists_fprintf(stdout);
+ ui_quirks();
+
+ perf_c2c_display();

out_session:
perf_session__delete(session);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 4ffff7be9299..31d6d5a7c2dc 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -30,7 +30,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,

static bool hist_browser__has_filter(struct hist_browser *hb)
{
- return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter;
+ return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter || hb->c2c_filter;
}

static int hist_browser__get_folding(struct hist_browser *browser)
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index 39bd0f28f211..23d6acb84800 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -18,6 +18,7 @@ struct hist_browser {
u64 nr_non_filtered_entries;
u64 nr_hierarchy_entries;
u64 nr_callchain_rows;
+ bool c2c_filter;

/* Get title string. */
int (*title)(struct hist_browser *browser,
--
2.7.4

Arnaldo Carvalho de Melo

unread,
Oct 11, 2016, 1:40:16 PM10/11/16
to
From: Jiri Olsa <jo...@kernel.org>

To ease the following patch.

Signed-off-by: Jiri Olsa <jo...@kernel.org>
Cc: Namhyung Kim <namh...@kernel.org>
Cc: Steven Rostedt <ros...@goodmis.org>
Link: http://lkml.kernel.org/n/tip-zpv5gd8y7c...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <ac...@redhat.com>
---
tools/lib/traceevent/Makefile | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index deeae5201ec9..0d7e1725a0f8 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -99,7 +99,7 @@ libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
plugin_dir_SQ = $(subst ','\'',$(plugin_dir))

-LIB_FILE = libtraceevent.a libtraceevent.so
+LIB_TARGET = libtraceevent.a libtraceevent.so

CONFIG_INCLUDES =
CONFIG_LIBS =
@@ -156,11 +156,11 @@ PLUGINS += plugin_cfg80211.so
PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS))
PLUGINS_IN := $(PLUGINS:.so=-in.o)

-TE_IN := $(OUTPUT)libtraceevent-in.o
-LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+TE_IN := $(OUTPUT)libtraceevent-in.o
+LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list

-CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE)
+CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE)

TARGETS = $(CMD_TARGETS)

@@ -261,8 +261,8 @@ define do_generate_dynamic_list_file
endef

install_lib: all_cmd install_plugins
- $(call QUIET_INSTALL, $(LIB_FILE)) \
- $(call do_install,$(LIB_FILE),$(libdir_SQ))
+ $(call QUIET_INSTALL, $(LIB_TARGET)) \
+ $(call do_install,$(LIB_TARGET),$(libdir_SQ))

install_plugins: $(PLUGINS)
$(call QUIET_INSTALL, trace_plugins) \
--
2.7.4
It is loading more messages.
0 new messages