
[PATCH v2 3/4] perf: Add support to the perf tool to specify extra values


Lin Ming
Mar 1, 2011, 11:50:02 AM
From: Andi Kleen <a...@linux.intel.com>

Changelog:
- Use ":" to specify the extra value, since "," is already used to
separate multiple events.

Add support to the perf tool to specify extra values for raw events and
pass them to the kernel.
The new format is -e rXXXX[:YYYY]
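
For example (illustrative values: 0x01b7 is the OFFCORE_RESPONSE_0 event
code and 0x101 the DMND_DATA_RD|RESP_UNCORE_HIT mask from the kernel
side of this series):

	perf stat -e r01b7:101 -a sleep 1

The value after ":" is passed to the kernel in perf_event_attr::config1.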

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Lin Ming <ming....@intel.com>
---
tools/perf/builtin-report.c | 7 ++++---
tools/perf/util/parse-events.c | 24 +++++++++++++++++++-----
tools/perf/util/parse-events.h | 2 +-
tools/perf/util/ui/browsers/hists.c | 3 ++-
4 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index dddcc7e..724f65b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -201,7 +201,8 @@ static int process_read_event(union perf_event *event,
attr = perf_header__find_attr(event->read.id, &session->header);

if (show_threads) {
- const char *name = attr ? __event_name(attr->type, attr->config)
+ const char *name = attr ? __event_name(attr->type, attr->config,
+ 0)
: "unknown";
perf_read_values_add_value(&show_threads_values,
event->read.pid, event->read.tid,
@@ -211,7 +212,7 @@ static int process_read_event(union perf_event *event,
}

dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
- attr ? __event_name(attr->type, attr->config) : "FAIL",
+ attr ? __event_name(attr->type, attr->config, 0) : "FAIL",
event->read.value);

return 0;
@@ -291,7 +292,7 @@ static int hists__tty_browse_tree(struct rb_root *tree, const char *help)
const char *evname = NULL;

if (rb_first(&hists->entries) != rb_last(&hists->entries))
- evname = __event_name(hists->type, hists->config);
+ evname = __event_name(hists->type, hists->config, 0);

hists__fprintf_nr_sample_events(hists, evname, stdout);
hists__fprintf(hists, NULL, false, stdout);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 54a7e26..7838dfb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -271,15 +271,18 @@ const char *event_name(struct perf_evsel *evsel)
if (evsel->name)
return evsel->name;

- return __event_name(type, config);
+ return __event_name(type, config, evsel->attr.config1);
}

-const char *__event_name(int type, u64 config)
+const char *__event_name(int type, u64 config, u64 extra)
{
static char buf[32];
+ int n;

if (type == PERF_TYPE_RAW) {
- sprintf(buf, "raw 0x%" PRIx64, config);
+ n = sprintf(buf, "raw 0x%" PRIx64, config);
+ if (extra)
+ sprintf(buf + n, ":%#" PRIx64, extra);
return buf;
}

@@ -666,9 +669,20 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
return EVT_FAILED;
n = hex2u64(str + 1, &config);
if (n > 0) {
- *strp = str + n + 1;
+ str += n + 1;
+ *strp = str;
attr->type = PERF_TYPE_RAW;
attr->config = config;
+
+ if (*str++ == ':') {
+ n = hex2u64(str + 1, &config);
+ if (n > 0) {
+ attr->config1 = config;
+ str += n + 1;
+ *strp = str;
+ }
+ }
+
return EVT_HANDLED;
}
return EVT_FAILED;
@@ -1035,7 +1049,7 @@ void print_events(const char *event_glob)

printf("\n");
printf(" %-42s [%s]\n",
- "rNNN (see 'perf list --help' on how to encode it)",
+ "rNNN[:EEE] (see 'perf list --help' on how to encode it)",
event_type_descriptors[PERF_TYPE_RAW]);
printf("\n");

diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 212f88e..0fe700c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -21,7 +21,7 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
extern bool have_tracepoints(struct list_head *evlist);

const char *event_name(struct perf_evsel *event);
-extern const char *__event_name(int type, u64 config);
+extern const char *__event_name(int type, u64 config, u64 extra);

extern int parse_events(const struct option *opt, const char *str, int unset);
extern int parse_filter(const struct option *opt, const char *str, int unset);
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 497b3c4..f780bc7 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -984,7 +984,8 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx)

while (nd) {
struct hists *hists = rb_entry(nd, struct hists, rb_node);
- const char *ev_name = __event_name(hists->type, hists->config);
+ const char *ev_name = __event_name(hists->type, hists->config,
+ 0);

key = hists__browse(hists, help, ev_name, evidx);
switch (key) {
--
1.7.3


Lin Ming
Mar 2, 2011, 9:30:03 PM
No need to do the percore allocations if the CPU is not HT capable.

Signed-off-by: Lin Ming <ming....@intel.com>
---

arch/x86/kernel/cpu/perf_event_intel.c | 23 +++++++++++++++++------
1 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index aaac4a9..4bee70a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1201,10 +1201,18 @@ static __initconst const struct x86_pmu core_pmu = {
.event_constraints = intel_core_event_constraints,
};

+static bool ht_capable(void)
+{
+ return boot_cpu_has(X86_FEATURE_HT) && smp_num_siblings > 1;
+}
+
static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

+ if (!ht_capable())
+ return NOTIFY_OK;
+
cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
GFP_KERNEL, cpu_to_node(cpu));
if (!cpuc->per_core)
@@ -1221,6 +1229,15 @@ static void intel_pmu_cpu_starting(int cpu)
int core_id = topology_core_id(cpu);
int i;

+ init_debug_store_on_cpu(cpu);
+ /*
+ * Deal with CPUs that don't clear their LBRs on power-up.
+ */
+ intel_pmu_lbr_reset();
+
+ if (!ht_capable())
+ return;
+
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;

@@ -1233,12 +1250,6 @@ static void intel_pmu_cpu_starting(int cpu)

cpuc->per_core->core_id = core_id;
cpuc->per_core->refcnt++;
-
- init_debug_store_on_cpu(cpu);
- /*
- * Deal with CPUs that don't clear their LBRs on power-up.
- */
- intel_pmu_lbr_reset();
}

static void intel_pmu_cpu_dying(int cpu)

Lin Ming
Mar 2, 2011, 9:30:02 PM
From: Andi Kleen <a...@linux.intel.com>

Changelog against Andi's original version:
- Extends perf_event_attr:config to config{,1,2} (Peter Zijlstra)
- Fixed a major event scheduling issue. There cannot be a ref++ on an
event that has already done ref++ once without calling
put_constraint() in between. (Stephane Eranian)
- Use thread_cpumask for percore allocation. (Lin Ming)
- Use MSR names in the extra reg lists. (Lin Ming)
- Remove redundant "c = NULL" in intel_percore_constraints
- Fix comment of perf_event_attr::config1

Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
that can be used to monitor any offcore accesses from a core.
This is a very useful event for various tunings, and it's
also needed to implement the generic LLC-* events correctly.

Unfortunately this event requires programming a mask in a separate
register. And worse, this separate register is per core, not per
CPU thread.

This patch:
- Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
The extra parameters are passed by user space in the
perf_event_attr::config1 field (see the sketch below).
- Adds support to the Intel perf_event core to schedule per
core resources. This adds fairly generic infrastructure that
can also be used for other per core resources.
The basic code is patterned after the similar AMD northbridge
constraints code.
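
(Not part of this patch: an illustrative sketch of the user-space side.
The event code 0x01b7 and the mask layout follow the definitions added
in this series; the helper name is hypothetical.)

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Open an OFFCORE_RESPONSE_0 counter with a response mask. */
	static int open_offcore_counter(unsigned long long rsp_mask, int cpu)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x01b7;	 /* OFFCORE_RESPONSE_0 event code */
		attr.config1 = rsp_mask; /* kernel writes this to MSR_OFFCORE_RSP_0 */

		/* pid == -1, cpu >= 0: count system-wide on that CPU */
		return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
	}

For example, open_offcore_counter(0x101, 0) would count demand data
reads that hit the uncore (DMND_DATA_RD|RESP_UNCORE_HIT) on CPU 0.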

Thanks to Stephane Eranian who pointed out some problems
in the original version and suggested improvements.

Cc: era...@google.com
Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Lin Ming <ming....@intel.com>
---

arch/x86/include/asm/msr-index.h | 3 +
arch/x86/kernel/cpu/perf_event.c | 64 ++++++++++
arch/x86/kernel/cpu/perf_event_intel.c | 200 ++++++++++++++++++++++++++++++++
include/linux/perf_event.h | 13 ++-
4 files changed, 278 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0..d25e74c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -47,6 +47,9 @@
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b

+#define MSR_OFFCORE_RSP_0 0x000001a6
+#define MSR_OFFCORE_RSP_1 0x000001a7
+
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ea03c72..ec6a6db 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,6 +93,8 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

+struct intel_percore;
+
#define MAX_LBR_ENTRIES 16

struct cpu_hw_events {
@@ -128,6 +130,13 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];

/*
+ * Intel percore register state.
+ * Coordinate shared resources between HT threads.
+ */
+ int percore_used; /* Used by this CPU? */
+ struct intel_percore *per_core;
+
+ /*
* AMD specific bits
*/
struct amd_nb *amd_nb;
@@ -177,6 +186,28 @@ struct cpu_hw_events {
#define for_each_event_constraint(e, c) \
for ((e) = (c); (e)->weight; (e)++)

+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+struct extra_reg {
+ unsigned int event;
+ unsigned int msr;
+ u64 config_mask;
+ u64 valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm) { \
+ .event = (e), \
+ .msr = (ms), \
+ .config_mask = (m), \
+ .valid_mask = (vm), \
+ }
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
union perf_capabilities {
struct {
u64 lbr_format : 6;
@@ -221,6 +252,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
+ struct event_constraint *percore_constraints;
void (*quirks)(void);
int perfctr_second_write;

@@ -249,6 +281,11 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+
+ /*
+ * Extra registers for events
+ */
+ struct extra_reg *extra_regs;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -341,6 +378,31 @@ static inline unsigned int x86_pmu_event_addr(int index)
return x86_pmu.perfctr + x86_pmu_addr_offset(index);
}

+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+ struct extra_reg *er;
+
+ event->hw.extra_reg = 0;
+ event->hw.extra_config = 0;
+
+ if (!x86_pmu.extra_regs)
+ return 0;
+
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ if (er->event != (config & er->config_mask))
+ continue;
+ if (event->attr.config1 & ~er->valid_mask)
+ return -EINVAL;
+ event->hw.extra_reg = er->msr;
+ event->hw.extra_config = event->attr.config1;
+ break;
+ }
+ return 0;
+}
+
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

@@ -665,6 +727,8 @@ static void x86_pmu_disable(struct pmu *pmu)
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
+ if (hwc->extra_reg)
+ wrmsrl(hwc->extra_reg, hwc->extra_config);
wrmsrl(hwc->config_base, hwc->config | enable_mask);
}

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ba8aad1..af0c6a2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
#ifdef CONFIG_CPU_SUP_INTEL

+#define MAX_EXTRA_REGS 2
+
+/*
+ * Per register state.
+ */
+struct er_account {
+ int ref; /* reference count */
+ unsigned int extra_reg; /* extra MSR number */
+ u64 extra_config; /* extra MSR config */
+};
+
+/*
+ * Per core state
+ * This is used to coordinate shared registers for HT threads.
+ */
+struct intel_percore {
+ raw_spinlock_t lock; /* protect structure */
+ struct er_account regs[MAX_EXTRA_REGS];
+ int refcnt; /* number of threads */
+ unsigned core_id;
+};
+
/*
* Intel PerfMon, used on Core and later.
*/
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
EVENT_CONSTRAINT_END
};

+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_westmere_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +123,20 @@ static struct event_constraint intel_snb_event_constraints[] =
EVENT_CONSTRAINT_END
};

+static struct extra_reg intel_westmere_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+ INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ INTEL_EVENT_CONSTRAINT(0xbb, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_gen_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -907,6 +955,67 @@ intel_bts_constraints(struct perf_event *event)
}

static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+ struct event_constraint *c;
+ struct intel_percore *pc;
+ struct er_account *era;
+ int i;
+ int free_slot;
+ int found;
+
+ if (!x86_pmu.percore_constraints || hwc->extra_alloc)
+ return NULL;
+
+ for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+ if (e != c->code)
+ continue;
+
+ /*
+ * Allocate resource per core.
+ */
+ pc = cpuc->per_core;
+ if (!pc)
+ break;
+ c = &emptyconstraint;
+ raw_spin_lock(&pc->lock);
+ free_slot = -1;
+ found = 0;
+ for (i = 0; i < MAX_EXTRA_REGS; i++) {
+ era = &pc->regs[i];
+ if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
+ /* Allow sharing same config */
+ if (hwc->extra_config == era->extra_config) {
+ era->ref++;
+ cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
+ c = NULL;
+ }
+ /* else conflict */
+ found = 1;
+ break;
+ } else if (era->ref == 0 && free_slot == -1)
+ free_slot = i;
+ }
+ if (!found && free_slot != -1) {
+ era = &pc->regs[free_slot];
+ era->ref = 1;
+ era->extra_reg = hwc->extra_reg;
+ era->extra_config = hwc->extra_config;
+ cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
+ c = NULL;
+ }
+ raw_spin_unlock(&pc->lock);
+ return c;
+ }
+
+ return NULL;
+}
+
+static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct event_constraint *c;
@@ -919,9 +1028,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
if (c)
return c;

+ c = intel_percore_constraints(cpuc, event);
+ if (c)
+ return c;
+
return x86_get_event_constraints(cpuc, event);
}

+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct extra_reg *er;
+ struct intel_percore *pc;
+ struct er_account *era;
+ struct hw_perf_event *hwc = &event->hw;
+ int i, allref;
+
+ if (!cpuc->percore_used)
+ return;
+
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ if (er->event != (hwc->config & er->config_mask))
+ continue;
+
+ pc = cpuc->per_core;
+ raw_spin_lock(&pc->lock);
+ for (i = 0; i < MAX_EXTRA_REGS; i++) {
+ era = &pc->regs[i];
+ if (era->ref > 0 &&
+ era->extra_config == hwc->extra_config &&
+ era->extra_reg == er->msr) {
+ era->ref--;
+ hwc->extra_alloc = 0;
+ break;
+ }
+ }
+ allref = 0;
+ for (i = 0; i < MAX_EXTRA_REGS; i++)
+ allref += pc->regs[i].ref;
+ if (allref == 0)
+ cpuc->percore_used = 0;
+ raw_spin_unlock(&pc->lock);
+ break;
+ }
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -993,11 +1144,43 @@ static __initconst const struct x86_pmu core_pmu = {
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
};

+static int intel_pmu_cpu_prepare(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!cpuc->per_core)
+ return NOTIFY_BAD;
+
+ raw_spin_lock_init(&cpuc->per_core->lock);
+ cpuc->per_core->core_id = -1;
+ return NOTIFY_OK;
+}
+
static void intel_pmu_cpu_starting(int cpu)
{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int core_id = topology_core_id(cpu);
+ int i;
+
+ for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+
+ if (pc && pc->core_id == core_id) {
+ kfree(cpuc->per_core);
+ cpuc->per_core = pc;
+ break;
+ }
+ }
+
+ cpuc->per_core->core_id = core_id;
+ cpuc->per_core->refcnt++;
+
init_debug_store_on_cpu(cpu);
/*
* Deal with CPUs that don't clear their LBRs on power-up.
*/
intel_pmu_lbr_reset();
@@ -1007,6 +1190,15 @@ static void intel_pmu_cpu_starting(int cpu)

static void intel_pmu_cpu_dying(int cpu)
{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct intel_percore *pc = cpuc->per_core;
+
+ if (pc) {
+ if (pc->core_id == -1 || --pc->refcnt == 0)
+ kfree(pc);
+ cpuc->per_core = NULL;
+ }
+
fini_debug_store_on_cpu(cpu);
}

@@ -1031,7 +1223,9 @@ static __initconst const struct x86_pmu intel_pmu = {
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,

+ .cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
};
@@ -1149,7 +1343,10 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();

x86_pmu.event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.percore_constraints =
+ intel_nehalem_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_nehalem_extra_regs;
pr_cont("Nehalem events, ");
break;

@@ -1171,7 +1368,10 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();

x86_pmu.event_constraints = intel_westmere_event_constraints;
+ x86_pmu.percore_constraints =
+ intel_westmere_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_westmere_extra_regs;
pr_cont("Westmere events, ");
break;

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8ceb5a6..614615b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -225,8 +225,14 @@ struct perf_event_attr {
};

__u32 bp_type;
- __u64 bp_addr;
- __u64 bp_len;
+ union {
+ __u64 bp_addr;
+ __u64 config1; /* extension of config */
+ };
+ union {
+ __u64 bp_len;
+ __u64 config2; /* extension of config1 */
+ };
};

/*
@@ -541,6 +547,9 @@ struct hw_perf_event {
unsigned long event_base;
int idx;
int last_cpu;
+ unsigned int extra_reg;
+ u64 extra_config;
+ int extra_alloc;
};
struct { /* software */
struct hrtimer hrtimer;

Lin Ming
Mar 2, 2011, 9:30:03 PM
From: Andi Kleen <a...@linux.intel.com>

The generic perf LLC-* events count the L2 caches, not the real
L3 LLC, on Intel Nehalem and Westmere. This led to quite some confusion.

Fixing this properly requires use of the special OFFCORE_RESPONSE
events, which need a separate mask register. This has been implemented
in an earlier patch.

Now use this infrastructure to set the correct events for LLC-*
on Nehalem and Westmere.
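
With this in place the generic events should count real L3 traffic; for
example (illustrative):

	perf stat -e LLC-loads,LLC-load-misses -a sleep 1

now measures via OFFCORE_RESPONSE instead of the L2_RQSTS events.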

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Lin Ming <ming....@intel.com>
---

arch/x86/kernel/cpu/perf_event.c | 15 ++++--
arch/x86/kernel/cpu/perf_event_intel.c | 81 +++++++++++++++++++++++++++-----
2 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ec6a6db..4d6ce5d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -310,6 +310,10 @@ static u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];

/*
* Propagate event elapsed time into the generic event.
@@ -524,8 +528,9 @@ static inline int x86_pmu_initialized(void)
}

static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
+ struct perf_event_attr *attr = &event->attr;
unsigned int cache_type, cache_op, cache_result;
u64 config, val;

@@ -552,8 +557,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
return -EINVAL;

hwc->config |= val;
-
- return 0;
+ attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+ return x86_pmu_extra_regs(val, event);
}

static int x86_setup_perfctr(struct perf_event *event)
@@ -578,10 +583,10 @@ static int x86_setup_perfctr(struct perf_event *event)
}

if (attr->type == PERF_TYPE_RAW)
- return 0;
+ return x86_pmu_extra_regs(event->attr.config, event);

if (attr->type == PERF_TYPE_HW_CACHE)
- return set_ext_hw_attr(hwc, attr);
+ return set_ext_hw_attr(hwc, event);

if (attr->config >= x86_pmu.max_events)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index af0c6a2..aaac4a9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -285,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
- [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
},
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
- [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
- [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
},
},
[ C(DTLB) ] = {
@@ -341,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
},
};

+/*
+ * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ */
+
+#define DMND_DATA_RD (1 << 0)
+#define DMND_RFO (1 << 1)
+#define DMND_WB (1 << 3)
+#define PF_DATA_RD (1 << 4)
+#define PF_DATA_RFO (1 << 5)
+#define RESP_UNCORE_HIT (1 << 8)
+#define RESP_MISS (0xf600) /* non uncore hit */
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+ },
+ }
+};
+
static __initconst const u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -376,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
- [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
- [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
- [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {
@@ -1339,6 +1392,8 @@ static __init int intel_pmu_init(void)
case 46: /* 45 nm nehalem-ex, "Beckton" */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));

intel_pmu_lbr_init_nhm();

@@ -1364,6 +1419,8 @@ static __init int intel_pmu_init(void)
case 44: /* 32 nm nehalem, "Gulftown" */
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));

intel_pmu_lbr_init_nhm();

tip-bot for Andi Kleen
Mar 4, 2011, 7:00:01 AM
Commit-ID: a7e3ed1e470116c9d12c2f778431a481a6be8ab6
Gitweb: http://git.kernel.org/tip/a7e3ed1e470116c9d12c2f778431a481a6be8ab6
Author: Andi Kleen <a...@linux.intel.com>
AuthorDate: Thu, 3 Mar 2011 10:34:47 +0800
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Fri, 4 Mar 2011 11:32:53 +0100

perf: Add support for supplementary event registers

Changelog against Andi's original version:

- Extends perf_event_attr:config to config{,1,2} (Peter Zijlstra)
- Fixed a major event scheduling issue. There cannot be a ref++ on an
event that has already done ref++ once without calling
put_constraint() in between. (Stephane Eranian)
- Use thread_cpumask for percore allocation. (Lin Ming)
- Use MSR names in the extra reg lists. (Lin Ming)
- Remove redundant "c = NULL" in intel_percore_constraints
- Fix comment of perf_event_attr::config1

Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
that can be used to monitor any offcore accesses from a core.
This is a very useful event for various tunings, and it's
also needed to implement the generic LLC-* events correctly.

Unfortunately this event requires programming a mask in a separate
register. And worse, this separate register is per core, not per
CPU thread.

This patch:

- Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
The extra parameters are passed by user space in the
perf_event_attr::config1 field.

- Adds support to the Intel perf_event core to schedule per
core resources. This adds fairly generic infrastructure that
can also be used for other per core resources.
The basic code is patterned after the similar AMD northbridge
constraints code.

Thanks to Stephane Eranian who pointed out some problems
in the original version and suggested improvements.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Lin Ming <ming....@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
LKML-Reference: <1299119690-13991-2-git...@intel.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/include/asm/msr-index.h | 3 +
arch/x86/kernel/cpu/perf_event.c | 64 ++++++++++
arch/x86/kernel/cpu/perf_event_intel.c | 198 ++++++++++++++++++++++++++++++++
include/linux/perf_event.h | 13 ++-
4 files changed, 276 insertions(+), 2 deletions(-)

index c3ce053..13cb6cf 100644

@@ -1151,7 +1345,9 @@ static __init int intel_pmu_init(void)

x86_pmu.event_constraints = intel_nehalem_event_constraints;
x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+ x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_nehalem_extra_regs;
pr_cont("Nehalem events, ");
break;

@@ -1174,8 +1370,10 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();

x86_pmu.event_constraints = intel_westmere_event_constraints;
+ x86_pmu.percore_constraints = intel_westmere_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;

tip-bot for Andi Kleen
Mar 4, 2011, 7:00:03 AM
Commit-ID: e994d7d23a0bae34cd28834e85522ed4e782faf7
Gitweb: http://git.kernel.org/tip/e994d7d23a0bae34cd28834e85522ed4e782faf7
Author: Andi Kleen <a...@linux.intel.com>
AuthorDate: Thu, 3 Mar 2011 10:34:48 +0800

Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Fri, 4 Mar 2011 11:32:53 +0100

perf: Fix LLC-* events on Intel Nehalem/Westmere

On Intel Nehalem and Westmere CPUs the generic perf LLC-* events count the
L2 caches, not the real L3 LLC - this was inconsistent with behavior on
other CPUs.

Fixing this requires the use of the special OFFCORE_RESPONSE
events which need a separate mask register.

This has been implemented by the previous patch; now use this
infrastructure to set the correct events for LLC-* on Nehalem and Westmere.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Lin Ming <ming....@intel.com>

Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
LKML-Reference: <1299119690-13991-3-git...@intel.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>

index 13cb6cf..6e9b676 100644

@@ -1340,6 +1393,8 @@ static __init int intel_pmu_init(void)
case 46: /* 45 nm nehalem-ex, "Beckton" */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));

intel_pmu_lbr_init_nhm();

@@ -1366,6 +1421,8 @@ static __init int intel_pmu_init(void)
case 44: /* 32 nm nehalem, "Gulftown" */
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));

intel_pmu_lbr_init_nhm();

--

tip-bot for Lin Ming
Mar 4, 2011, 8:10:01 AM
Commit-ID: 90a37ed96b2be7a2ad86764589eabd6992657c89
Gitweb: http://git.kernel.org/tip/90a37ed96b2be7a2ad86764589eabd6992657c89
Author: Lin Ming <ming....@intel.com>
AuthorDate: Thu, 3 Mar 2011 10:34:50 +0800
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Fri, 4 Mar 2011 12:51:10 +0100

perf: Avoid the percore allocations if the CPU is not HT capable

Signed-off-by: Lin Ming <ming....@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
LKML-Reference: <1299119690-13991-5-git...@intel.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---

arch/x86/include/asm/smp.h | 10 ++++++++++
arch/x86/kernel/cpu/perf_event_intel.c | 18 ++++++++++++------
2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f46951..c1bbfa8 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,10 +17,20 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
+#include <asm/cpufeature.h>

extern int smp_num_siblings;
extern unsigned int num_processors;

+static inline bool cpu_has_ht_siblings(void)
+{
+ bool has_siblings = false;
+#ifdef CONFIG_SMP
+ has_siblings = cpu_has_ht && smp_num_siblings > 1;
+#endif
+ return has_siblings;
+}
+
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6e9b676..8fc2b2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1205,6 +1205,9 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

+ if (!cpu_has_ht_siblings())
+ return NOTIFY_OK;
+
cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
GFP_KERNEL, cpu_to_node(cpu));
if (!cpuc->per_core)

@@ -1221,6 +1224,15 @@ static void intel_pmu_cpu_starting(int cpu)
int core_id = topology_core_id(cpu);
int i;

+ init_debug_store_on_cpu(cpu);
+ /*
+ * Deal with CPUs that don't clear their LBRs on power-up.
+ */
+ intel_pmu_lbr_reset();
+
+ if (!cpu_has_ht_siblings())
+ return;
+
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;

@@ -1233,12 +1245,6 @@ static void intel_pmu_cpu_starting(int cpu)

cpuc->per_core->core_id = core_id;
cpuc->per_core->refcnt++;
-
- init_debug_store_on_cpu(cpu);
- /*
- * Deal with CPUs that don't clear their LBRs on power-up.
- */
- intel_pmu_lbr_reset();
}

static void intel_pmu_cpu_dying(int cpu)
--

tip-bot for Lin Ming
Mar 5, 2011, 11:00:02 AM
Commit-ID: 6909262429b70a162e9e7053672cfd8024c9275d
Gitweb: http://git.kernel.org/tip/6909262429b70a162e9e7053672cfd8024c9275d

Author: Lin Ming <ming....@intel.com>
AuthorDate: Thu, 3 Mar 2011 10:34:50 +0800
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Sat, 5 Mar 2011 07:12:16 +0100

perf: Avoid the percore allocations if the CPU is not HT capable

Signed-off-by: Lin Ming <ming....@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
LKML-Reference: <1299119690-13991-5-git...@intel.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/include/asm/smp.h | 10 ++++++++++

arch/x86/kernel/cpu/perf_event.c | 1 +
arch/x86/kernel/cpu/perf_event_intel.c | 18 ++++++++++++------
3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f46951..c1bbfa8 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,10 +17,20 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
+#include <asm/cpufeature.h>

extern int smp_num_siblings;
extern unsigned int num_processors;

+static inline bool cpu_has_ht_siblings(void)
+{
+ bool has_siblings = false;
+#ifdef CONFIG_SMP
+ has_siblings = cpu_has_ht && smp_num_siblings > 1;
+#endif
+ return has_siblings;
+}
+
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
DECLARE_PER_CPU(u16, cpu_llc_id);

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4d6ce5d..2660418 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>
+#include <asm/smp.h>

#if 0
#undef wrmsrl
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6e9b676..8fc2b2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1205,6 +1205,9 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

+ if (!cpu_has_ht_siblings())
+ return NOTIFY_OK;
+
cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
GFP_KERNEL, cpu_to_node(cpu));
if (!cpuc->per_core)

@@ -1221,6 +1224,15 @@ static void intel_pmu_cpu_starting(int cpu)
int core_id = topology_core_id(cpu);
int i;

+ init_debug_store_on_cpu(cpu);
+ /*
+ * Deal with CPUs that don't clear their LBRs on power-up.
+ */
+ intel_pmu_lbr_reset();
+
+ if (!cpu_has_ht_siblings())
+ return;
+
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;

@@ -1233,12 +1245,6 @@ static void intel_pmu_cpu_starting(int cpu)

cpuc->per_core->core_id = core_id;
cpuc->per_core->refcnt++;
-
- init_debug_store_on_cpu(cpu);
- /*
- * Deal with CPUs that don't clear their LBRs on power-up.
- */
- intel_pmu_lbr_reset();
}

static void intel_pmu_cpu_dying(int cpu)
--

tip-bot for Peter Zijlstra
May 6, 2011, 5:50:01 AM
Commit-ID: 63b6a6758eede2f9283c3594265b6e32e75d7456
Gitweb: http://git.kernel.org/tip/63b6a6758eede2f9283c3594265b6e32e75d7456
Author: Peter Zijlstra <pet...@infradead.org>
AuthorDate: Sat, 23 Apr 2011 00:57:42 +0200
Committer: Ingo Molnar <mi...@elte.hu>
CommitDate: Fri, 6 May 2011 11:24:48 +0200

perf events, x86: Fix Intel Nehalem and Westmere last level cache event definitions

The Intel Nehalem offcore bits implemented in:

e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere

... are wrong: they implemented _ACCESS as _HIT and counted OTHER_CORE_HIT* as
MISS even though it's clearly documented as an L3 hit ...

Fix them and the Westmere definitions as well.
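
For reference, the composite masks defined below work out as follows
(derived from the #defines in this patch):

	NHM_L3_HIT    = UNCORE_HIT|OTHER_CORE_HIT_SNP|OTHER_CORE_HITM
	              = 0x0100|0x0200|0x0400 = 0x0700
	NHM_L3_MISS   = NON_DRAM|ALL_DRAM|REMOTE_CACHE_FWD
	              = 0x8000|0x6000|0x1000 = 0xf000
	NHM_L3_ACCESS = NHM_L3_HIT|NHM_L3_MISS = 0xf700

so _ACCESS counts hits and misses alike, and OTHER_CORE_HIT* is treated
as an L3 hit, as documented.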

Cc: Andi Kleen <a...@linux.intel.com>
Cc: Lin Ming <ming....@intel.com>
Cc: Arnaldo Carvalho de Melo <ac...@redhat.com>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Steven Rostedt <ros...@goodmis.org>
Link: http://lkml.kernel.org/r/1299119690-13991-3-git...@intel.com
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---

arch/x86/kernel/cpu/perf_event_intel.c | 87 +++++++++++++++++++-------------
1 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e61539b..447a28d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
},
},
[ C(LL ) ] = {
- /*
- * TBD: Need Off-core Response Performance Monitoring support
- */
[ C(OP_READ) ] = {
- /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
- /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {

@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
/*
* Use RFO, not WRITEBACK, because a write miss would typically occur
* on RFO.
*/
[ C(OP_WRITE) ] = {
- /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01bb,
- /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {

@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
};

/*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
*/

-#define DMND_DATA_RD (1 << 0)
-#define DMND_RFO (1 << 1)
-#define DMND_WB (1 << 3)
-#define PF_DATA_RD (1 << 4)
-#define PF_DATA_RFO (1 << 5)
-#define RESP_UNCORE_HIT (1 << 8)
-#define RESP_MISS (0xf600) /* non uncore hit */
+#define NHM_DMND_DATA_RD (1 << 0)
+#define NHM_DMND_RFO (1 << 1)
+#define NHM_DMND_IFETCH (1 << 2)
+#define NHM_DMND_WB (1 << 3)
+#define NHM_PF_DATA_RD (1 << 4)
+#define NHM_PF_DATA_RFO (1 << 5)
+#define NHM_PF_IFETCH (1 << 6)
+#define NHM_OFFCORE_OTHER (1 << 7)
+#define NHM_UNCORE_HIT (1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
+#define NHM_OTHER_CORE_HITM (1 << 10)
+ /* reserved */
+#define NHM_REMOTE_CACHE_FWD (1 << 12)
+#define NHM_REMOTE_DRAM (1 << 13)
+#define NHM_LOCAL_DRAM (1 << 14)
+#define NHM_NON_DRAM (1 << 15)
+
+#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ (NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)

static __initconst const u64 nehalem_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
- [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+ [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
- [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+ [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
- [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+ [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
},
}
};
