This patch adds a new pair of callbacks to stop and restart a
counter without actually releasing the underlying counter resource.
On stop, the counter is stopped and its value is saved; nothing else happens.
On start, the value is reloaded and the counter is restarted (on x86,
the actual restart is delayed until perf_enable()).
Note that this patch does not provide support for non-x86 PMUs; that
still needs to be added.
Signed-off-by: Stephane Eranian <era...@google.com>
---
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a920f17..ea023cb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1801,6 +1801,18 @@ static int x86_pmu_enable(struct perf_event *event)
return 0;
}
+static int x86_pmu_start(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx == -1)
+ return -EAGAIN;
+
+ x86_perf_event_set_period(event, hwc, hwc->idx);
+
+ return 0;
+}
+
static void x86_pmu_unthrottle(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1949,12 +1961,19 @@ static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cp
cpuc->events[idx] = NULL;
}
+static void x86_pmu_stop(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ __x86_pmu_disable(event, cpuc);
+}
+
static void x86_pmu_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i;
- __x86_pmu_disable(event, cpuc);
+ x86_pmu_stop(event);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
@@ -1972,6 +1991,7 @@ static void x86_pmu_disable(struct perf_event *event)
perf_event_update_userpage(event);
}
+
/*
* Save and restart an expired event. Called by NMI contexts,
* so it has to be careful about preempting normal event ops:
@@ -2667,6 +2687,8 @@ static inline void x86_pmu_read(struct perf_event *event)
static const struct pmu pmu = {
.enable = x86_pmu_enable,
.disable = x86_pmu_disable,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
.read = x86_pmu_read,
.unthrottle = x86_pmu_unthrottle,
};
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 14cf4b5..3bb9d1f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -511,6 +511,8 @@ struct perf_event;
struct pmu {
int (*enable) (struct perf_event *event);
void (*disable) (struct perf_event *event);
+ int (*start) (struct perf_event *event);
+ void (*stop) (struct perf_event *event);
void (*read) (struct perf_event *event);
void (*unthrottle) (struct perf_event *event);
};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ab8a312..654dfb5 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1513,9 +1513,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
if (atomic64_read(&hwc->period_left) > 8*sample_period) {
perf_disable();
- event->pmu->disable(event);
+ event->pmu->stop(event);
atomic64_set(&hwc->period_left, 0);
- event->pmu->enable(event);
+ event->pmu->start(event);
perf_enable();
}
}
--
I think we can do this much more easily by adding a list_head to
hw_perf_event and making event_list into a proper list. Then we can
remove that funny loop on remove and instead move the event to a
remove_list when there's a put_event_constraint() method, and iterate
that list in hw_perf_enable().
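Loosely, something like the sketch below (illustrative only: the new
'list' member in hw_perf_event, the remove_list in cpu_hw_events and
the flush helper are made-up names, not actual code):

/*
 * Sketch: assumes struct hw_perf_event grew a 'struct list_head list'
 * member and struct cpu_hw_events grew a 'remove_list' list head.
 */
static void put_event_constraint(struct cpu_hw_events *cpuc,
				 struct perf_event *event)
{
	/* no linear scan over cpuc->event_list[]: just park the event
	 * until the next hw_perf_enable() */
	list_move_tail(&event->hw.list, &cpuc->remove_list);
}

static void flush_removed_events(struct cpu_hw_events *cpuc)
{
	struct perf_event *event, *tmp;

	/* called from hw_perf_enable() before rescheduling the rest */
	list_for_each_entry_safe(event, tmp, &cpuc->remove_list, hw.list)
		list_del_init(&event->hw.list);
}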
But before we do that, I think we need to look at the /* hardware */
part of struct hw_perf_event and make that arch-specific; we've been
growing that a lot lately and I don't think !x86 uses any of it.
Not sure why it's easier. It saves memory for sure, but that problem
is independent of the issue I was trying to address.
>
> But before we do that, I think we need to look at the /* hardware */
> part of struct hw_perf_event and make that arch-specific; we've been
> growing that a lot lately and I don't think !x86 uses any of it.
>
It is clear that it will need to grow much more to host non-counting
features. I have played with that myself a few weeks back. So yes, the
saved state needs to be arch-specific.
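For illustration, the split could look roughly like this (a sketch
under invented names; 'arch_hw_perf_event' does not exist, this is
just the shape of the idea):

/* hypothetical <asm/perf_event.h>: the open-coded "hardware" block of
 * struct hw_perf_event moves behind an arch-provided type */
struct arch_hw_perf_event {
	u64		config;
	u64		last_tag;
	unsigned long	config_base;
	unsigned long	event_base;
	int		idx;
	int		last_cpu;
	/* arch-only growth, e.g. state saved across ->stop()/->start(),
	 * would land here without touching the generic definition */
};

The generic struct hw_perf_event would then embed this type in its
union instead of spelling the fields out in linux/perf_event.h.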
You're right, the thing I had overlooked is that delaying that release
will mess up the constraints for new events.
OK, let's go with this. We can handle all other architectures by
falling back to enable/disable, because for everything except the new
AMD code that is still correct. If at some time in the future
start/stop becomes something that is called frequently, architectures
can provide optimized versions that bypass the constraint checking
(the current use is rare).
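Concretely, an architecture that does not care keeps its existing
struct pmu and simply leaves .start/.stop unset; the
perf_event_start()/perf_event_stop() wrappers added below then fall
back to ->enable/->disable. Hypothetical example (the foo_pmu_* names
are placeholders only):

static const struct pmu foo_pmu = {
	.enable		= foo_pmu_enable,
	.disable	= foo_pmu_disable,
	.read		= foo_pmu_read,
	.unthrottle	= foo_pmu_unthrottle,
	/* .start/.stop left NULL: the core falls back to
	 * ->enable/->disable, which is still correct for this PMU */
};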
---
Subject: perf_events: Add new start/stop PMU callbacks
From: Stephane Eranian <era...@google.com>
Date: Mon, 8 Feb 2010 17:06:01 +0200
In certain situations, the kernel may need to stop and start the
same event rapidly. The current PMU callbacks do not distinguish
between stop and release (i.e., stop + free the resource). Thus,
a counter may be released and then immediately re-acquired. Event
scheduling will take place again, with no guarantee that the same
counter will be assigned. On some processors, this may even lead to
failure to assign the event back, due to competition between cores.
This patch adds a new pair of callbacks to stop and restart a
counter without actually releasing the underlying counter resource.
On stop, the counter is stopped and its value is saved; nothing else
happens. On start, the value is reloaded and the counter is restarted
(on x86, the actual restart is delayed until perf_enable()).
Signed-off-by: Stephane Eranian <era...@google.com>
[ added fallback to ->enable/->disable for all other PMUs
fixed x86_pmu_start() to call x86_pmu.enable()
merged __x86_pmu_disable into x86_pmu_stop() ]
Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
LKML-Reference: <4b703875.0a04d0...@mx.google.com>
---
arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++++++++++++----
include/linux/perf_event.h | 2 ++
kernel/perf_event.c | 20 ++++++++++++++++++--
3 files changed, 40 insertions(+), 6 deletions(-)
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -1495,7 +1495,7 @@ static inline int match_prev_assignment(
hwc->last_tag == cpuc->tags[i];
}
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+static void x86_pmu_stop(struct perf_event *event);
void hw_perf_enable(void)
{
@@ -1533,7 +1533,7 @@ void hw_perf_enable(void)
match_prev_assignment(hwc, cpuc, i))
continue;
- __x86_pmu_disable(event, cpuc);
+ x86_pmu_stop(event);
hwc->idx = -1;
}
@@ -1801,6 +1801,19 @@ static int x86_pmu_enable(struct perf_ev
return 0;
}
+static int x86_pmu_start(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx == -1)
+ return -EAGAIN;
+
+ x86_perf_event_set_period(event, hwc, hwc->idx);
+ x86_pmu.enable(hwc, hwc->idx);
+
+ return 0;
+}
+
static void x86_pmu_unthrottle(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1924,8 +1937,9 @@ static void intel_pmu_drain_bts_buffer(s
event->pending_kill = POLL_IN;
}
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
+static void x86_pmu_stop(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -1954,7 +1968,7 @@ static void x86_pmu_disable(struct perf_
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i;
- __x86_pmu_disable(event, cpuc);
+ x86_pmu_stop(event);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
@@ -2667,6 +2681,8 @@ static inline void x86_pmu_read(struct p
static const struct pmu pmu = {
.enable = x86_pmu_enable,
.disable = x86_pmu_disable,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
.read = x86_pmu_read,
.unthrottle = x86_pmu_unthrottle,
};
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -511,6 +511,8 @@ struct perf_event;
struct pmu {
int (*enable) (struct perf_event *event);
void (*disable) (struct perf_event *event);
+ int (*start) (struct perf_event *event);
+ void (*stop) (struct perf_event *event);
void (*read) (struct perf_event *event);
void (*unthrottle) (struct perf_event *event);
};
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -1493,6 +1493,22 @@ do { \
return div64_u64(dividend, divisor);
}
+static void perf_event_stop(struct perf_event *event)
+{
+ if (!event->pmu->stop)
+ return event->pmu->disable(event);
+
+ return event->pmu->stop(event);
+}
+
+static int perf_event_start(struct perf_event *event)
+{
+ if (!event->pmu->start)
+ return event->pmu->enable(event);
+
+ return event->pmu->start(event);
+}
+
static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
@@ -1513,9 +1529,9 @@ static void perf_adjust_period(struct pe
if (atomic64_read(&hwc->period_left) > 8*sample_period) {
perf_disable();
- event->pmu->disable(event);
+ perf_event_stop(event);
atomic64_set(&hwc->period_left, 0);
- event->pmu->enable(event);
+ perf_event_start(event);
perf_enable();
I looked at the pahole output:
$ pahole -C hw_perf_event build/arch/x86/kernel/cpu/perf_event.o
struct hw_perf_event {
	union {
		struct {
			u64                config;            /*     0     8 */
			u64                last_tag;          /*     8     8 */
			long unsigned int  config_base;       /*    16     8 */
			long unsigned int  event_base;        /*    24     8 */
			int                idx;               /*    32     4 */
			int                last_cpu;          /*    36     4 */
		};                                            /*          40 */
		struct {
			s64                remaining;         /*     0     8 */
			struct hrtimer     hrtimer;           /*     8    96 */
			/* --- cacheline 1 boundary (64 bytes) was 40 bytes ago --- */
		};                                            /*         104 */
		union {
			struct arch_hw_breakpoint info;       /*          24 */
		};                                            /*          24 */
	};                                                    /*     0   104 */
	/* --- cacheline 1 boundary (64 bytes) was 40 bytes ago --- */
	atomic64_t                 prev_count;                /*   104     8 */
	u64                        sample_period;             /*   112     8 */
	u64                        last_period;               /*   120     8 */
	/* --- cacheline 2 boundary (128 bytes) --- */
	atomic64_t                 period_left;               /*   128     8 */
	u64                        interrupts;                /*   136     8 */
	u64                        freq_time_stamp;           /*   144     8 */
	u64                        freq_count_stamp;          /*   152     8 */

	/* size: 160, cachelines: 3 */
	/* last cacheline: 32 bytes */
};
which suggests we still have plenty of room to grow without adding
undue overhead on other architectures; that struct hrtimer is the
largest thing in there.
> It is clear that it will need to grow much more to host non-counting
> features. I have played with that myself a few weeks back. So yes, the
> saved state needs to be arch-specific.
What do you mean by non-counting features?
perf_events: Add new start/stop PMU callbacks
In certain situations, the kernel may need to stop and start the same
event rapidly. The current PMU callbacks do not distinguish between stop
and release (i.e., stop + free the resource). Thus, a counter may be
released and then immediately re-acquired. Event scheduling will take
place again, with no guarantee that the same counter will be assigned.
On some processors, this may even lead to failure to assign the event
back, due to competition between cores.
This patch adds a new pair of callbacks to stop and restart a counter
without actually releasing the underlying counter resource. On stop,
the counter is stopped and its value is saved; nothing else happens.
On start, the value is reloaded and the counter is restarted (on x86,
the actual restart is delayed until perf_enable()).
Signed-off-by: Stephane Eranian <era...@google.com>
[ added fallback to ->enable/->disable for all other PMUs
fixed x86_pmu_start() to call x86_pmu.enable()
merged __x86_pmu_disable into x86_pmu_stop() ]
Signed-off-by: Peter Zijlstra <a.p.zi...@chello.nl>
LKML-Reference: <4b703875.0a04d0...@mx.google.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++++++++++++----
include/linux/perf_event.h | 2 ++
kernel/perf_event.c | 20 ++++++++++++++++++--
3 files changed, 40 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a920f17..9173ea9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1495,7 +1495,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
hwc->last_tag == cpuc->tags[i];
}
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+static void x86_pmu_stop(struct perf_event *event);
void hw_perf_enable(void)
{
@@ -1533,7 +1533,7 @@ void hw_perf_enable(void)
match_prev_assignment(hwc, cpuc, i))
continue;
- __x86_pmu_disable(event, cpuc);
+ x86_pmu_stop(event);
hwc->idx = -1;
}
@@ -1801,6 +1801,19 @@ static int x86_pmu_enable(struct perf_event *event)
return 0;
}
+static int x86_pmu_start(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx == -1)
+ return -EAGAIN;
+
+ x86_perf_event_set_period(event, hwc, hwc->idx);
+ x86_pmu.enable(hwc, hwc->idx);
+
+ return 0;
+}
+
static void x86_pmu_unthrottle(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1924,8 +1937,9 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
event->pending_kill = POLL_IN;
}
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
+static void x86_pmu_stop(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -1954,7 +1968,7 @@ static void x86_pmu_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i;
- __x86_pmu_disable(event, cpuc);
+ x86_pmu_stop(event);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
@@ -2667,6 +2681,8 @@ static inline void x86_pmu_read(struct perf_event *event)
static const struct pmu pmu = {
.enable = x86_pmu_enable,
.disable = x86_pmu_disable,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
.read = x86_pmu_read,
.unthrottle = x86_pmu_unthrottle,
};
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 071a7db..b08dfda 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -513,6 +513,8 @@ struct perf_event;
struct pmu {
int (*enable) (struct perf_event *event);
void (*disable) (struct perf_event *event);
+ int (*start) (struct perf_event *event);
+ void (*stop) (struct perf_event *event);
void (*read) (struct perf_event *event);
void (*unthrottle) (struct perf_event *event);
};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5a69abb..74c6002 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1493,6 +1493,22 @@ do { \
return div64_u64(dividend, divisor);
}
+static void perf_event_stop(struct perf_event *event)
+{
+ if (!event->pmu->stop)
+ return event->pmu->disable(event);
+
+ return event->pmu->stop(event);
+}
+
+static int perf_event_start(struct perf_event *event)
+{
+ if (!event->pmu->start)
+ return event->pmu->enable(event);
+
+ return event->pmu->start(event);
+}
+
static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
@@ -1513,9 +1529,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
if (atomic64_read(&hwc->period_left) > 8*sample_period) {
perf_disable();
- event->pmu->disable(event);
+ perf_event_stop(event);
atomic64_set(&hwc->period_left, 0);
- event->pmu->enable(event);
+ perf_event_start(event);