
[BUG] perf: arch_perf_out_copy_user default


Peter Zijlstra

Oct 30, 2013, 10:40:02 AM

Hi Frederic,

I just spotted:

#ifndef arch_perf_out_copy_user
#define arch_perf_out_copy_user __copy_from_user_inatomic
#endif

vs:

arch/x86/include/asm/perf_event.h:#define arch_perf_out_copy_user copy_from_user_nmi


Now the problem is that copy_from_user_nmi() and
__copy_from_user_inatomic() have different return semantics.

Furthermore, the macro you use them in, DEFINE_OUTPUT_COPY(), assumes the
return value is the amount of memory copied, as also illustrated by
memcpy_common().

Trouble is, __copy_from_user_inatomic() returns the number of bytes
_NOT_ copied.
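
To make the mismatch concrete, the two conventions side by side (a contrived
sketch for illustration, not the actual kernel code):

/* copy_from_user_nmi() convention: return the number of bytes copied. */
static unsigned long copy_returns_copied(void *dst, const void *src,
					 unsigned long n)
{
	memcpy(dst, src, n);
	return n;		/* complete success reads as n */
}

/* __copy_from_user_inatomic() convention: return the bytes NOT copied. */
static unsigned long copy_returns_uncopied(void *dst, const void *src,
					   unsigned long n)
{
	memcpy(dst, src, n);
	return 0;		/* complete success reads as 0 */
}

/*
 * DEFINE_OUTPUT_COPY() treats the return value as "bytes copied" -- it
 * advances handle->addr and shrinks len by it -- so plugging in the second
 * convention makes a fully successful copy look like a copy of 0 bytes.
 */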

With this, my question to Will is, how did your ARM unwind support
patches ever work? AFAICT they end up using the
__copy_from_user_inatomic() thing.


---
kernel/events/internal.h | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ca6599723be5..d7a0f753e695 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -110,7 +110,8 @@ func_name(struct perf_output_handle *handle, \
return len; \
}

-static inline int memcpy_common(void *dst, const void *src, size_t n)
+static inline unsigned long
+memcpy_common(void *dst, const void *src, unsigned long n)
{
memcpy(dst, src, n);
return n;
@@ -123,7 +124,19 @@ DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)

#ifndef arch_perf_out_copy_user
-#define arch_perf_out_copy_user __copy_from_user_inatomic
+#define arch_perf_out_copy_user arch_perf_out_copy_user
+
+static inline unsigned long
+arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
+{
+ unsigned long ret;
+
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, src, n);
+ pagefault_enable();
+
+ return n - ret;
+}
#endif

DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Peter Zijlstra

Oct 30, 2013, 2:50:02 PM

Subject: perf: Fix arch_perf_out_copy_user default implementation

I noticed that the arch_perf_output_copy_user default of
__copy_from_user_inatomic has different return semantics than all other
memcpy functions we use -- notably also copy_from_user_nmi() for x86.

So provide a compatible default function.

Also change all the memcpy functions to use unsigned long.

Signed-off-by: Peter Zijlstra <pet...@infradead.org>
---
kernel/events/internal.h | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ca6599723be5..325e85776406 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,14 +82,14 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
}

#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
-static inline unsigned int \
+static inline unsigned long \
func_name(struct perf_output_handle *handle, \
- const void *buf, unsigned int len) \
+ const void *buf, unsigned long len) \
{ \
unsigned long size, written; \
\
do { \
- size = min_t(unsigned long, handle->size, len); \
+ size = min(handle->size, len); \
\
written = memcpy_func(handle->addr, buf, size); \
\
@@ -110,7 +110,8 @@ func_name(struct perf_output_handle *handle, \
return len; \
}

-static inline int memcpy_common(void *dst, const void *src, size_t n)
+static inline unsigned long
+memcpy_common(void *dst, const void *src, unsigned long n)
{
memcpy(dst, src, n);
return n;
@@ -118,12 +119,28 @@ static inline int memcpy_common(void *dst, const void *src, size_t n)

DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)

-#define MEMCPY_SKIP(dst, src, n) (n)
+static inline unsigned long
+memcpy_skip(void *dst, const void *src, unsigned long n)
+{
+ return n;
+}

-DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)
+DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)

#ifndef arch_perf_out_copy_user
-#define arch_perf_out_copy_user __copy_from_user_inatomic
+#define arch_perf_out_copy_user arch_perf_out_copy_user
+
+static inline unsigned long
+arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
+{
+ unsigned long ret;
+
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, src, n);
+ pagefault_enable();
+
+ return n - ret;
+}
#endif

DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)

Will Deacon

Oct 30, 2013, 3:40:03 PM

On Wed, Oct 30, 2013 at 02:37:50PM +0000, Peter Zijlstra wrote:
> Hi Frederic,

Hi Peter,

> I just spotted:
>
> #ifndef arch_perf_out_copy_user
> #define arch_perf_out_copy_user __copy_from_user_inatomic
> #endif
>
> vs:
>
> arch/x86/include/asm/perf_event.h:#define arch_perf_out_copy_user copy_from_user_nmi
>
>
> Now the problem is that copy_from_user_nmi() and
> __copy_from_user_inatomic() have different return semantics.
>
> Furthermore, the macro you use them in, DEFINE_OUTPUT_COPY(), assumes the
> return value is the amount of memory copied, as also illustrated by
> memcpy_common().
>
> Trouble is, __copy_from_user_inatomic() returns the number of bytes
> _NOT_ copied.
>
> With this, my question to Will is, how did your ARM unwind support
> patches ever work? AFAICT they end up using the
> __copy_from_user_inatomic() thing.

Yeah, that's weird, because they *do* appear to work! In fact, looking at
the code in kernel/events/core.c, it looks like __output_copy_user is
expected to return the number of bytes not copied, so provided
__copy_from_user_inatomic() succeeds first time around, the DEFINE_OUTPUT_COPY
macro will return len (dump_size) and perf_output_skip() will deal with
the buffer pointers for us. The issue then is that the dynamic size will be 0,
and the unwind code in perf will never be called (except I know that it *is*
being called).
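
As a toy model of the pre-fix loop (heavily simplified; the real macro body
and the page wrapping are omitted), a fully successful user copy ends up
like this:

#include <stdio.h>

/* Stand-in for __copy_from_user_inatomic(): 0 == no bytes left uncopied. */
static unsigned long fake_copy_user(unsigned long n)
{
	return 0;
}

/* Models the old DEFINE_OUTPUT_COPY() body, which reads the return value
 * as "bytes copied". */
static unsigned long old_output_copy_user(unsigned long handle_size,
					  unsigned long len)
{
	unsigned long size, written;

	do {
		size = handle_size < len ? handle_size : len;
		written = fake_copy_user(size);	/* 0 on full success */
		len -= written;			/* len never shrinks */
		handle_size -= written;		/* pointer never advances */
	} while (len && written == size);

	return len;
}

int main(void)
{
	/* A successful 512-byte stack dump into a 4096-byte handle: */
	printf("%lu\n", old_output_copy_user(4096, 512));	/* prints 512 */
	return 0;
}

So the caller is told that all dump_size bytes were left uncopied, computes a
dynamic size of 0, and only the skip path fixes the buffer up afterwards --
which matches the behaviour described above.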

I'll go dig further...

Will

Peter Zijlstra

Oct 30, 2013, 3:40:02 PM

On Wed, Oct 30, 2013 at 07:29:27PM +0000, Will Deacon wrote:
> Yeah, that's weird, because they *do* appear to work! In fact, looking at
> the code in kernel/events/core.c, it looks like __output_copy_user is
> expected to return the number of bytes not copied,

Oh, cute, double fail.. head hurts.

Frederic Weisbecker

Oct 30, 2013, 4:00:02 PM

On Wed, Oct 30, 2013 at 03:37:50PM +0100, Peter Zijlstra wrote:
> Hi Frederic,
>
> I just spotted:
>
> #ifndef arch_perf_out_copy_user
> #define arch_perf_out_copy_user __copy_from_user_inatomic
> #endif
>
> vs:
>
> arch/x86/include/asm/perf_event.h:#define arch_perf_out_copy_user copy_from_user_nmi
>
> Now the problem is that copy_from_user_nmi() and
> __copy_from_user_inatomic() have different return semantics.
>
> Furthermore, the macro you use them in, DEFINE_OUTPUT_COPY(), assumes the
> return value is the amount of memory copied, as also illustrated by
> memcpy_common().
>
> Trouble is, __copy_from_user_inatomic() returns the number of bytes
> _NOT_ copied.

Aie, sorry about that, I made a wrong assumption indeed.
Would it make sense to rather make copy_from_user_nmi() use a return value
pattern that is closer to those of the existing copy_from_user_*()?

This way we avoid future mistakes of that kind.

Thanks.

Peter Zijlstra

Oct 30, 2013, 4:20:01 PM

On Wed, Oct 30, 2013 at 08:50:28PM +0100, Frederic Weisbecker wrote:
>
> Would it make sense to rather make copy_from_user_nmi() use a return value
> pattern that is closer to those of the existing copy_from_user_*()?

Yeah we can do that I suppose; copy_from_user_nmi() has actually used
__copy_from_user_inatomic() for about a week now.

Something like so I suppose.. please check, I'm in fail mode.

It looks like DEFINE_OUTPUT_COPY() functions already returned the bytes
not copied, and all its users appear to indeed expect that.
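
For instance, the user stack dump consumes it along these lines (paraphrased
from perf_output_sample_ustack(); names approximate, not a verbatim quote):

	u64 rem, dyn_size;

	/* __output_copy_user() is expected to return the bytes NOT copied. */
	rem = __output_copy_user(handle, (void *) sp, dump_size);
	dyn_size = dump_size - rem;

	perf_output_skip(handle, rem);		/* step over the unwritten tail */
	perf_output_put(handle, dyn_size);	/* record how much really landed */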

---
arch/x86/kernel/cpu/perf_event.c | 4 ++--
arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
arch/x86/kernel/cpu/perf_event_intel_lbr.c | 2 +-
arch/x86/lib/usercopy.c | 2 +-
arch/x86/oprofile/backtrace.c | 4 ++--
kernel/events/internal.h | 35 ++++++++++++++++++++++--------
6 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 94bf3010bb39..0fa7dbe756e6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1998,7 +1998,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
frame.return_address = 0;

bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;

if (!valid_user_frame(fp, sizeof(frame)))
@@ -2050,7 +2050,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
frame.return_address = 0;

bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;

if (!valid_user_frame(fp, sizeof(frame)))
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index c1760ff3c757..ae96cfa5eddd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -789,7 +789,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)

size = ip - to; /* Must fit our buffer, see above */
bytes = copy_from_user_nmi(buf, (void __user *)to, size);
- if (bytes != size)
+ if (bytes != 0)
return 0;

kaddr = buf;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 90ee6c1d0542..d82d155aca8c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -491,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)

/* may fail if text not present */
bytes = copy_from_user_nmi(buf, (void __user *)from, size);
- if (bytes != size)
+ if (bytes != 0)
return X86_BR_NONE;

addr = buf;
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 5465b8613944..ddf9ecb53cc3 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -31,6 +31,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
ret = __copy_from_user_inatomic(to, from, n);
pagefault_enable();

- return n - ret;
+ return ret;
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index d6aa6e8315d1..5d04be5efb64 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head)
unsigned long bytes;

bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
- if (bytes != sizeof(bufhead))
+ if (bytes != 0)
return NULL;

fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
unsigned long bytes;

bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
- if (bytes != sizeof(bufhead))
+ if (bytes != 0)
return NULL;

oprofile_add_trace(bufhead[0].return_address);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ca6599723be5..569b218782ad 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
}

#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
-static inline unsigned int \
+static inline unsigned long \
func_name(struct perf_output_handle *handle, \
- const void *buf, unsigned int len) \
+ const void *buf, unsigned long len) \
{ \
unsigned long size, written; \
\
do { \
- size = min_t(unsigned long, handle->size, len); \
- \
+ size = min(handle->size, len); \
written = memcpy_func(handle->addr, buf, size); \
+ written = size - written; \
\
len -= written; \
handle->addr += written; \
@@ -110,20 +110,37 @@ func_name(struct perf_output_handle *handle, \
return len; \
}

-static inline int memcpy_common(void *dst, const void *src, size_t n)
+static inline unsigned long
+memcpy_common(void *dst, const void *src, unsigned long n)
{
memcpy(dst, src, n);
- return n;
+ return 0;
}

DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)

-#define MEMCPY_SKIP(dst, src, n) (n)
+static inline unsigned long
+memcpy_skip(void *dst, const void *src, unsigned long n)
+{
+ return 0;
+}

-DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)
+DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)

#ifndef arch_perf_out_copy_user
-#define arch_perf_out_copy_user __copy_from_user_inatomic
+#define arch_perf_out_copy_user arch_perf_out_copy_user
+
+static inline unsigned long
+arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
+{
+ unsigned long ret;
+
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, src, n);
+ pagefault_enable();
+
+ return ret;
+}
#endif

DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)

Frederic Weisbecker

Oct 30, 2013, 4:50:01 PM

On Wed, Oct 30, 2013 at 09:16:22PM +0100, Peter Zijlstra wrote:
> On Wed, Oct 30, 2013 at 08:50:28PM +0100, Frederic Weisbecker wrote:
> >
> > Would it make sense to rather make copy_from_user_nmi() use a return value
> > pattern that is closer to those of the existing copy_from_user_*()?
>
> Yeah we can do that I suppose; copy_from_user_nmi() has actually used
> __copy_from_user_inatomic() for about a week now.

Ah! Looks like we can even move copy_from_user_nmi() to asm-generic/uaccess.h then.
Its fallback implementation after this patch is the same as the x86 one anyway.

Hmm but there is the range_not_ok()...
I assume copy_from_user_inatomic() shouldn't return a negative value. I'm a bit
confused by its long return type but I haven't found a place where it returns
anything else than the bytes copied. I only checked a few archs though.
Ok, that all looks good!

Thanks!
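
For illustration, such a generic fallback might look roughly like this
(untested sketch; the access_ok() check standing in for x86's range_not_ok()
and the asm-generic placement are only illustrative assumptions):

#ifndef copy_from_user_nmi
static inline unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long ret;

	/* Reject non-user addresses up front, like x86's range check does. */
	if (!access_ok(VERIFY_READ, from, n))
		return n;			/* nothing copied */

	pagefault_disable();
	ret = __copy_from_user_inatomic(to, from, n);
	pagefault_enable();

	return ret;				/* bytes not copied */
}
#endif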

Will Deacon

Oct 30, 2013, 6:40:03 PM

Hi Peter,

On Wed, Oct 30, 2013 at 08:16:22PM +0000, Peter Zijlstra wrote:
> On Wed, Oct 30, 2013 at 08:50:28PM +0100, Frederic Weisbecker wrote:
> >
> > Would it make sense to rather make copy_from_user_nmi() use a return value
> > pattern that is closer to those of the existing copy_from_user_*()?
>
> Yeah we can do that I suppose; copy_from_user_nmi() has actually used
> __copy_from_user_inatomic() for about a week now.
>
> Something like so I suppose.. please check, I'm in fail mode.
>
> It looks like DEFINE_OUTPUT_COPY() functions already returned the bytes
> not copied, and all its users appear to indeed expect that.
>
> ---
> arch/x86/kernel/cpu/perf_event.c | 4 ++--
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
> arch/x86/kernel/cpu/perf_event_intel_lbr.c | 2 +-
> arch/x86/lib/usercopy.c | 2 +-
> arch/x86/oprofile/backtrace.c | 4 ++--
> kernel/events/internal.h | 35 ++++++++++++++++++++++--------
> 6 files changed, 33 insertions(+), 16 deletions(-)

The core changes look good to me, and I've tested that this still produces
callchain information for ARM. It's odd that this worked so well before...

Acked-by: Will Deacon <will....@arm.com>

Will

tip-bot for Peter Zijlstra

Nov 6, 2013, 8:30:04 AM

Commit-ID: 0a196848ca365ec582c6d86659be456be6d4ed96
Gitweb: http://git.kernel.org/tip/0a196848ca365ec582c6d86659be456be6d4ed96
Author: Peter Zijlstra <pet...@infradead.org>
AuthorDate: Wed, 30 Oct 2013 21:16:22 +0100
Committer: Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 6 Nov 2013 12:34:25 +0100

perf: Fix arch_perf_out_copy_user default

The arch_perf_output_copy_user() default of
__copy_from_user_inatomic() returns bytes not copied, while all other
argument functions given to DEFINE_OUTPUT_COPY() return bytes copied.

Since copy_from_user_nmi() is the odd duck out by returning bytes
copied where all other *copy_{to,from}* functions return bytes not
copied, change it over and amend DEFINE_OUTPUT_COPY() to expect bytes
not copied.

Oddly enough DEFINE_OUTPUT_COPY() already returned bytes not copied
while expecting its worker functions to return bytes copied.

Signed-off-by: Peter Zijlstra <pet...@infradead.org>
Acked-by: will....@arm.com
Cc: Frederic Weisbecker <fwei...@gmail.com>
Link: http://lkml.kernel.org/r/20131030201...@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
arch/x86/kernel/cpu/perf_event.c | 4 ++--
arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
arch/x86/kernel/cpu/perf_event_intel_lbr.c | 2 +-
arch/x86/lib/usercopy.c | 2 +-
arch/x86/oprofile/backtrace.c | 4 ++--
kernel/events/internal.h | 35 ++++++++++++++++++++++--------
6 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8a87a32..8e13293 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1989,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
frame.return_address = 0;

bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;

if (!valid_user_frame(fp, sizeof(frame)))
@@ -2041,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
frame.return_address = 0;

bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;

if (!valid_user_frame(fp, sizeof(frame)))
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index c1760ff..ae96cfa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -789,7 +789,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)

size = ip - to; /* Must fit our buffer, see above */
bytes = copy_from_user_nmi(buf, (void __user *)to, size);
- if (bytes != size)
+ if (bytes != 0)
return 0;

kaddr = buf;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 90ee6c1..d82d155 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -491,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)

/* may fail if text not present */
bytes = copy_from_user_nmi(buf, (void __user *)from, size);
- if (bytes != size)
+ if (bytes != 0)
return X86_BR_NONE;

addr = buf;
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 5465b86..ddf9ecb 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -31,6 +31,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
ret = __copy_from_user_inatomic(to, from, n);
pagefault_enable();

- return n - ret;
+ return ret;
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index d6aa6e8..5d04be5 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head)
unsigned long bytes;

bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
- if (bytes != sizeof(bufhead))
+ if (bytes != 0)
return NULL;

fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
unsigned long bytes;

bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
- if (bytes != sizeof(bufhead))
+ if (bytes != 0)
return NULL;

oprofile_add_trace(bufhead[0].return_address);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ca65997..569b2187 100644