Account Options

  1. Sign in
The old Google Groups will be going away soon, but your browser is incompatible with the new version.
Google Groups Home
« Groups Home
x86, perf: adds support for the LWP threshold-int
There are currently too many topics in this group that display first. To make this topic appear first, remove this option from another topic.
There was an error processing your request. Please try again.
flag
  Messages 26 - 50 of 54 - Collapse all  -  Translate all to Translated (View all originals) < Older  Newer >
The group you are posting to is a Usenet group. Messages posted to this group will make your email address visible to anyone on the Internet.
Your reply message has not been sent.
Your post was successful
 
From:
To:
Cc:
Followup To:
Add Cc | Add Followup-to | Edit Subject
Subject:
Validation:
For verification purposes please type the characters you see in the picture below or the numbers you hear by clicking the accessibility icon. Listen and type the numbers you hear
 
Hans Rosenfeld  
View profile  
 More options Dec 16 2011, 11:20 am
Newsgroups: linux.kernel
From: Hans Rosenfeld <hans.rosenf...@amd.com>
Date: Fri, 16 Dec 2011 17:20:01 +0100
Local: Fri, Dec 16 2011 11:20 am
Subject: [RFC 5/5] x86, perf: adds support for the LWP threshold-int
From: Benjamin Block <benjamin.bl...@amd.com>

This patch adds support for the LWP threshold-interrupt to the
LWP-integration in perf. For each LWP-event that is written into the
buffer, an interrupt is generated and an overflow is reported to perf. If
requested, the LWP-event is also reported as a raw-event.

The perf-sample_rate is used as the interval for the corresponding
LWP-event. The current implementation restricts the sample_rate to be
between 0xF and 0x1FFFFFF, because we couldn't report a raw-LWP-event for
each overflow if the sample_rate were bigger (the period-calculation
could cause an overflow although there was no interrupt).

The interrupt is currently only available to the kernel and not to
userland-software that wants to use LWP without the in-kernel
implementation.

Signed-off-by: Benjamin Block <benjamin.bl...@amd.com>
Signed-off-by: Hans Rosenfeld <hans.rosenf...@amd.com>
---
 arch/x86/include/asm/irq_vectors.h       |    8 +-
 arch/x86/kernel/cpu/Makefile             |    4 +-
 arch/x86/kernel/cpu/perf_event_amd_lwp.c |  318 +++++++++++++++++++++++-------
 arch/x86/kernel/entry_64.S               |    2 +
 4 files changed, 253 insertions(+), 79 deletions(-)

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 7e50f06..c5447f5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -119,6 +119,12 @@
  */
 #define LOCAL_TIMER_VECTOR             0xef

+/*
+ * Vector-Nr. used by the threshold-interrupt.
+ * Has to be initialized before it is written to MSR_AMD64_LWP_CFG.
+ */
+#define LWP_THRESHOLD_VECTOR           0xee
+
 /* up to 32 vectors used for spreading out TLB flushes: */
 #if NR_CPUS <= 32
 # define NUM_INVALIDATE_TLB_VECTORS    (NR_CPUS)
@@ -126,7 +132,7 @@
 # define NUM_INVALIDATE_TLB_VECTORS    (32)
 #endif

-#define INVALIDATE_TLB_VECTOR_END      (0xee)
+#define INVALIDATE_TLB_VECTOR_END      (0xed)
 #define INVALIDATE_TLB_VECTOR_START    \
        (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 9973465..6d87bac 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_X86_32)  += bugs.o
 obj-$(CONFIG_X86_64)   += bugs_64.o

 obj-$(CONFIG_CPU_SUP_INTEL)            += intel.o
-obj-$(CONFIG_CPU_SUP_AMD)              += amd.o perf_event_amd_lwp.o
+obj-$(CONFIG_CPU_SUP_AMD)              += amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)         += cyrix.o
 obj-$(CONFIG_CPU_SUP_CENTAUR)          += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)     += transmeta.o
@@ -31,7 +31,7 @@ obj-$(CONFIG_PERF_EVENTS)             += perf_event.o
 obj-$(CONFIG_X86_MCE)                  += mcheck/
 obj-$(CONFIG_MTRR)                     += mtrr/

-obj-$(CONFIG_X86_LOCAL_APIC)           += perfctr-watchdog.o
+obj-$(CONFIG_X86_LOCAL_APIC)           += perfctr-watchdog.o perf_event_amd_lwp.o

 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/perf_event_amd_lwp.c b/arch/x86/kernel/cpu/perf_event_amd_lwp.c
index afc6c8d..205245d 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_lwp.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_lwp.c
@@ -10,6 +10,9 @@
 #include <linux/highmem.h>
 #include <linux/bitops.h>

+#include <asm/idle.h>
+#include <asm/desc.h>
+#include <asm/irq_vectors.h>
 #include <asm/xsave.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -250,6 +253,7 @@ struct lwp_struct {

        /* Cached events that have been read from buffer */
        u64                             *event_counter;
+       struct perf_event               **registered_events;
        /*
         * Cached xsave-values, to prevent lose of already counted but not
         * submitted events.
@@ -270,6 +274,8 @@ static inline int vector_test(unsigned int bit_nr, u32 vector)
 static struct lwp_capabilities lwp_caps;
 static struct pmu              perf_lwp_pmu;

+static DEFINE_PER_CPU(struct lwp_struct *, active_lwp_struct) = 0;
+
 static u16 get_filter_mask_for(u32 eventnr)
 {
        /*
@@ -735,6 +741,16 @@ static struct lwp_struct *lwpcb_new(void)
        }
        memset(l->event_counter, 0, l->eventmax * sizeof(*l->event_counter));

+       l->registered_events =
+               kmalloc(l->eventmax * sizeof(*l->registered_events),
+                       GFP_ATOMIC);
+       if(!l->registered_events) {
+               err = -ENOENT;
+               goto err_event_counter_alloc;
+       }
+       memset(l->registered_events, 0,
+                       l->eventmax * sizeof(*l->registered_events));
+
        l->userspace.mm = get_task_mm(current);

        err = get_userspace_mapping(&l->userspace.lwpcb, l->userspace.mm,
@@ -747,8 +763,11 @@ static struct lwp_struct *lwpcb_new(void)
        if (err)
                goto err_ulwpcb;

-       /* modified on event-start */
-       l->lwpcb.head->flags = 0;
+       /*
+        * Activate only the threshold interrupt,
+        * all other events are activated on pmu-start() of the specific event
+        */
+       l->lwpcb.head->flags = (1U << LWP_CAPS_THRESHOLD);
        l->lwpcb.head->buffer_size = l->buffer.size;
        l->lwpcb.head->buffer_base = (u64) l->userspace.buffer.addr;
        /* currently not supported by this pmu */
@@ -779,6 +798,8 @@ err_ulwpcb:
 err_mm:
        mmput(l->userspace.mm);

+       kfree(l->registered_events);
+err_event_counter_alloc:
        kfree(l->event_counter);
 err_lwpcbbuffer_alloc:
        kfree(l->buffer.buffer_base);
@@ -809,6 +830,7 @@ static void lwpcb_destory(struct kref *kref)
        free_userspace_mapping(&l->userspace.buffer, l->userspace.mm);
        mmput(l->userspace.mm);

+       kfree(l->registered_events);
        kfree(l->event_counter);
        kfree(l->buffer.buffer_base);
        kfree(l->lwpcb.lwpcb_base);
@@ -840,57 +862,46 @@ static void lwpcb_remove_event(struct lwp_struct *lwps, u32 eventnr)
        lwps->lwpcb.events[eventnr-1].counter = 0;
 }

-static int lwpcb_read_buffer(struct lwp_struct *l)
+static int
+lwpcb_update_period(struct lwp_struct *lwps, struct perf_event *event,
+                   u64 period, u64 new_period)
 {
-       u32 bho, bto, bz;
-       int count, i;
-       char *buffer = l->buffer.buffer_base;
-       struct lwp_event *event;
-
-       bz = l->lwpcb.head->buffer_size;
-
-       bto = l->lwpcb.head->buffer_tail_offset;
-       buffer += bto;
-
-       /*
-        * the last two checks are to prevent user-manipulations that could
-        * cause damage
-        */
-       if (lwp_read_head_offset(l, &bho) || (bho > bz) || (bho % l->eventsize))
-               BUG();
-
-       count = (((bho - bto) % bz) / l->eventsize);
-       if(count <= 0)
-               return 0;
-
-       /* todo read only needed chunks */
-       if (userread_buffer(l, bto, bho))
-               BUG();
+       struct hw_perf_event *hwc = &event->hw;
+       u32 event_idx = lwp_config_event_get(event->attr.config) - 1;
+       u64 sample_period = hwc->sample_period;
+       u64 last_period = period;
+       u64 left = local64_read(&hwc->period_left);
+       s64 sleft;
+       int overflow = 0;

-       for (i = 0; i < count; i++) {
-               event = (struct lwp_event *) (buffer + bto);
+       hwc->last_period = last_period;
+       sleft = (new_period - sample_period);

-               /*
-                * The opposite COULD be a programmed lwp-event (id=255), but we
-                * ignore them for now.
-                */
-               if ((event->event_id > LWP_EVENT_INVALID) ||
-                               (event->event_id < LWP_EVENT_MAX)) {
-                       l->event_counter[event->event_id - 1] +=
-                               l->lwpcb.events[event->event_id - 1].interval;
-               }
-
-               bto += l->eventsize;
-               if (bto >= bz)
-                       bto = 0;
+       /* test if the change was already enough to trigger an overflow */
+       if (left < -sleft) {
+               overflow = 1;
+               left = new_period + (left + sleft);
+       }
+       else {
+               left += sleft;
        }

-       l->lwpcb.head->buffer_tail_offset = bto;
+       if (left <= last_period) {
+               overflow = 1;
+               left = new_period + (left - last_period);
+               local64_set(&hwc->period_left, left);
+       } else {
+               left -= last_period;
+               local64_set(&hwc->period_left, left);
+       }

-       if (userwrite_buffer_tail_offset(l))
-               BUG();
+       /*
+        * if new_period != hwc->sample_period, then this change
+        * has also to be promoted to lwp via userwrite_lwpcb
+        */
+       lwps->lwpcb.events[event_idx].interval = new_period;

-       return 0;
+       return overflow;
 }

 static void perf_lwp_event_destroy(struct perf_event *event)
@@ -907,6 +918,9 @@ static void perf_lwp_event_destroy(struct perf_event *event)

        raw_spin_lock_irqsave(&l->lock, flags);

+       if(l->registered_events[eventnr-1] != event)
+               goto not_registered;
+
        if (lwp_stop(l))
                BUG();

@@ -917,10 +931,12 @@ static void perf_lwp_event_destroy(struct perf_event *event)

        l->event_counter[eventnr-1] = 0;
        l->xstate_counter[eventnr-1] = 0;
+       l->registered_events[eventnr-1] = 0;

        if ((l->lwpcb.head->flags & LWP_EVENT_MASK) && lwp_start(l, 1))
                BUG();

+not_registered:
        raw_spin_unlock_irqrestore(&l->lock, flags);

        /* for future with cross-lwp-creation this needs to be locked */
@@ -1009,7 +1025,6 @@ perf_lwp_event_init_for(struct perf_event *event, int cpu,
                 * maybe we would better introduce a lwp-field in the
                 * event-context to prevent two events racing this
                 */
-
                rcu_read_unlock();

                lwpcb = lwpcb_new();
@@ -1029,7 +1044,7 @@ perf_lwp_event_init_for(struct perf_event *event, int cpu,

        raw_spin_lock_irqsave(&lwpcb->lock, flags);

-       if (lwpcb->lwpcb.events[eventnr-1].interval) {
+       if (lwpcb->registered_events[eventnr-1]) {
                err = -EINVAL;
                goto err_add_failed;
        }
@@ -1045,6 +1060,7 @@ perf_lwp_event_init_for(struct perf_event *event, int cpu,

        lwpcb->event_counter[eventnr-1] = 0;
        lwpcb->xstate_counter[eventnr-1] = 0;
+       lwpcb->registered_events[eventnr-1] = event;

        event->destroy = perf_lwp_event_destroy;

@@ -1073,25 +1089,15 @@ static void perf_lwp_start(struct perf_event *event, int flags)
        struct lwp_struct *l = (struct lwp_struct *) event->hw.config;
        u32 eventnr = lwp_config_event_get(event->attr.config);
        u32 lwpflags;
+       int overflow;
        unsigned long lockflags = 0;

-       /* update cached values, before updating freq */
-       raw_spin_lock_irqsave(&l->lock, lockflags);
-       lwpcb_read_buffer(l);
-       raw_spin_unlock_irqrestore(&l->lock, lockflags);
-
-       lockflags = 0;
...

read more »


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Discussion subject changed to "x86, perf: implements lwp-perf-integration (rc1)" by Ingo Molnar
Ingo Molnar  
View profile  
 More options Dec 18 2011, 3:10 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Sun, 18 Dec 2011 09:10:01 +0100
Local: Sun, Dec 18 2011 3:10 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Hans Rosenfeld <hans.rosenf...@amd.com> wrote:

> From: Benjamin Block <benjamin.bl...@amd.com>

> Implements a basic integration of LWP into perf. Permits a way
> to create a perf-event that will be backed by LWP. The PMU
> creates the required structures and userspace-memories. The
> PMU also collects the samples from the ring-buffer, but as
> there is currently no interrupt- and overflow-implementation,
> they are not reported (TODO).

Ok, this is a step in the right direction - once the threshold
IRQ flow control mechanism is implemented we are looking at
something that might be mergeable. Any ETA on those bits?

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Benjamin Block  
View profile  
 More options Dec 18 2011, 10:30 am
Newsgroups: linux.kernel
From: Benjamin Block <b...@mageta.org>
Date: Sun, 18 Dec 2011 16:30:03 +0100
Local: Sun, Dec 18 2011 10:30 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

The threshold-interrupt is already integrated with patch 5 of this
patch-set.

Maybe I wrote the descriptions in a slightly misleading way. Sorry for that. :)

best regards,
- Benjamin
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 18 2011, 6:50 pm
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Mon, 19 Dec 2011 00:50:02 +0100
Local: Sun, Dec 18 2011 6:50 pm
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Benjamin Block <b...@mageta.org> wrote:

> The threshold-interrupt is already integrated with patch 5 of
> this patch-set.

> Maybe I wrote the descriptions a little misleading. Sry for
> that. :)

Okay, i stopped reading at the first patch that claimed that the
threshold irq was not supported ;-)

So the question becomes, how well is it integrated: can
perf 'record -a + perf report', or 'perf top' use LWP,
to do system-wide precise [user-space] profiling and such?

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Robert Richter  
View profile  
 More options Dec 19 2011, 4:10 am
Newsgroups: linux.kernel
From: Robert Richter <robert.rich...@amd.com>
Date: Mon, 19 Dec 2011 10:10:01 +0100
Local: Mon, Dec 19 2011 4:10 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
On 19.12.11 00:43:10, Ingo Molnar wrote:

> So the question becomes, how well is it integrated: can
> perf 'record -a + perf report', or 'perf top' use LWP,
> to do system-wide precise [user-space] profiling and such?

Only self-monitoring of a process is possible; kernel and
system-wide profiling are not. This is because we cannot allocate memory
regions in the kernel for a thread other than the current one. This would
require a complete rework of the mm code.

-Robert

--
Advanced Micro Devices, Inc.
Operating System Research Center

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 19 2011, 6:00 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Mon, 19 Dec 2011 12:00:01 +0100
Local: Mon, Dec 19 2011 6:00 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Robert Richter <robert.rich...@amd.com> wrote:

> On 19.12.11 00:43:10, Ingo Molnar wrote:

> > So the question becomes, how well is it integrated: can perf
> > 'record -a + perf report', or 'perf top' use LWP, to do
> > system-wide precise [user-space] profiling and such?

> There is only self-monitoring of a process possible, no kernel
> and system-wide profiling. This is because we can not allocate
> memory regions in the kernel for a thread other than the
> current. This would require a complete rework of mm code.

Hm, i don't think a rework is needed: check the
vmalloc_to_page() code in kernel/events/ring_buffer.c. Right now
CONFIG_PERF_USE_VMALLOC is an ARM, MIPS, SH and Sparc specific
feature, on x86 it turns on if CONFIG_DEBUG_PERF_USE_VMALLOC=y.

That should be good enough for prototyping the kernel/user
shared buffering approach.

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Avi Kivity  
View profile  
 More options Dec 19 2011, 6:20 am
Newsgroups: linux.kernel
From: Avi Kivity <a...@redhat.com>
Date: Mon, 19 Dec 2011 12:20:02 +0100
Local: Mon, Dec 19 2011 6:20 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
On 12/19/2011 12:54 PM, Ingo Molnar wrote:

LWP wants user memory, vmalloc is insufficient.  You need do_mmap() with
a different mm.

You could let a workqueue call use_mm() and then do_mmap().  Even then
it is subject to disruption by the monitored thread (and may disrupt the
monitored thread by playing with its address space).  This is for thread
monitoring only, I don't think system-wide monitoring is possible with LWP.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 19 2011, 6:50 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Mon, 19 Dec 2011 12:50:02 +0100
Local: Mon, Dec 19 2011 6:50 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Avi Kivity <a...@redhat.com> wrote:

Take a look at PERF_USE_VMALLOC, it allows in-kernel allocated
memory to be mmap()ed to user-space. It is basically a
shared/dual user/kernel mode vmalloc implementation.

So all the conceptual pieces are there.

> You could let a workqueue call use_mm() and then do_mmap().  
> Even then it is subject to disruption by the monitored thread
> (and may disrupt the monitored thread by playing with its
> address space). [...]

Injecting this into another thread's context is indeed advanced
stuff:

> [...] This is for thread monitoring only, I don't think
> system-wide monitoring is possible with LWP.

That should be possible too, via two methods:

1) the easy hack: a (per cpu) vmalloc()ed buffer is made ring 3
   accessible (by clearing the system bit in the ptes) - and
   thus accessible to all user-space.

   This is obviously globally writable/readable memory so only a
   debugging/prototyping hack - but would be a great first step
   to prove the concept and see some nice perf top and perf
   record results ...

2) the proper solution: creating a 'user-space vmalloc()' that
   is per mm and that gets inherited transparently, across
   fork() and exec(), and which lies outside the regular vma
   spaces. On 64-bit this should be straightforward.

   These vmas are not actually 'known' to user-space normally -
   the kernel PMU code knows about it and does what we do with
   PEBS: flushes it when necessary and puts it into the
   regular perf event channels.

   This solves the inherited perf record workflow immediately:
   the parent task just creates the buffer, which gets inherited
   across exec() and fork(), into every portion of the workload.

   System-wide profiling is a small additional variant of this:
   creating such a user-vmalloc() area for all tasks in the
   system so that the PMU code has them ready in the
   context-switch code.

Solution #2 has the additional advantage that we could migrate
PEBS to it and could allow interested user-space access to the
'raw' PEBS buffer as well. (currently the PEBS buffer is only
visible to kernel-space.)

I'd suggest the easy hack first, to get things going - we can
then help out with the proper solution.

Thanks,

        Ingo

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Avi Kivity  
View profile  
 More options Dec 19 2011, 7:00 am
Newsgroups: linux.kernel
From: Avi Kivity <a...@redhat.com>
Date: Mon, 19 Dec 2011 13:00:03 +0100
Local: Mon, Dec 19 2011 7:00 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
On 12/19/2011 01:40 PM, Ingo Molnar wrote:

> 2) the proper solution: creating a 'user-space vmalloc()' that
>    is per mm and that gets inherited transparently, across
>    fork() and exec(), and which lies outside the regular vma
>    spaces. On 64-bit this should be straightforward.

That probably has uses outside perf too, but I can see mm nacks piling up.

>    These vmas are not actually 'known' to user-space normally -
>    the kernel PMU code knows about it and does what we do with
>    PEBS: flushes it when necessary and puts it into the
>    regular perf event channels.

>    This solves the inherited perf record workflow immediately:
>    the parent task just creates the buffer, which gets inherited
>    across exec() and fork(), into every portion of the workload.

The buffer still needs to be managed.  While you may be able to juggle
different threads on the same cpu using different events, threads on
other cpus need to use separate LWP contexts and buffers.

>    System-wide profiling is a small additional variant of this:
>    creating such a user-vmalloc() area for all tasks in the
>    system so that the PMU code has them ready in the
>    context-switch code.

What about security?  Do we want to allow any userspace process to mess
up the buffers?  It can even reprogram the LWP block, so you're counting
different things, or at higher frequencies, or into other processes
ordinary vmas?

You could rebuild the LWP block on every context switch I guess, but you
need to prevent access to other cpus' LWP blocks (since they may be
running other processes).  I think this calls for per-cpu cr3, even for
threads in the same process.

> Solution #2 has the additional advantage that we could migrate
> PEBS to it and could allow interested user-space access to the
> 'raw' PEBS buffer as well. (currently the PEBS buffer is only
> visible to kernel-space.)

That's probably useful for jits.

> I'd suggest the easy hack first, to get things going - we can
> then help out with the proper solution.

I think you're underestimating the complexity there.  LWP wasn't
designed for this.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Benjamin  
View profile  
 More options Dec 19 2011, 1:40 pm
Newsgroups: linux.kernel
From: Benjamin <b...@mageta.org>
Date: Mon, 19 Dec 2011 19:40:02 +0100
Local: Mon, Dec 19 2011 1:40 pm
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
Am 19.12.2011 12:58, schrieb Avi Kivity:

>> I'd suggest the easy hack first, to get things going - we can
>> then help out with the proper solution.
> I think you're underestimating the complexity there.  LWP wasn't
> designed for this.

LWP is highly limited in its ability to support more than one
"LWP-Instance" being active for a thread; IOW, that is not possible.
You can't activate LWP from a thread's context and simultaneously
activate lwp-system-wide-profiling in the way you suggested,
Ingo. Either do the first xor do the last, because you only have
one xsave-area/msr/lwpcb that is read by the hardware and only one
LWP-Buffer that is written by the hw.

So, if one thread is running LWP because it wants to
(self-monitoring and stuff [what LWP was designed for]) and a
superuser or user activates this system-wide monitoring, both would
frequently interfere with each other. I don't think you want
this to be possible at all.

Frankly, it was already a pain to get LWP running from in-kernel,
the way it is done now. I would expect much more pain if you
wanted to do this with a transparent buffer that gets passed
around on each scheduling (and this would permanently eliminate the
"lightweight" in "LWP").

best regards,
- Benjamin
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 20 2011, 4:00 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Tue, 20 Dec 2011 10:00:02 +0100
Local: Tues, Dec 20 2011 4:00 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Benjamin <b...@mageta.org> wrote:
> LWP is highly limited in its ability's to support more than
> one "LWP-Instance" being active for a thread, IOW it is not
> possible.

That's OK, we can deal with various PMU constraints just fine.

> You can't activate LWP from a threads context and
> simultaneously activate lwp-system-wide-profiling in the way
> you suggested it, Ingo. Either do the first xor do the last,

We have other PMU resources that are exclusive in that sense.

> because you only have one xsave-area/msr/lwpcb that is read by
> the hardware and only one LWP-Buffer that is written by the
> hw.

That's similar to PEBS (which we already support), there's only
one Debug Store per CPU, obviously.

> So, if one thread is running LWP, because he wants to
> (selfmonitoring and stuff [like for what lwp was designed])
> and a su or u would activate this system-wide-monitoring, both
> would frequently interfere with the each other. I don't think
> you want this to be possible at all.

The LWPCB is designed to allow multiple events, and the LWP
ring-buffer is shared between these events.

If the kernel properly manages the lwpcb then no such
'interference' happens during normal use - both outside and
self-installed events can be activated at once, up to the event
limit - similar to how we handle regular PMU events.

[ This is why the threshold IRQ support i requested is key: it
  is needed for the flow of events and for the kernel
  event-demultiplexer to work transparently. ]

> Frankly, it was already a pain to get LWP running from
> in-kernel, like it is done now. I would expect a much higher
> pain, if you would want to do this with a transparent buffer,
> that gets passed around each scheduling (and this would
> permanently eliminate the "lightweight" in "LWP").

There's no heavyweight 'passing around' of a buffer needed at
context switch time. The buffer context has to be flipped - part
of the job of context switching.

So no, i don't think any of your objections have any merit.

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 20 2011, 4:20 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Tue, 20 Dec 2011 10:20:03 +0100
Local: Tues, Dec 20 2011 4:20 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
* Avi Kivity <a...@redhat.com> wrote:

> On 12/19/2011 01:40 PM, Ingo Molnar wrote:

> > 2) the proper solution: creating a 'user-space vmalloc()' that
> >    is per mm and that gets inherited transparently, across
> >    fork() and exec(), and which lies outside the regular vma
> >    spaces. On 64-bit this should be straightforward.

> That probably has uses outside perf too, but I can see mm nacks piling up.

This can be done in arch/x86/ code if it's too x86 specific -
the platform controls the VM layout and can (and does) use
special per CPU VM areas.

> >    These vmas are not actually 'known' to user-space
> >    normally - the kernel PMU code knows about it and does
> >    what we do with PEBS: flushes it when necessary and puts
> >    it into the regular perf event channels.

> >    This solves the inherited perf record workflow
> >    immediately: the parent task just creates the buffer,
> >    which gets inherited across exec() and fork(), into every
> >    portion of the workload.

> The buffer still needs to be managed. [...]

Of course, like we manage the DS buffer for PEBS.

> [...]  While you may be able to juggle different threads on
> the same cpu using different events, threads on other cpus
> need to use separate LWP contexts and buffers.

Yes, like different threads on different CPUs have different DS
buffers, *here and today*.

Try this on (most) modern Intel CPUs:

  perf top -e cycles:pp

That will activate that exact mechanism.

The LWPCB and the LWP ring-buffer are really just an extension
of that concept: per task buffers which are ring 3 visible.

Note that user-space does not actually have to know about any of
these LWP addresses (but can access them if it wants to - no
strong feelings about that) - in the correctly implemented model
it's fully kernel managed.

In fact the PEBS case had one more complication: there's the BTS
branch-tracing feature which we support as well, and which
overlaps PEBS use of the DS.

All these PMU hardware limitations can be supported, as long as
the instrumentation *capability* adds value to the system in one
way or another.

> >    System-wide profiling is a small additional variant of
> >    this: creating such a user-vmalloc() area for all tasks
> >    in the system so that the PMU code has them ready in the
> >    context-switch code.

> What about security?  Do we want to allow any userspace
> process to mess up the buffers?  It can even reprogram the LWP
> block, so you're counting different things, or at higher
> frequencies, or into other processes ordinary vmas?

In most usecases it's the application messing up its own
profiling - don't do that if it hurts.

I'd argue that future LWP versions should allow kernel-protected
LWP pages, as long as the LWPCB is privileged as well.
That would be useful for another purpose as well: LWP could be
allowed to sample kernel-space execution as well, an obviously
useful feature that was left out from LWP for barely explicable
reasons.

Granted, LWP was mis-designed to quite a degree, those AMD chip
engineers should have talked to people who understand how modern
PMU abstractions are added to the OS kernel properly. But this
mis-design does not keep us from utilizing this piece of
hardware intelligently. PEBS/DS/BTS wasn't a beauty either.

> You could rebuild the LWP block on every context switch I
> guess, but you need to prevent access to other cpus' LWP
> blocks (since they may be running other processes).  I think
> this calls for per-cpu cr3, even for threads in the same
> process.

Why would we want to rebuild the LWPCB? Just keep one per task
and do a lightweight switch to it during switch_to() - like we
do it with the PEBS hardware-ring-buffer. It can be in the same
single block of memory with the ring-buffer itself. (PEBS has
similar characteristics)

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Avi Kivity  
View profile  
 More options Dec 20 2011, 4:50 am
Newsgroups: linux.kernel
From: Avi Kivity <a...@redhat.com>
Date: Tue, 20 Dec 2011 10:50:02 +0100
Local: Tues, Dec 20 2011 4:50 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
On 12/20/2011 11:15 AM, Ingo Molnar wrote:

> The LWPCB and the LWP ring-buffer are really just an extension
> of that concept: per task buffers which are ring 3 visible.

No, it's worse.  They are ring 3 writeable, and ring 3 configurable.

> Note that user-space does not actually have to know about any of
> these LWP addresses (but can access them if it wants to - no
> strong feelings about that) - in the correctly implemented model
> it's fully kernel managed.

btw, that means that the intended use case - self-monitoring with no
kernel support - cannot be done.  That's not an issue per se, it depends
on the cost of the kernel support and whether any information is lost
(like the records inserted by the explicit LWP instructions).

> In fact the PEBS case had one more complication: there's the BTS
> branch-tracing feature which we support as well, and which
> overlaps PEBS use of the DS.

(semi-related: both DS and LWP cannot be used by kvm to monitor a guest
from the host, since they both use virtual addresses)

Not in the system profiling case (not that anything truly bad will
happen, but it's not nice to have the kernel supplying data it can't trust).

> I'd argue that future LWP versions should allow kernel-protected
> LWP pages, as long as the LWPCB is privileged as well.
> That would be useful for another purpose as well: LWP could be
> allowed to sample kernel-space execution as well, an obviously
> useful feature that was left out from LWP for barely explicable
> reasons.

> Granted, LWP was mis-designed to quite a degree, those AMD chip
> engineers should have talked to people who understand how modern
> PMU abstractions are added to the OS kernel properly. But this
> mis-design does not keep us from utilizing this piece of
> hardware intelligently. PEBS/DS/BTS wasn't a beauty either.

LWP was clearly designed for userspace jits, and clearly designed to
work with minimal kernel support.  For this use case, it wasn't
mis-designed.  Maybe they designed for the wrong requirements and
constraints (for example, it is much harder to get PMU abstractions into
Windows than into Linux), but within those requirements, it appears to
be well done.

I'm worried that shoe-horning LWP into the system profiling role will
result in poor support for that role, *and* prevent its use in the
intended use case.

> > You could rebuild the LWP block on every context switch I
> > guess, but you need to prevent access to other cpus' LWP
> > blocks (since they may be running other processes).  I think
> > this calls for per-cpu cr3, even for threads in the same
> > process.

> Why would we want to rebuild the LWPCB? Just keep one per task
> and do a lightweight switch to it during switch_to() - like we
> do it with the PEBS hardware-ring-buffer. It can be in the same
> single block of memory with the ring-buffer itself. (PEBS has
> similar characteristics)

If it's in globally visible memory, the user can reprogram the LWP from
another thread to thrash ordinary VMAs.  It has to be process local (at
which point, you can just use do_mmap() to allocate it).

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 20 2011, 5:20 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Tue, 20 Dec 2011 11:20:02 +0100
Local: Tues, Dec 20 2011 5:20 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Avi Kivity <a...@redhat.com> wrote:

> On 12/20/2011 11:15 AM, Ingo Molnar wrote:

> > The LWPCB and the LWP ring-buffer are really just an
> > extension of that concept: per task buffers which are ring 3
> > visible.

> No, it's worse.  They are ring 3 writeable, and ring 3
> configurable.

Avi, i know that very well.

> > Note that user-space does not actually have to know about
> > any of these LWP addresses (but can access them if it wants
> > to - no strong feelings about that) - in the correctly
> > implemented model it's fully kernel managed.

> btw, that means that the intended use case - self-monitoring
> with no kernel support - cannot be done. [...]

Arguably many years ago the hardware was designed for brain-dead
instrumentation abstractions.

Note that as i said user-space *can* access the area if it
thinks it can do it better than the kernel (and we could export
that information in a well defined way - we could do the same
for PEBS as well) - i have no particular strong feelings about
allowing that other than i think it's an obviously inferior
model - *as long* as proper, generic, usable support is added.

From my perspective there's really just one realistic option to
accept this feature: if it's properly fit into existing, modern
instrumentation abstractions. I made that abundantly clear in my
feedback so far.

It can obviously be done, alongside the suggestions i've given.

That was the condition for Intel PEBS/DS/BTS support as well -
which is hardware that has at least as many brain-dead
constraints and roadblocks as LWP.

> > > You could rebuild the LWP block on every context switch I
> > > guess, but you need to prevent access to other cpus' LWP
> > > blocks (since they may be running other processes).  I
> > > think this calls for per-cpu cr3, even for threads in the
> > > same process.

> > Why would we want to rebuild the LWPCB? Just keep one per
> > task and do a lightweight switch to it during switch_to() -
> > like we do it with the PEBS hardware-ring-buffer. It can be
> > in the same single block of memory with the ring-buffer
> > itself. (PEBS has similar characteristics)

> If it's in globally visible memory, the user can reprogram the
> LWP from another thread to thrash ordinary VMAs. [...]

User-space can smash it and make it not profile or profile the
wrong thing or into the wrong buffer - but LWP itself runs with
ring3 privileges so it won't do anything the user couldn't do
already.

Lack of protection against self-misconfiguration-damage is a
benign hardware mis-feature - something for LWP v2 to specify i
guess.

But i don't want to reject this feature based on this
mis-feature alone - it's a pretty harmless limitation and the
precise, skid-less profiling that LWP offers is obviously
useful.

> [...]  It has to be process local (at which point, you can
> just use do_mmap() to allocate it).

get_unmapped_area() + install_special_mapping() is probably
better, but yeah.

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Joerg Roedel  
View profile  
 More options Dec 20 2011, 10:30 am
Newsgroups: linux.kernel
From: Joerg Roedel <j...@8bytes.org>
Date: Tue, 20 Dec 2011 16:30:03 +0100
Local: Tues, Dec 20 2011 10:30 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
Hi Ingo,

On Tue, Dec 20, 2011 at 11:09:17AM +0100, Ingo Molnar wrote:
> > No, it's worse.  They are ring 3 writeable, and ring 3
> > configurable.

> Avi, i know that very well.

So you agree that your ideas presented in this thread of integrating LWP
into perf have serious security implications?

> > btw, that means that the intended use case - self-monitoring
> > with no kernel support - cannot be done. [...]

> Arguably many years ago the hardware was designed for brain-dead
> instrumentation abstractions.

The point of LWP design is, that it doesn't require abstractions except
for the threshold interrupt.

I am fine with integrating LWP into perf as long as it makes sense and
does not break the intended usage scenario for LWP.

        [ Because LWP is a user-space feature and designed as such,
          forcing it into an abstraction makes software that uses LWP
          unportable. ]

But Ingo, the ideas you presented in this thread are clearly no-gos.
Having a shared per-cpu buffer for LWP data that is read by perf
obviously has very bad security implications, as Avi already pointed
out. It also destroys the intended use-case for LWP because it disturbs
any process that is doing self-profiling with LWP.

> Note that as i said user-space *can* access the area if it
> thinks it can do it better than the kernel (and we could export
> that information in a well defined way - we could do the same
> for PEBS as well) - i have no particular strong feelings about
> allowing that other than i think it's an obviously inferior
> model - *as long* as proper, generic, usable support is added.

LWP can't be compared in any serious way with PEBS. The only common
thing is the hardware-managed ring-buffer. But PEBS is an addition to
MSR based performance monitoring resources (for which a kernel
abstraction makes a lot of sense) and can only be controlled from ring 0
while LWP is a complete user-space controlled PMU which has no link at
all to the MSR-based, ring 0 controlled PMU.

> From my perspective there's really just one realistic option to
> accept this feature: if it's properly fit into existing, modern
> instrumentation abstractions. I made that abundantly clear in my
> feedback so far.

The threshold interrupt fits well into the perf-abstraction layer. Even
self-monitoring of processes does, and Hans posted patches from Benjamin
for that.  What do you think about this approach?

> User-space can smash it and make it not profile or profile the
> wrong thing or into the wrong buffer - but LWP itself runs with
> ring3 privileges so it won't do anything the user couldn't do
> already.

The point is, if user-space re-programs LWP it will continue to write
its samples to the new ring-buffer virtual-address set up by user-space.
It will still use that virtual address in another address-space after a
task-switch. This allows processes to corrupt memory of other processes.
There are ways to hack around that but these have a serious impact on
task-switch costs so this is also no way to go.

> Lack of protection against self-misconfiguration-damage is a
> benign hardware mis-feature - something for LWP v2 to specify i
> guess.

So what you are saying is (not just here, also in other emails in this
thread) that every hardware not designed for perf is crap?

> get_unmapped_area() + install_special_mapping() is probably
> better, but yeah.

get_unmapped_area() only works on current. So it can't be used for
that purpose either. Please believe me, we considered and evaluated a lot
of ways to install a mapping into a different process, but none of them
worked out. It is clearly not possible in a sane way without major
changes to the VMM code. Feel free to show us a sane way if you disagree
with that.

So okay, where are we now? We have patches from Hans that make LWP
mostly usable in the way it is intended for. There are already a lot of
people waiting for this to support LWP in the kernel (and they want to
use it in the intended way, not via perf). And we have patches from
Benjamin adding the missing threshold interrupt and a self-monitoring
abstraction of LWP for perf. Monitoring other processes using perf is
not possible because we can't reliably install a mapping into another
process. System wide monitoring has bad security implications and
destroys the intended use-cases. So as I see it, the only abstraction
for integrating LWP into perf that is feasible is posted in this thread.
Can we agree to focus on the posted approach?

Thanks,

        Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Vince Weaver  
View profile  
 More options Dec 20 2011, 10:50 am
Newsgroups: linux.kernel
From: Vince Weaver <vweav...@eecs.utk.edu>
Date: Tue, 20 Dec 2011 16:50:02 +0100
Local: Tues, Dec 20 2011 10:50 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

On Tue, 20 Dec 2011, Ingo Molnar wrote:
> Granted, LWP was mis-designed to quite a degree, those AMD chip
> engineers should have talked to people who understand how modern
> PMU abstractions are added to the OS kernel properly.

You do realize that LWP was probably in design 5+ years ago, at a time
when most Linux kernel developers wanted nothing to do with perf counters,
and thus anyone they did contact for help would have been from the
since-rejected perfctr or perfmon2 camp.

Also, I'm sure Linux isn't the only Operating System that they had in mind
when designing this functionality.

Running LWP through the kernel is a foolish idea.  Does anyone have any
numbers on what that would do to overhead?

perf_events creates huge overhead when doing self monitoring.  For simple
self-monitoring counter reads it is an *order of magnitude* worse than
doing the same thing with perfctr.
  (see numbers here if you don't believe me:
    http://web.eecs.utk.edu/~vweaver1/projects/perf-events/benchmarks/rdt... )

Vince

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 20 2011, 1:40 pm
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Tue, 20 Dec 2011 19:40:02 +0100
Local: Tues, Dec 20 2011 1:40 pm
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Vince Weaver <vweav...@eecs.utk.edu> wrote:

> On Tue, 20 Dec 2011, Ingo Molnar wrote:
> > Granted, LWP was mis-designed to quite a degree, those AMD
> > chip engineers should have talked to people who understand
> > how modern PMU abstractions are added to the OS kernel
> > properly.

> You do realize that LWP was probably in design 5+ years ago,
> at a time when most Linux kernel developers wanted nothing to
> do with perf counters, and thus anyone they did contact for
> help would have been from the since-rejected perfctr or
> perfmon2 camp.

That does not really contradict what i said.

> Also, I'm sure Linux isn't the only Operating System that they
> had in mind when designing this functionality.

> Running LWP through the kernel is a foolish idea. Does anyone
> have any numbers on what that would do to overhead?

At most an LLWPCB instruction is needed.

> perf_events creates huge overhead when doing self monitoring.  
> For simple self-monitoring counter reads it is an *order of
> magnitude* worse than doing the same thing with perfctr.

Only if you are comparing apples to oranges: if you compare a
full kernel based read of self-profiling counters with an RDPMC
instruction.

But as we told you previously, you could use RDPMC under perf as
well, last i checked PeterZ posted experimental patches for
that. Peter, what's the status of that?

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 20 2011, 1:50 pm
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Tue, 20 Dec 2011 19:50:01 +0100
Local: Tues, Dec 20 2011 1:50 pm
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Joerg Roedel <j...@8bytes.org> wrote:

> Hi Ingo,

> On Tue, Dec 20, 2011 at 11:09:17AM +0100, Ingo Molnar wrote:

> > > No, it's worse.  They are ring 3 writeable, and ring 3
> > > configurable.

> > Avi, i know that very well.

> So you agree that your ideas presented in this thread of
> integrating LWP into perf have serious security implications?

No, i do not agree at all - you are drastically misrepresenting
my position.

> > > btw, that means that the intended use case -
> > > self-monitoring with no kernel support - cannot be done.
> > > [...]

> > Arguably many years ago the hardware was designed for
> > brain-dead instrumentation abstractions.

> The point of LWP design is, that it doesn't require
> abstractions except for the threshold interrupt.

> I am fine with integrating LWP into perf as long as it makes
> sense and does not break the intended usage scenario for LWP.

That's the wrong way around - in reality we'll integrate LWP
upstream only once it makes sense and works well with the
primary instrumentation abstraction we have in the kernel.

Otherwise my "sorry, it's not convincing enough yet" NAK against
the new feature stands.

In fact as per Linus's rules about new kernel features,
maintainers don't even have to justify NAK's by offering an
implementation roadmap that would make the feature acceptable.

Me or PeterZ could just say "this feature is too limited and not
convincing enough yet, sorry".

*You* who are pushing the feature have to convince the objecting
maintainer that the feature is worth integrating.

But i'm being nice and helpful here by giving you a rough
technical outline of how you could overcome my "sorry, this is
not convincing in its current form yet" rejection of the current
LWP patches.

>    [ Because LWP is a user-space feature and designed as such,
>      forcing it into an abstraction makes software that uses LWP
>      unportable. ]

> But Ingo, the ideas you presented in this thread are clearly
> no-gos.

Nonsense.

> Having a shared per-cpu buffer for LWP data that is read by
> perf obviously has very bad security implications, as Avi
> already pointed out. [...]

Stop this stupidity already!

There's no "security implications" whatsoever. LWP is a ring-3
hw feature and it can do nothing that the user-space app could
not already do ...

> [...] It also destroys the intended use-case for LWP because
> it disturbs any process that is doing self-profiling with LWP.

Why would it destroy that? Self-profiling can install events
just fine, the kernel will arbitrate the resource.

The 'intended usecase' is meaningless to me - it was done in
some closed process apparently not talking to anyone who knows a
bit about Linux instrumentation. If you want this code upstream
then you need to convince me that the feature makes sense in the
general and current scheme of things.

I've outlined the (rather workable) technical roadmap for that.

> > Note that as i said user-space *can* access the area if it
> > thinks it can do it better than the kernel (and we could
> > export that information in a well defined way - we could do
> > the same for PEBS as well) - i have no particular strong
> > feelings about allowing that other than i think it's an
> > obviously inferior model - *as long* as proper, generic,
> > usable support is added.

> LWP can't be compared in any serious way with PEBS. The only
> common thing is the hardware-managed ring-buffer. [...]

Which ring-buffer actually happens to be one of the main
things that has to be managed ...

> [...] But PEBS is an addition to MSR based performance
> monitoring resources (for which a kernel abstraction makes a
> lot of sense) and can only be controlled from ring 0 while LWP
> is a complete user-space controlled PMU which has no link at
> all to the MSR-based, ring 0 controlled PMU.

It's a ring-3 controlled PMU feature, not a user-space PMU
feature. It *can* be controlled by user-space - but it obviously
can also (and i argue, it should be) - managed by the kernel,
under Linux.

The kernel is running ring-3 code as well, and it's managing
ring-3 accessible resources as well, there's nothing new about
that.

> > From my perspective there's really just one realistic option
> > to accept this feature: if it's properly fit into existing,
> > modern instrumentation abstractions. I made that abundantly
> > clear in my feedback so far.

> The threshold interrupt fits well into the perf-abstraction
> layer. Even self-monitoring of processes does, and Hans posted
> patches from Benjamin for that.  What do you think about this
> approach?

As i said it's a promising first step - although the
discussion here convinced me that it needs to be even more
feature complete, i don't really see that you guys understand
how such things should be implemented.

You seem to be dead set on supporting a weird special case
'intended workload' while forgetting the *much* more common
profiling workloads we have under Linux.

I don't mind supporting weird stuff as well, but you have to
keep the common case in mind ...

I'd like to see the ring-buffer and the events managed by the
kernel too, at least so that perf record works fine with this
PMU feature.

> > User-space can smash it and make it not profile or profile
> > the wrong thing or into the wrong buffer - but LWP itself
> > runs with ring3 privileges so it won't do anything the user
> > couldn't do already.

> The point is, if user-space re-programs LWP it will continue
> to write its samples to the new ring-buffer virtual-address
> set up by user-space. It will still use that virtual address
> in another address-space after a task-switch. This allows
> processes to corrupt memory of other processes. [...]

That's nonsense. As i said in my previous mail the LWPCB should
be per task and switched on task switch - just like the DS/PEBS
context is.

> [...] There are ways to hack around that but these have a
> serious impact on task-switch costs so this is also no way to
> go.

We are seeing no problems with this approach under PEBS.

> > Lack of protection against self-misconfiguration-damage is a
> > benign hardware mis-feature - something for LWP v2 to
> > specify i guess.

> So what you are saying is (not just here, also in other emails
> in this thread) that every hardware not designed for perf is
> crap?

No - PMU hardware designed to not allow the profiling of the
kernel is obviously a crappy aspect of it. Also, PMU hardware
that does not allow 100% encapsulation by the kernel is
obviously not very wisely done either.

Those limitations are not a big problem for usable Linux support
- and future iterations of the LWP hardware can trivially
address those shortcomings.

> > get_unmapped_area() + install_special_mapping() is probably
> > better, but yeah.

> get_unmapped_area() only works on current. [...]

Which is a perfectly fine first step to support the
'perf record' inheritance-tree case - which is a very
common profiling method.

> [...] So it can't be used for that purpose too. [...]

Hey, i wrote bits of get_unmapped_area(), way back. I had code
on my machine that inserted vmas into other tasks' address
spaces and can confirm that it works. Do you take my word for it
that it's possible?

Firstly, the perf record case - which is an important, primary
workflow - can work with the code as-is just fine.

Secondly, for system-wide profiling vmas can be inserted into
another task's mm context just fine as well: technically we do
that all the time, when a threaded program is running.

Inserting a vma into another task's mm where that mm is not ours
is indeed not typical, but not unprecedented either, UML patches
did that a couple of years ago. (In fact the upcoming uprobes
patches are doing something far more intrusive.)

The VM modification is trivial AFAICS: an 'mm' parameter has to
be added to a new do_mmap() variant, that's all - the code is
already SMP-safe, due to the threaded case.

Otherwise using another task's mm is safe if you acquire it via
get_task_mm()/mmput().

[ Sidenote: as a bonus this would put infrastructure in place to
  have user-space accessible trace buffers, insertable via
  the LWPINS instruction and recoverable via the regular kernel
  perf event processing facilities. LWP has more potential than
  just self-profiling, if we use the right abstractions... ]

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Vince Weaver  
View profile  
 More options Dec 20 2011, 5:50 pm
Newsgroups: linux.kernel
From: Vince Weaver <vweav...@eecs.utk.edu>
Date: Tue, 20 Dec 2011 23:50:01 +0100
Local: Tues, Dec 20 2011 5:50 pm
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

Well I'm just assuming that when you say "people who understand
how modern PMU abstractions are added to the OS kernel properly"
you mean yourself and the perf_event crew.

There are many other schools of thought on what kernel PMU abstractions
should look like, and I'm sure AMD conferred with them.

> > Running LWP through the kernel is a foolish idea. Does anyone
> > have any numbers on what that would do to overhead?

> At most an LLWPCB instruction is needed.

you're saying that all the crazy kernel stuff you're proposing will have
no extra overhead when compared to just implementing the proper xsave
context switch code?

> > perf_events creates huge overhead when doing self monitoring.  
> > For simple self-monitoring counter reads it is an *order of
> > magnitude* worse than doing the same thing with perfctr.

> Only if you are comparing apples to oranges: if you compare a
> full kernel based read of self-profiling counters with an RDPMC
> instruction.

The benchmarks I posted show measurements getting *real data* from the
counters.  Yes, on perfctr this is mostly just a rdpmc call plus a quick
access to some mmap'd memory to make sure the context is valid.

perfctr is an order of magnitude less overhead because it was designed
from the beginning to be a very low-overhead way to get self-monitoring
data.  A lot of time and tuning was spent getting it that fast.

perf_event throws everything and the kitchen sink into the kernel.  I'm
guessing low-overhead self-monitoring was not really one of your primary
design goals, and it shows.

> But as we told you previously, you could use RDPMC under perf as
> well, last i checked PeterZ posted experimental patches for
> that. Peter, what's the status of that?

yes.  If you checked the benchmark results I showed, you'd have seen that
I run tests against that patchset too, and it's really only marginally
better than the current perf_event stuff.  I might have written the
benchmark poorly, but that's mainly because as-posted the documentation
for how to use that patchset is a bit unclear.

Vince
vweav...@eecs.utk.edu

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Joerg Roedel  
View profile  
 More options Dec 20 2011, 7:10 pm
Newsgroups: linux.kernel
From: Joerg Roedel <j...@8bytes.org>
Date: Wed, 21 Dec 2011 01:10:02 +0100
Local: Tues, Dec 20 2011 7:10 pm
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

On Tue, Dec 20, 2011 at 07:40:04PM +0100, Ingo Molnar wrote:
> > I am fine with integrating LWP into perf as long as it makes
> > sense and does not break the intended usage scenario for LWP.

> That's the wrong way around - in reality we'll integrate LWP
> upstream only once it makes sense and works well with the
> primary instrumentation abstraction we have in the kernel.

I still don't see why you want an abstraction for a hardware feature
that clearly doesn't need it. From an enablement perspective LWP is much
closer to AVX than to the MSR based PMU. And nobody really wants or
needs a kernel abstraction for AVX, no?

> Me or PeterZ could just say "this feature is too limited and not
> convincing enough yet, sorry".

This statement shows very clearly the bottom-line of our conflict. You
see this as a perf-topic, for everyone else it is an x86 topic.

> But i'm being nice and helpful here [...]

And I appreciate the discussion. But we have fundamentally different
stand-points.  I hope we can come to an agreement.

> There's no "security implications" whatsoever. LWP is a ring-3
> hw feature and it can do nothing that the user-space app could
> not already do ...

Really? How could an application count DCache misses today without
instrumentation? I guess your answer is 'with perf', but LWP is a much
more light-weight way to do that because it works _completely_ in
hardware when the kernel supports context-switching it.

> > [...] It also destroys the intended use-case for LWP because
> > it disturbs any process that is doing self-profiling with LWP.

> Why would it destroy that? Self-profiling can install events
> just fine, the kernel will arbitrate the resource.

Because you can't reliably hand over the LWPCB management to the kernel.
The instruction to load a new LWPCB is executable in ring-3. Any
kernel-use of LWP will never be reliable.

> > So what you are saying is (not just here, also in other emails
> > in this thread) that every hardware not designed for perf is
> > crap?

> No - PMU hardware designed to not allow the profiling of the
> kernel is obviously a crappy aspect of it. Also, PMU hardware
> that does not allow 100% encapsulation by the kernel is
> obviously not very wisely done either.

Why? What's wrong with user-space having control over its own PMU in a
safe way? This is what the feature was designed for.

Thanks,

        Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Gleb Natapov  
View profile  
 More options Dec 21 2011, 6:50 am
Newsgroups: linux.kernel
From: Gleb Natapov <g...@redhat.com>
Date: Wed, 21 Dec 2011 12:50:03 +0100
Local: Wed, Dec 21 2011 6:50 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
On Tue, Dec 20, 2011 at 07:40:04PM +0100, Ingo Molnar wrote:
> > The point is, if user-space re-programs LWP it will continue
> > to write its samples to the new ring-buffer virtual-address
> > set up by user-space. It will still use that virtual address
> > in another address-space after a task-switch. This allows
> > processes to corrupt memory of other processes. [...]

> That's nonsense. As i said in my previous mail the LWPC should
> be per task and switched on task switch - just like the DS/PEBS
> context is.

Is it? Looking at arch/x86/kernel/cpu/perf_event_intel_ds.c it seems
like DS is per cpu, not per task.

--
                        Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 21 2011, 7:10 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Wed, 21 Dec 2011 13:10:02 +0100
Local: Wed, Dec 21 2011 7:10 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Vince Weaver <vweav...@eecs.utk.edu> wrote:

> > But as we told you previously, you could use RDPMC under
> > perf as well, last i checked PeterZ posted experimental
> > patches for that. Peter, what's the status of that?

> yes.  If you checked the benchmark results I showed, you'd
> have seen that I run tests against that patchset too, and it's
> really only marginally better than the current perf_event
> stuff.  I might have written the benchmark poorly, [...]

It is significantly faster for the self-monitoring case - which
is a pretty niche usecase btw.

Have a look at how the 'perf test' self-test utilizes RDPMC in
these commits in tip:perf/fast:

 08aa0d1f376e: perf tools: Add x86 RDPMC, RDTSC test
 e3f3541c19c8: perf: Extend the mmap control page with time (TSC) fields
 0c9d42ed4cee: perf, x86: Provide means for disabling userspace RDPMC
 fe4a330885ae: perf, x86: Implement user-space RDPMC support, to allow fast, user-space access to self-monitoring counters
 365a4038486b: perf: Fix mmap_page::offset computation
 35edc2a5095e: perf, arch: Rework perf_event_index()
 9a0f05cb3688: perf: Update the mmap control page on mmap()

You can find these commits in today's -tip. Overhead should be
somewhere around 50 cycles per call (i suspect it could be
optimized more), which is a fraction of what a syscall is
costing.

> [...] but that's mainly because as-posted the documentation
> for how to use that patchset is a bit unclear.

In your world there's always someone else to blame.

The thing is, *you* are interested in this niche feature, PeterZ
not so much.

You made a false claim that perf cannot use RDPMC and PeterZ has
proven you wrong once again. Your almost non-stop whining and
the constant misrepresentations you make are not very
productive.

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Ingo Molnar  
View profile  
 More options Dec 21 2011, 7:40 am
Newsgroups: linux.kernel
From: Ingo Molnar <mi...@elte.hu>
Date: Wed, 21 Dec 2011 13:40:02 +0100
Local: Wed, Dec 21 2011 7:40 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

* Joerg Roedel <j...@8bytes.org> wrote:

> On Tue, Dec 20, 2011 at 07:40:04PM +0100, Ingo Molnar wrote:

> > > I am fine with integrating LWP into perf as long as it makes
> > > sense and does not break the intended usage scenario for LWP.

> > That's the wrong way around - in reality we'll integrate LWP
> > upstream only once it makes sense and works well with the
> > primary instrumentation abstraction we have in the kernel.

> I still don't see why you want an abstraction for a hardware
> feature [...]

Because if done properly then Linux users and developers will be
able to utilize the hardware feature well beyond the limited
scope these patches are giving it.

A couple of examples:

1) This command:

      perf record -e lwp:instructions ./myapp

   will be possible and will be able to do skid-less profiling.

2) In the long run apps might be able to insert lightweight
   trace entries without entering the kernel, using the LWPINS
   instruction.

3) Maybe LWP will be enhanced with the ability to profile system
   mode execution as well - which we'll be able to support very
   easily.

These features are *far* more interesting than some limited
self-monitoring use of LWP.

I don't mind niches per se, so i don't mind the self-monitoring
usecase either, as long as they are not trying to be the *only*
feature, at the expense of more interesting features.

I think it can all be supported in a consistent way (see my
previous mails) - but the feature as presented today just does
not look useful enough to me if it only supports that niche
self-monitoring usecase.

> > > [...] It also destroys the intended use-case for LWP
> > > because it disturbs any process that is doing
> > > self-profiling with LWP.

> > Why would it destroy that? Self-profiling can install events
> > just fine, the kernel will arbitrate the resource.

> Because you can't reliably hand over the LWPCB management to
> the kernel. The instruction to load a new LWPCB is executable
> in ring-3. Any kernel-use of LWP will never be reliable.

It will be reliable for all tasks that don't intentionally
modify their own LWPCB's but stay with the defined APIs and no
task will be able to destroy *another* task's LWPCB (be it in or
outside of any APIs), if properly implemented.

So a task can mess with itself - and it can already do that
today.

So what's your point?

> > > So what you are saying is (not just here, also in other
> > > emails in this thread) that every hardware not designed
> > > for perf is crap?

> > No - PMU hardware designed to not allow the profiling of the
> > kernel is obviously a crappy aspect of it. Also, PMU
> > hardware that does not allow 100% encapsulation by the
> > kernel is obviously not very wisely done either.

> Why? What's wrong with user-space having control over its own
> PMU in a safe way? This is what the feature was designed for.

Read what i've written: 'PMU hardware designed to not allow the
profiling of the kernel is obviously a crappy aspect of it'.

There is no reason why LWP could not allow profiling of kernel
execution as well, with a simple security model to make sure
unprivileged user-space does not profile kernel execution: such
as a LWP-load-time check whether the LWPCB lies on a system pte
or not.

This would allow everything that is possible today - and more.

Allowing user-space access to the PMU does not preclude a proper
PMU abstraction.

Thanks,

        Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Avi Kivity  
View profile  
 More options Dec 21 2011, 7:50 am
Newsgroups: linux.kernel
From: Avi Kivity <a...@redhat.com>
Date: Wed, 21 Dec 2011 13:50:02 +0100
Local: Wed, Dec 21 2011 7:50 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)
On 12/21/2011 02:34 PM, Ingo Molnar wrote:

> I think it can all be supported in a consistent way (see my
> previous mails) - but the feature as presented today just does
> not look useful enough to me if it only supports that niche
> self-monitoring usecase.

I hate to re-enter this thread, but this "niche use case" is exactly
what LWP is designed for.  And once the JVM is adapted to exploit LWP,
its use will dwarf all of the uses of perf put together (except the NMI
watchdog).  You're only causing the developers needless pain by forcing
them to fit this red peg into a green hole.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Vince Weaver  
View profile  
 More options Dec 21 2011, 9:00 am
Newsgroups: linux.kernel
From: Vince Weaver <vweav...@eecs.utk.edu>
Date: Wed, 21 Dec 2011 15:00:02 +0100
Local: Wed, Dec 21 2011 9:00 am
Subject: Re: [RFC 4/5] x86, perf: implements lwp-perf-integration (rc1)

On Wed, 21 Dec 2011, Ingo Molnar wrote:

> * Vince Weaver <vweav...@eecs.utk.edu> wrote:

> Have a look at how the 'perf test' self-test utilizes RDPMC in
> these commits in tip:perf/fast:

I did.  How many times do I have to tell you I already applied, ran, and
benchmarked this code, and the results were posted on that link in
the previous e-mail?

> You can find these commits in today's -tip. Overhead should be
> somewhere around 50 cycles per call (i suspect it could be
> optimized more), which is a fraction of what a syscall is
> costing.

No, it's more than a "50-cycle" call.  To get a value out you need to do
two rdpmc calls plus some mucking about with some mmap'd values.  It still
benchmarks much slower than the perfctr implementation.

I'd be glad to see _actual_ numbers for an _actual_ test that measures
useful values.  Until then I'm believing the numbers I measure on three
different architectures which still show that perf_event has high
overhead.

> > [...] but that's mainly because as-posted the documentation
> > for how to use that patchset is a bit unclear.

> In your world there's always someone else to blame.

Yes.  I was blaming myself for not understanding the code well enough to
write a good benchmark.

> The thing is, *you* are interested in this niche feature, PeterZ
> not so much.

The thing *we* are interested in is the main PAPI use case.  It's arguable
that more people use PAPI under Linux than actually use perf.

> You made a false claim that perf cannot use RDPMC and PeterZ has
> proven you wrong once again. Your almost non-stop whining and
> the constant misrepresentations you make are not very
> productive.

I made no such claim.  Please cite.

You made the questionable claim that the AMD devels didn't consult with
any competent perf counter experts.  What you meant was that they didn't
have the foresight 5 years ago that Ingo Molnar would come in late with some NIH
implementation of some niche kernel functionality and take it over.  
Though in retrospect I guess that's inevitable.

Vince

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Messages 26 - 50 of 54 < Older  Newer >
« Back to Discussions « Newer topic     Older topic »