This patch set gets rid of read_barrier_depends() in ftrace in favor of
RCU APIs, increases the RCU CPU stall timeout if CONFIG_PROVE_RCU, and
fixes an x86/mce lockdep splat.
Thanx, Paul
------------------------------------------------------------------------
arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++++----
kernel/rcutree.h | 22 +++++++++++++++-------
kernel/trace/ftrace.c | 23 +++++++++++++----------
3 files changed, 36 insertions(+), 21 deletions(-)
--
Cc: Steven Rostedt <ros...@goodmis.org>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Cc: Ingo Molnar <mi...@redhat.com>
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
kernel/trace/ftrace.c | 22 +++++++++++++---------
1 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8378357..8c5adc0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/hash.h>
+#include <linux/rcupdate.h>
#include <trace/events/sched.h>
@@ -88,18 +89,22 @@ ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
#endif
+/*
+ * Traverse the ftrace_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
- struct ftrace_ops *op = ftrace_list;
-
- /* in case someone actually ports this to alpha! */
- read_barrier_depends();
+ struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
while (op != &ftrace_list_end) {
- /* silly alpha */
- read_barrier_depends();
op->func(ip, parent_ip);
- op = op->next;
+ op = rcu_dereference_raw(op->next); /*see above*/
};
}
@@ -154,8 +159,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
* the ops->next pointer is valid before another CPU sees
* the ops pointer included into the ftrace_list.
*/
- smp_wmb();
- ftrace_list = ops;
+ rcu_assign_pointer(ftrace_list, ops);
if (ftrace_enabled) {
ftrace_func_t func;
--
1.6.6
Cc: Thomas Gleixner <tg...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: x...@kernel.org
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a8aacd4..4442e9e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,11 @@
#include "mce-internal.h"
+#define rcu_dereference_check_mce(p) \
+ rcu_dereference_check((p), \
+ rcu_read_lock_sched_held() || \
+ lockdep_is_held(&mce_read_mutex))
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -158,7 +163,7 @@ void mce_log(struct mce *mce)
mce->finished = 0;
wmb();
for (;;) {
- entry = rcu_dereference(mcelog.next);
+ entry = rcu_dereference_check_mce(mcelog.next);
for (;;) {
/*
* When the buffer fills up discard new entries.
@@ -1500,7 +1505,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
return -ENOMEM;
mutex_lock(&mce_read_mutex);
- next = rcu_dereference(mcelog.next);
+ next = rcu_dereference_check_mce(mcelog.next);
/* Only supports full reads right now */
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
@@ -1565,7 +1570,7 @@ timeout:
static unsigned int mce_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_wait, wait);
- if (rcu_dereference(mcelog.next))
+ if (rcu_dereference_check_mce(mcelog.next))
return POLLIN | POLLRDNORM;
return 0;
}
--
1.6.6
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
kernel/rcutree.h | 21 +++++++++++++++------
1 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1439eb5..4a525a3 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -246,12 +246,21 @@ struct rcu_data {
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */
-#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */
-#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
- /* to take at least one */
- /* scheduling clock irq */
- /* before ratting on them. */
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA 0
+#endif
+
+#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA)
+ /* for rsp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA)
+ /* for rsp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
+ /* to take at least one */
+ /* scheduling clock irq */
+ /* before ratting on them. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
--
1.6.6
Acked-by: Steven Rostedt <ros...@goodmis.org>
Thanks Paul!
> Cc: Frederic Weisbecker <fwei...@gmail.com>
> Cc: Ingo Molnar <mi...@redhat.com>
> Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
> ---
>
> @@ -154,8 +159,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
> * the ops->next pointer is valid before another CPU sees
> * the ops pointer included into the ftrace_list.
> */
> - smp_wmb();
> - ftrace_list = ops;
> + rcu_assign_pointer(ftrace_list, ops);
[ Off topic ]
I looked at rcu_assign_pointer() and it is:
#define rcu_assign_pointer(p, v) \
({ \
if (!__builtin_constant_p(v) || \
((v) != NULL)) \
smp_wmb(); \
(p) = (v); \
})
My question is, why that crazy if? The only time that will fail is if we
are assigning the constant NULL to p. What makes NULL so important here?
Can't there be a case when assigning NULL to p will require that wmb()?
-- Steve
>
> if (ftrace_enabled) {
> ftrace_func_t func;
--
The barrier ensures that the reader can't see the new p and the old
*p. Since you can't look at *NULL, that concern doesn't apply.
- Josh Triplett
> > #define rcu_assign_pointer(p, v) \
> > ({ \
> > if (!__builtin_constant_p(v) || \
> > ((v) != NULL)) \
> > smp_wmb(); \
> > (p) = (v); \
> > })
> >
> > My question is, why that crazy if? The only time that will fail is if we
> > are assigning the constant NULL to p. What makes NULL so important here?
> > Can't there be a case when assigning NULL to p will require that wmb()?
>
> The barrier ensures that the reader can't see the new p and the old
> *p. Since you can't look at *NULL, that concern doesn't apply.
Thanks for the explanation.
Question 2)
Then why the !__builtin_constant_p(v)?
If v is NULL, then the same should apply even if it is not a constant?
What am I missing?
-- Steve
Checking for __builtin_constant_p(v) ensures that this test happens at
compile time, and thus no conditional occurs at runtime. Together with
the assumption of compiler constant folding and dead code elimination,
this test means "if you can tell at compile time that the call assigns
NULL, emit no barrier, otherwise emit a barrier". Under no
circumstances will this macro actually emit conditional code.
- Josh Triplett
> > Question 2)
> >
> > Then why the !__builtin_constant_p(v)?
> >
> > If v is NULL, then the same should apply even if it is not a constant?
> > What am I missing?
>
> Checking for __builtin_constant_p(v) ensures that this test happens at
> compile time, and thus no conditional occurs at runtime. Together with
> the assumption of compiler constant folding and dead code elimination,
> this test means "if you can tell at compile time that the call assigns
> NULL, emit no barrier, otherwise emit a barrier". Under no
> circumstances will this macro actually emit conditional code.
Ah OK!
So the benefit of skipping the smp_wmb() when a variable happens to be NULL is
outweighed by the benefit of avoiding branches and extra code.
Yes it now makes sense. Only remove the wmb() when we can guarantee that
it is never needed, and avoid unnecessary branches when it may not be
needed.
Thanks for clarifying!
-- Steve
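If it helps to see the constant folding concretely, here is a minimal
standalone sketch (hypothetical demo_* names; demo_wmb() is only a
compiler barrier standing in for smp_wmb(), so this is illustrative and
not a real publish primitive). Compiling it with gcc -O2 and inspecting
the generated assembly shows that retract() is a plain store with no
branch and no barrier, while publish() unconditionally emits the barrier:

#include <stddef.h>

/* Compiler-barrier stand-in for smp_wmb(); NOT a real memory barrier
 * on weakly ordered hardware -- for illustration only. */
#define demo_wmb()	__asm__ __volatile__("" ::: "memory")

/* Same shape as the rcu_assign_pointer() quoted above. */
#define demo_assign_pointer(p, v) \
	({ \
		if (!__builtin_constant_p(v) || \
		    ((v) != NULL)) \
			demo_wmb(); \
		(p) = (v); \
	})

struct foo {
	int a;
};

struct foo *gp;

void publish(struct foo *newp)
{
	/* v is not a compile-time constant: the condition folds to true,
	 * so the barrier is always emitted and no branch remains. */
	demo_assign_pointer(gp, newp);
}

void retract(void)
{
	/* v is the constant NULL: the condition folds to false at compile
	 * time, leaving a plain store with no barrier. */
	demo_assign_pointer(gp, NULL);
}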
ftrace: Replace read_barrier_depends() with rcu_dereference_raw()
Replace the calls to read_barrier_depends() in
ftrace_list_func() with rcu_dereference_raw() to improve
readability. The reason that we use rcu_dereference_raw() here
is that removed entries are never freed; instead, they are simply
leaked. This is one of a very few cases where use of
rcu_dereference_raw() is the long-term right answer. And I
don't yet know of any others. ;-)
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
Acked-by: Steven Rostedt <ros...@goodmis.org>
Cc: Frederic Weisbecker <fwei...@gmail.com>
Cc: la...@cn.fujitsu.com
Cc: dipa...@in.ibm.com
Cc: mathieu....@polymtl.ca
Cc: jo...@joshtriplett.org
Cc: dvh...@us.ibm.com
Cc: n...@us.ibm.com
Cc: pet...@infradead.org
Cc: Valdis.K...@vt.edu
Cc: dhow...@redhat.com
LKML-Reference: <1267830207-9474-1-gi...@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
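For reference, the pattern this commit relies on boils down to the
following sketch (hypothetical cb_* names standing in for
ftrace_ops/ftrace_list; this is not the ftrace code itself). Because
removed entries are leaked rather than freed, readers never race with
reclamation, so rcu_dereference_raw() -- which performs no lockdep
checking -- suffices on the read side, paired with rcu_assign_pointer()
on the insert side:

#include <linux/rcupdate.h>

struct cb {
	void (*func)(unsigned long ip, unsigned long parent_ip);
	struct cb *next;
};

static struct cb cb_list_end;			/* list terminator */
static struct cb *cb_list = &cb_list_end;	/* head of the callback list */

/* Reader: no explicit RCU read-side critical section is needed because
 * entries are never freed; only the dependency-ordered loads matter. */
static void cb_call_all(unsigned long ip, unsigned long parent_ip)
{
	struct cb *op = rcu_dereference_raw(cb_list);

	while (op != &cb_list_end) {
		op->func(ip, parent_ip);
		op = rcu_dereference_raw(op->next);
	}
}

/* Writer: rcu_assign_pointer() ensures ops->next is visible before the
 * new entry becomes reachable from cb_list. */
static void cb_register(struct cb *ops)
{
	ops->next = cb_list;
	rcu_assign_pointer(cb_list, ops);
}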
rcu: Increase RCU CPU stall timeouts if PROVE_RCU
CONFIG_PROVE_RCU imposes additional overhead on the kernel, so
increase the RCU CPU stall timeouts in an attempt to allow for
this effect.
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
Cc: la...@cn.fujitsu.com
Cc: dipa...@in.ibm.com
Cc: mathieu....@polymtl.ca
Cc: jo...@joshtriplett.org
Cc: dvh...@us.ibm.com
Cc: n...@us.ibm.com
Cc: pet...@infradead.org
Cc: ros...@goodmis.org
Cc: Valdis.K...@vt.edu
Cc: dhow...@redhat.com
LKML-Reference: <1267830207-9474-2-gi...@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
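For reference, a sketch of how these timeouts are consumed (simplified
from rcutree.c's stall-check setup; treat the field and helper names as
approximate): the stall deadline is recorded as a jiffies value when a
grace period starts, so under CONFIG_PROVE_RCU the first check moves out
from roughly 10 to 15 seconds and the recheck from 30 to 35 seconds:

static void record_gp_stall_check_time(struct rcu_state *rsp)
{
	rsp->gp_start = jiffies;
	rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
}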
x86/mce: Fix RCU lockdep splats
Create an rcu_dereference_check_mce() that checks for RCU-sched
read side and mce_read_mutex being held on update side. Replace
uses of rcu_dereference() in arch/x86/kernel/cpu/mcheck/mce.c
with this new macro.
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: x...@kernel.org
Cc: la...@cn.fujitsu.com
Cc: dipa...@in.ibm.com
Cc: mathieu....@polymtl.ca
Cc: jo...@joshtriplett.org
Cc: dvh...@us.ibm.com
Cc: n...@us.ibm.com
Cc: pet...@infradead.org
Cc: ros...@goodmis.org
Cc: Valdis.K...@vt.edu
Cc: dhow...@redhat.com
LKML-Reference: <1267830207-9474-3-gi...@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
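As a rough sketch of the access rules the new macro encodes
(hypothetical demo_* names; the real call sites are mce_log(),
mce_read(), and mce_poll() in the diff above): the protected value may
be read either from an RCU-sched read-side critical section or while
holding the mutex named in the check (mce_read_mutex in the real code),
and rcu_dereference_check() lets lockdep complain whenever neither
condition holds:

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct demo_rec {
	int bank;
};

static DEFINE_MUTEX(demo_read_mutex);
static struct demo_rec *demo_rec_ptr;

#define demo_dereference_check(p) \
	rcu_dereference_check((p), \
			      rcu_read_lock_sched_held() || \
			      lockdep_is_held(&demo_read_mutex))

/* Reader: an RCU-sched read-side critical section satisfies the check. */
static int demo_peek(void)
{
	struct demo_rec *r;
	int bank = -1;

	rcu_read_lock_sched();
	r = demo_dereference_check(demo_rec_ptr);
	if (r)
		bank = r->bank;
	rcu_read_unlock_sched();
	return bank;
}

/* Updater: holding demo_read_mutex also satisfies the check. */
static int demo_read(void)
{
	struct demo_rec *r;
	int bank = -1;

	mutex_lock(&demo_read_mutex);
	r = demo_dereference_check(demo_rec_ptr);
	if (r)
		bank = r->bank;
	mutex_unlock(&demo_read_mutex);
	return bank;
}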
x86/mce: Fix build bug with CONFIG_PROVE_LOCKING=y && CONFIG_X86_MCE_INTEL=y
Commit f56e8a076 "x86/mce: Fix RCU lockdep splats" introduced the
following build bug:
arch/x86/kernel/cpu/mcheck/mce.c: In function 'mce_log':
arch/x86/kernel/cpu/mcheck/mce.c:166: error: 'mce_read_mutex' undeclared (first use in this function)
arch/x86/kernel/cpu/mcheck/mce.c:166: error: (Each undeclared identifier is reported only once
arch/x86/kernel/cpu/mcheck/mce.c:166: error: for each function it appears in.)
Move the lock variable, previously defined in the middle of the file,
up to the variable definition section at the top of the .c file.
Cc: Paul E. McKenney <pau...@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: x...@kernel.org
Cc: la...@cn.fujitsu.com
Cc: dipa...@in.ibm.com
Cc: mathieu....@polymtl.ca
Cc: jo...@joshtriplett.org
Cc: dvh...@us.ibm.com
Cc: n...@us.ibm.com
Cc: pet...@infradead.org
Cc: ros...@goodmis.org
Cc: Valdis.K...@vt.edu
Cc: dhow...@redhat.com
LKML-Reference: <1267830207-9474-3-gi...@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index bd58de4..3ab9c88 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,8 @@
#include "mce-internal.h"
+static DEFINE_MUTEX(mce_read_mutex);
+
#define rcu_dereference_check_mce(p) \
rcu_dereference_check((p), \
rcu_read_lock_sched_held() || \
@@ -1490,8 +1492,6 @@ static void collect_tscs(void *data)
rdtscll(cpu_tsc[smp_processor_id()]);
}
-static DEFINE_MUTEX(mce_read_mutex);
-
static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
loff_t *off)
{