On Wed, May 06, 2015 at 05:44:54AM +0200, Mike Galbraith wrote:
> On Wed, 2015-05-06 at 03:49 +0200, Mike Galbraith wrote:
> > On Mon, 2015-05-04 at 22:54 -0700, Paul E. McKenney wrote:
> >
> > > You have RCU_FAST_NO_HZ=y, correct? Could you please try measuring with
> > > RCU_FAST_NO_HZ=n?
> >
> > FWIW, the syscall numbers I posted were RCU_FAST_NO_HZ=n. (I didn't
> > profile to see where costs lie though)
>
> (did that)
Nice, thank you!!!
So we have rcu_eqs_enter_common(4.69%), rcu_eqs_exit_common(3.89%),
and rcu_eqs_exit(1.21%). Interesting that rcu_eqs_exit() appears at all,
given that it just does very simple operations, and rcu_eqs_exit_common()
is apparently not inlined (OK, perhaps it is partially inlined?).
This suggests that there are useful gains to be had via simple changes,
for exmaple, placing the warnings behind CONFIG_NO_HZ_DEBUG or some such.
Or not, as this overhead might instead be due to cache misses on first
access to the relevant data structures. But worth a try, perhaps.
Does the attached patch help at all? If so, there might be some similar
gains to be had.
Thanx, Paul
------------------------------------------------------------------------
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4e6902005228..3f09e5abb7b0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -616,7 +616,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
- if (!user && !is_idle_task(current)) {
+ if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ !user && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused =
idle_task(smp_processor_id());
@@ -635,7 +636,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
smp_mb__before_atomic(); /* See above. */
atomic_inc(&rdtp->dynticks);
smp_mb__after_atomic(); /* Force ordering with next sojourn. */
- WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ atomic_read(&rdtp->dynticks) & 0x1);
rcu_dynticks_task_enter();
/*
@@ -661,7 +663,8 @@ static void rcu_eqs_enter(bool user)
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
- WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ (oldval & DYNTICK_TASK_NEST_MASK) == 0);
if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
rdtp->dynticks_nesting = 0;
rcu_eqs_enter_common(oldval, user);
@@ -734,7 +737,8 @@ void rcu_irq_exit(void)
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting--;
- WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ rdtp->dynticks_nesting < 0);
if (rdtp->dynticks_nesting)
trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
else
@@ -759,10 +763,12 @@ static void rcu_eqs_exit_common(long long oldval, int user)
atomic_inc(&rdtp->dynticks);
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
smp_mb__after_atomic(); /* See above. */
- WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ !(atomic_read(&rdtp->dynticks) & 0x1));
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
- if (!user && !is_idle_task(current)) {
+ if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ !user && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused =
idle_task(smp_processor_id());
@@ -786,7 +792,7 @@ static void rcu_eqs_exit(bool user)
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
- WARN_ON_ONCE(oldval < 0);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
if (oldval & DYNTICK_TASK_NEST_MASK) {
rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
} else {
@@ -859,7 +865,8 @@ void rcu_irq_enter(void)
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
- WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ rdtp->dynticks_nesting == 0);
if (oldval)
trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
else
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c4e1cf04cf57..b908048f8d6a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1373,6 +1373,17 @@ config RCU_TRACE
Say Y here if you want to enable RCU tracing
Say N if you are unsure.
+config RCU_EQS_DEBUG
+ bool "Use this when adding any sort of NO_HZ support to your arch"
+ depends on DEBUG_KERNEL
+ help
+ This option provides consistency checks in RCU's handling of
+ NO_HZ. These checks have proven quite helpful in detecting
+ bugs in arch-specific NO_HZ code.
+
+ Say N here if you need ultimate kernel/user switch latencies
+ Say Y if you are unsure
+
endmenu # "RCU Debugging"
config DEBUG_BLOCK_EXT_DEVT