
[PATCH v8 2/6] MCS Lock: Restructure the MCS lock defines and locking


Tim Chen, Jan 20, 2014, 8:30:02 PM:

We will need the MCS lock code for doing optimistic spinning for rwsem
and queue rwlock. Extracting the MCS code from mutex.c and putting it
into its own file allows us to reuse this code easily.

Note that the smp_load_acquire/smp_store_release pair used in
mcs_lock and mcs_unlock is not sufficient to form a full memory barrier
across cpus for many architectures (except x86). For applications that
absolutely need a full barrier across multiple cpus with an mcs_unlock and
mcs_lock pair, smp_mb__after_unlock_lock() should be used after mcs_lock.
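
For illustration only (not part of the patch), a minimal sketch of a caller
that needs the stronger ordering described above; the lock pointer and the
critical section contents are placeholders:

	struct mcs_spinlock *example_lock;	/* shared lock word, NULL when free */

	void example_critical_section(void)
	{
		struct mcs_spinlock node;	/* per-acquisition, lives on the stack */

		mcs_spin_lock(&example_lock, &node);
		/*
		 * Only needed when this unlock/lock pair must act as a
		 * full memory barrier across cpus, per the note above.
		 */
		smp_mb__after_unlock_lock();

		/* ... critical section ... */

		mcs_spin_unlock(&example_lock, &node);
	}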

Signed-off-by: Tim Chen <tim.c...@linux.intel.com>
Signed-off-by: Davidlohr Bueso <davi...@hp.com>
---
include/linux/mcs_spinlock.h | 81 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/mutex.h | 5 +--
kernel/locking/mutex.c | 68 ++++---------------------------------
3 files changed, 91 insertions(+), 63 deletions(-)
create mode 100644 include/linux/mcs_spinlock.h

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
new file mode 100644
index 0000000..23912cb
--- /dev/null
+++ b/include/linux/mcs_spinlock.h
@@ -0,0 +1,81 @@
+/*
+ * MCS lock defines
+ *
+ * This file contains the main data structure and API definitions of MCS lock.
+ *
+ * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
+ * with the desirable properties of being fair, and with each cpu trying
+ * to acquire the lock spinning on a local variable.
+ * It avoids expensive cache bouncings that common test-and-set spin-lock
+ * implementations incur.
+ */
+#ifndef __LINUX_MCS_SPINLOCK_H
+#define __LINUX_MCS_SPINLOCK_H
+
+struct mcs_spinlock {
+ struct mcs_spinlock *next;
+ int locked; /* 1 if lock acquired */
+};
+
+/*
+ * Note: the smp_load_acquire/smp_store_release pair is not
+ * sufficient to form a full memory barrier across
+ * cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
+ * For applications that need a full barrier across multiple cpus
+ * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
+ * used after mcs_lock.
+ */
+
+/*
+ * We don't inline mcs_spin_lock() so that perf can correctly account for the
+ * time spent in this lock function.
+ */
+static noinline
+void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+{
+ struct mcs_spinlock *prev;
+
+ /* Init node */
+ node->locked = 0;
+ node->next = NULL;
+
+ prev = xchg(lock, node);
+ if (likely(prev == NULL)) {
+ /* Lock acquired */
+ node->locked = 1;
+ return;
+ }
+ ACCESS_ONCE(prev->next) = node;
+ /*
+ * Wait until the lock holder passes the lock down.
+ * Using smp_load_acquire() provides a memory barrier that
+ * ensures subsequent operations happen after the lock is acquired.
+ */
+ while (!(smp_load_acquire(&node->locked)))
+ arch_mutex_cpu_relax();
+}
+
+static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+{
+ struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+
+ if (likely(!next)) {
+ /*
+ * Release the lock by setting it to NULL
+ */
+ if (cmpxchg(lock, node, NULL) == node)
+ return;
+ /* Wait until the next pointer is set */
+ while (!(next = ACCESS_ONCE(node->next)))
+ arch_mutex_cpu_relax();
+ }
+ /*
+ * Pass lock to next waiter.
+ * smp_store_release() provides a memory barrier to ensure
+ * all operations in the critical section has been completed
+ * before unlocking.
+ */
+ smp_store_release(&next->locked, 1);
+}
+
+#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index d318193..c482e1d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -46,6 +46,7 @@
* - detects multi-task circular deadlocks and prints out all affected
* locks and tasks (and only those tasks)
*/
+struct mcs_spinlock;
struct mutex {
/* 1: unlocked, 0: locked, negative: locked, possible waiters */
atomic_t count;
@@ -55,7 +56,7 @@ struct mutex {
struct task_struct *owner;
#endif
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
- void *spin_mlock; /* Spinner MCS lock */
+ struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */
#endif
#ifdef CONFIG_DEBUG_MUTEXES
const char *name;
@@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
# define arch_mutex_cpu_relax() cpu_relax()
#endif

-#endif
+#endif /* __LINUX_MUTEX_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index fbbd2ed..45fe1b5 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
+#include <linux/mcs_spinlock.h>

/*
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
INIT_LIST_HEAD(&lock->wait_list);
mutex_clear_owner(lock);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
- lock->spin_mlock = NULL;
+ lock->mcs_lock = NULL;
#endif

debug_mutex_init(lock, name, key);
@@ -111,62 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
* more or less simultaneously, the spinners need to acquire a MCS lock
* first before spinning on the owner field.
*
- * We don't inline mspin_lock() so that perf can correctly account for the
- * time spent in this lock function.
*/
-struct mspin_node {
- struct mspin_node *next ;
- int locked; /* 1 if lock acquired */
-};
-#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock))
-
-static noinline
-void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
-{
- struct mspin_node *prev;
-
- /* Init node */
- node->locked = 0;
- node->next = NULL;
-
- prev = xchg(lock, node);
- if (likely(prev == NULL)) {
- /* Lock acquired */
- node->locked = 1;
- return;
- }
- ACCESS_ONCE(prev->next) = node;
- /*
- * Wait until the lock holder passes the lock down.
- * Using smp_load_acquire() provides a memory barrier that
- * ensures subsequent operations happen after the lock is acquired.
- */
- while (!(smp_load_acquire(&node->locked)))
- arch_mutex_cpu_relax();
-}
-
-static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
-{
- struct mspin_node *next = ACCESS_ONCE(node->next);
-
- if (likely(!next)) {
- /*
- * Release the lock by setting it to NULL
- */
- if (cmpxchg(lock, node, NULL) == node)
- return;
- /* Wait until the next pointer is set */
- while (!(next = ACCESS_ONCE(node->next)))
- arch_mutex_cpu_relax();
- }
- /*
- * Pass lock to next waiter.
- * smp_store_release() provides a memory barrier to ensure
- * all operations in the critical section has been completed
- * before unlocking.
- */
- smp_store_release(&next->locked, 1);
-}

/*
* Mutex spinning code migrated from kernel/sched/core.c
@@ -456,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,

for (;;) {
struct task_struct *owner;
- struct mspin_node node;
+ struct mcs_spinlock node;

if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;
@@ -478,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
- mspin_lock(MLOCK(lock), &node);
+ mcs_spin_lock(&lock->mcs_lock, &node);
owner = ACCESS_ONCE(lock->owner);
if (owner && !mutex_spin_on_owner(lock, owner)) {
- mspin_unlock(MLOCK(lock), &node);
+ mcs_spin_unlock(&lock->mcs_lock, &node);
goto slowpath;
}

@@ -496,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
}

mutex_set_owner(lock);
- mspin_unlock(MLOCK(lock), &node);
+ mcs_spin_unlock(&lock->mcs_lock, &node);
preempt_enable();
return 0;
}
- mspin_unlock(MLOCK(lock), &node);
+ mcs_spin_unlock(&lock->mcs_lock, &node);

/*
* When there's no owner, we might have preempted between the
--
1.7.11.7




Tim Chen, Jan 20, 2014, 8:30:03 PM:

This update to the patch series reorganizes the order of the patches,
fixing the MCS lock barrier leakage first before making standalone
MCS lock and unlock functions. We also changed the hooks for the
architecture-specific mcs_spin_lock_contended and mcs_spin_lock_uncontended
functions from needing Kconfig options to using generic-asm, adding
arch-specific asm headers as needed. Peter, please review the last patch
and bless it with your Signed-off-by if it looks right.

This patch series fixes the barriers of the MCS lock and performs some
optimizations. Proper passing of the mcs lock is now done with
smp_load_acquire() in mcs_spin_lock() and smp_store_release() in
mcs_spin_unlock(). Note that this is not sufficient to form a full memory
barrier across cpus on many architectures (except x86) for the mcs_unlock
and mcs_lock pair. For code that needs a full memory barrier with the
mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be used
after mcs_lock.

Will also added hooks to allow for architecture-specific implementation
and optimization of the contended paths of the mcs_spin_lock and
mcs_spin_unlock functions.

The original mcs lock code has potential barrier leakage between critical
sections, which was not a problem when MCS was embedded within the mutex
but needs to be corrected when the MCS lock is used by itself for other
locking purposes. The MCS lock code was previously embedded in mutex.c and
is now separated out. This allows for easier reuse of the MCS lock in other
places like rwsem and qrwlock, as sketched below.
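
As a rough sketch of that intended reuse (the rwsem embedding below is
hypothetical and not taken from this series), a spinner would serialize on
an MCS lock attached to the outer lock, much like the mutex code does:

	struct rw_semaphore {
		/* ... existing rwsem fields ... */
		struct mcs_spinlock *mcs_lock;	/* hypothetical spinner queue */
	};

	static void rwsem_optimistic_spin(struct rw_semaphore *sem)
	{
		struct mcs_spinlock node;

		mcs_spin_lock(&sem->mcs_lock, &node);	/* one spinner at a time */
		/* ... spin on the owner / try to acquire sem ... */
		mcs_spin_unlock(&sem->mcs_lock, &node);
	}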

Tim

v8:
1. Move order of patches by putting barrier corrections first.
2. Use generic-asm headers for hooking in the arch-specific
mcs_spin_lock_contended and mcs_spin_lock_uncontended functions.
3. Some minor cleanup and comments added.

v7:
1. Update architecture specific hooks with concise architecture
specific arch_mcs_spin_lock_contended and arch_mcs_spin_lock_uncontended
functions.

v6:
1. Fix a bug with an improper xchg_acquire and an extra space in the
barrier-fixing patch.
2. Added extra hooks to allow for architecture specific version
of mcs_spin_lock and mcs_spin_unlock to be used.

v5:
1. Rework the barrier correction patch. We now use smp_load_acquire()
in mcs_spin_lock() and smp_store_release() in mcs_spin_unlock() to allow
architecture-dependent barriers to be used automatically. This is clean
and will provide the right barriers for all architectures.

v4:
1. Move patch series to the latest tip after v3.12

v3:
1. Modified memory barriers to support non-x86 architectures that have
weak memory ordering.

v2:
1. Changed the export of mcs_spin_lock to a GPL export symbol.
2. Corrected mcs_spin_lock references.


Jason Low (1):
MCS Lock: optimizations and extra comments

Peter Zijlstra (1):
MCS Lock: Allow architecture specific asm files to be used for
contended case

Tim Chen (2):
MCS Lock: Restructure the MCS lock defines and locking
MCS Lock: allow architectures to hook in to contended

Waiman Long (2):
MCS Lock: Barrier corrections
MCS Lock: Move mcs_lock/unlock function into its own

arch/alpha/include/asm/Kbuild | 1 +
arch/arc/include/asm/Kbuild | 1 +
arch/arm/include/asm/Kbuild | 1 +
arch/arm64/include/asm/Kbuild | 1 +
arch/avr32/include/asm/Kbuild | 1 +
arch/blackfin/include/asm/Kbuild | 1 +
arch/c6x/include/asm/Kbuild | 1 +
arch/cris/include/asm/Kbuild | 1 +
arch/frv/include/asm/Kbuild | 1 +
arch/hexagon/include/asm/Kbuild | 1 +
arch/ia64/include/asm/Kbuild | 2 +-
arch/m32r/include/asm/Kbuild | 1 +
arch/m68k/include/asm/Kbuild | 1 +
arch/metag/include/asm/Kbuild | 1 +
arch/microblaze/include/asm/Kbuild | 1 +
arch/mips/include/asm/Kbuild | 1 +
arch/mn10300/include/asm/Kbuild | 1 +
arch/openrisc/include/asm/Kbuild | 1 +
arch/parisc/include/asm/Kbuild | 1 +
arch/powerpc/include/asm/Kbuild | 2 +-
arch/s390/include/asm/Kbuild | 1 +
arch/score/include/asm/Kbuild | 1 +
arch/sh/include/asm/Kbuild | 1 +
arch/sparc/include/asm/Kbuild | 1 +
arch/tile/include/asm/Kbuild | 1 +
arch/um/include/asm/Kbuild | 1 +
arch/unicore32/include/asm/Kbuild | 1 +
arch/x86/include/asm/Kbuild | 1 +
arch/xtensa/include/asm/Kbuild | 1 +
include/asm-generic/mcs_spinlock.h | 13 +++++
include/linux/mcs_spinlock.h | 27 ++++++++++
include/linux/mutex.h | 5 +-
kernel/locking/Makefile | 6 +--
kernel/locking/mcs_spinlock.c | 103 +++++++++++++++++++++++++++++++++++++
kernel/locking/mutex.c | 60 +++------------------
35 files changed, 185 insertions(+), 60 deletions(-)
create mode 100644 include/asm-generic/mcs_spinlock.h
create mode 100644 include/linux/mcs_spinlock.h
create mode 100644 kernel/locking/mcs_spinlock.c

Tim Chen, Jan 20, 2014, 8:30:02 PM:

From: Waiman Long <Waima...@hp.com>

Create a new mcs_spinlock.c file to contain the
mcs_spin_lock() and mcs_spin_unlock() functions.

Signed-off-by: Waiman Long <Waima...@hp.com>
Signed-off-by: Tim Chen <tim.c...@linux.intel.com>
---
include/linux/mcs_spinlock.h | 77 ++--------------------
kernel/locking/Makefile | 6 +-
.../locking/mcs_spinlock.c | 27 ++++----
3 files changed, 18 insertions(+), 92 deletions(-)
copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (82%)

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index bfe84c6..d54bb23 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -17,78 +17,9 @@ struct mcs_spinlock {
int locked; /* 1 if lock acquired */
};

-/*
- * Note: the smp_load_acquire/smp_store_release pair is not
- * sufficient to form a full memory barrier across
- * cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
- * For applications that need a full barrier across multiple cpus
- * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
- * used after mcs_lock.
- */
-
-/*
- * In order to acquire the lock, the caller should declare a local node and
- * pass a reference of the node to this function in addition to the lock.
- * If the lock has already been acquired, then this will proceed to spin
- * on this node->locked until the previous lock holder sets the node->locked
- * in mcs_spin_unlock().
- *
- * We don't inline mcs_spin_lock() so that perf can correctly account for the
- * time spent in this lock function.
- */
-static noinline
-void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
- struct mcs_spinlock *prev;
-
- /* Init node */
- node->locked = 0;
- node->next = NULL;
-
- prev = xchg(lock, node);
- if (likely(prev == NULL)) {
- /* Lock acquired, don't need to set node->locked to 1
- * as lock owner and other contenders won't check this value.
- * If a debug mode is needed to audit lock status, then
- * set node->locked value here.
- */
- return;
- }
- ACCESS_ONCE(prev->next) = node;
- /*
- * Wait until the lock holder passes the lock down.
- * Using smp_load_acquire() provides a memory barrier that
- * ensures subsequent operations happen after the lock is acquired.
- */
- while (!(smp_load_acquire(&node->locked)))
- arch_mutex_cpu_relax();
-}
-
-/*
- * Releases the lock. The caller should pass in the corresponding node that
- * was used to acquire the lock.
- */
-static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
- struct mcs_spinlock *next = ACCESS_ONCE(node->next);
-
- if (likely(!next)) {
- /*
- * Release the lock by setting it to NULL
- */
- if (likely(cmpxchg(lock, node, NULL) == node))
- return;
- /* Wait until the next pointer is set */
- while (!(next = ACCESS_ONCE(node->next)))
- arch_mutex_cpu_relax();
- }
- /*
- * Pass lock to next waiter.
- * smp_store_release() provides a memory barrier to ensure
- * all operations in the critical section has been completed
- * before unlocking.
- */
- smp_store_release(&next->locked, 1);
-}
+extern
+void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
+extern
+void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node);

#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index baab8e5..20d9d5c 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o
ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
-obj-$(CONFIG_SMP) += spinlock.o
-obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o
+obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
-obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c
similarity index 82%
copy from include/linux/mcs_spinlock.h
copy to kernel/locking/mcs_spinlock.c
index bfe84c6..c3ee9cf 100644
--- a/include/linux/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.c
@@ -1,7 +1,5 @@
/*
- * MCS lock defines
- *
- * This file contains the main data structure and API definitions of MCS lock.
+ * MCS lock
*
* The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
* with the desirable properties of being fair, and with each cpu trying
@@ -9,13 +7,14 @@
* It avoids expensive cache bouncings that common test-and-set spin-lock
* implementations incur.
*/
-#ifndef __LINUX_MCS_SPINLOCK_H
-#define __LINUX_MCS_SPINLOCK_H

-struct mcs_spinlock {
- struct mcs_spinlock *next;
- int locked; /* 1 if lock acquired */
-};
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+#include <asm/processor.h>
+#include <linux/compiler.h>
+#include <linux/mcs_spinlock.h>
+#include <linux/mutex.h>
+#include <linux/export.h>

/*
* Note: the smp_load_acquire/smp_store_release pair is not
@@ -32,11 +31,7 @@ struct mcs_spinlock {
* If the lock has already been acquired, then this will proceed to spin
* on this node->locked until the previous lock holder sets the node->locked
* in mcs_spin_unlock().
- *
- * We don't inline mcs_spin_lock() so that perf can correctly account for the
- * time spent in this lock function.
*/
-static noinline
void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
struct mcs_spinlock *prev;
@@ -63,12 +58,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
while (!(smp_load_acquire(&node->locked)))
arch_mutex_cpu_relax();
}
+EXPORT_SYMBOL_GPL(mcs_spin_lock);

/*
* Releases the lock. The caller should pass in the corresponding node that
* was used to acquire the lock.
*/
-static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
struct mcs_spinlock *next = ACCESS_ONCE(node->next);

@@ -90,5 +86,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
*/
smp_store_release(&next->locked, 1);
}
-
-#endif /* __LINUX_MCS_SPINLOCK_H */
+EXPORT_SYMBOL_GPL(mcs_spin_unlock);

Tim Chen, Jan 20, 2014, 8:30:03 PM:

From: Waiman Long <Waima...@hp.com>

This patch corrects the way memory barriers are used in the MCS lock,
using the smp_load_acquire and smp_store_release functions.
It removes barriers that are not needed.
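
As a side note (an illustration added here, not part of the patch), the
hazard with the old smp_wmb()-only scheme on a weakly ordered architecture
looks roughly like this, where CPU0's next pointer refers to CPU1's node and
data is a placeholder variable protected by the lock:

	lock holder (CPU0)                   next waiter (CPU1)
	------------------                   ------------------
	data = 42;                           while (!ACCESS_ONCE(node->locked))
	ACCESS_ONCE(next->locked) = 1;               arch_mutex_cpu_relax();
	smp_wmb();                           r = data;

Nothing orders CPU0's store to data before the handoff store, and nothing
orders CPU1's load of data after it observes node->locked, so CPU1 may see
a stale value of data inside its critical section. smp_store_release() and
smp_load_acquire() provide exactly those two orderings.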

Suggested-by: Michel Lespinasse <wal...@google.com>
Signed-off-by: Waiman Long <Waima...@hp.com>
Signed-off-by: Jason Low <jason...@hp.com>
Signed-off-by: Tim Chen <tim.c...@linux.intel.com>
---
kernel/locking/mutex.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 4dd6e4c..fbbd2ed 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -136,9 +136,12 @@ void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
return;
}
ACCESS_ONCE(prev->next) = node;
- smp_wmb();
- /* Wait until the lock holder passes the lock down */
- while (!ACCESS_ONCE(node->locked))
+ /*
+ * Wait until the lock holder passes the lock down.
+ * Using smp_load_acquire() provides a memory barrier that
+ * ensures subsequent operations happen after the lock is acquired.
+ */
+ while (!(smp_load_acquire(&node->locked)))
arch_mutex_cpu_relax();
}

@@ -156,8 +159,13 @@ static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
while (!(next = ACCESS_ONCE(node->next)))
arch_mutex_cpu_relax();
}
- ACCESS_ONCE(next->locked) = 1;
- smp_wmb();
+ /*
+ * Pass lock to next waiter.
+ * smp_store_release() provides a memory barrier to ensure
+ * all operations in the critical section has been completed
+ * before unlocking.
+ */
+ smp_store_release(&next->locked, 1);
}

/*

Tim Chen, Jan 20, 2014, 8:30:03 PM:

From: Will Deacon <will....@arm.com>

When contended, architectures may be able to reduce the polling overhead
in ways which aren't expressible using a simple relax() primitive.

This patch allows architectures to hook into the mcs_{lock,unlock}
functions for the contended cases only.
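
For illustration (a hypothetical sketch, not taken from any architecture's
actual implementation), an arch header could override the contended hooks
roughly like this, e.g. using ARM-style wfe()/sev() to avoid busy-polling;
the exact primitives an architecture would pick are an assumption here:

	/* in a hypothetical <asm/mcs_spinlock.h> */
	#define arch_mcs_spin_lock_contended(l)				\
	do {								\
		/* wait in a low-power state until the lock is passed down */	\
		while (!smp_load_acquire(l))				\
			wfe();						\
	} while (0)

	#define arch_mcs_spin_unlock_contended(l)			\
	do {								\
		smp_store_release((l), 1);				\
		sev();	/* wake any waiter parked in wfe() */		\
	} while (0)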

Signed-off-by: Will Deacon <will....@arm.com>
Signed-off-by: Tim Chen <tim.c...@linux.intel.com>
---
kernel/locking/mcs_spinlock.c | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index c3ee9cf..e12ed32 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -16,6 +16,28 @@
#include <linux/mutex.h>
#include <linux/export.h>

+#ifndef arch_mcs_spin_lock_contended
+/*
+ * Using smp_load_acquire() provides a memory barrier that ensures
+ * subsequent operations happen after the lock is acquired.
+ */
+#define arch_mcs_spin_lock_contended(l) \
+do { \
+ while (!(smp_load_acquire(l))) \
+ arch_mutex_cpu_relax(); \
+} while (0)
+#endif
+
+#ifndef arch_mcs_spin_unlock_contended
+/*
+ * smp_store_release() provides a memory barrier to ensure all
+ * operations in the critical section has been completed before
+ * unlocking.
+ */
+#define arch_mcs_spin_unlock_contended(l) \
+ smp_store_release((l), 1)
+#endif
+
/*
* Note: the smp_load_acquire/smp_store_release pair is not
* sufficient to form a full memory barrier across
@@ -50,13 +72,9 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
return;
}
ACCESS_ONCE(prev->next) = node;
- /*
- * Wait until the lock holder passes the lock down.
- * Using smp_load_acquire() provides a memory barrier that
- * ensures subsequent operations happen after the lock is acquired.
- */
- while (!(smp_load_acquire(&node->locked)))
- arch_mutex_cpu_relax();
+
+ /* Wait until the lock holder passes the lock down. */
+ arch_mcs_spin_lock_contended(&node->locked);
}
EXPORT_SYMBOL_GPL(mcs_spin_lock);

@@ -78,12 +96,8 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
while (!(next = ACCESS_ONCE(node->next)))
arch_mutex_cpu_relax();
}
- /*
- * Pass lock to next waiter.
- * smp_store_release() provides a memory barrier to ensure
- * all operations in the critical section has been completed
- * before unlocking.
- */
- smp_store_release(&next->locked, 1);
+
+ /* Pass lock to next waiter. */
+ arch_mcs_spin_unlock_contended(&next->locked);
}
EXPORT_SYMBOL_GPL(mcs_spin_unlock);

Tim Chen, Jan 20, 2014, 8:30:03 PM:

From: Jason Low <jason...@hp.com>

Remove unnecessary operation to assign locked status to 1 if lock is
acquired without contention as this value will not be checked by lock
holder again and other potential lock contenders will not be looking at
their own lock status.

Make the cmpxchg(lock, node, NULL) == node check in mcs_spin_unlock()
likely() as it is likely that a race did not occur most of the time.

Also add in more comments describing how the local node is used in MCS locks.

Reviewed-by: Tim Chen <tim.c...@linux.intel.com>
Signed-off-by: Jason Low <jason...@hp.com>
Signed-off-by: Tim Chen <tim.c...@linux.intel.com>
---
include/linux/mcs_spinlock.h | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index 23912cb..bfe84c6 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -27,6 +27,12 @@ struct mcs_spinlock {
*/

/*
+ * In order to acquire the lock, the caller should declare a local node and
+ * pass a reference of the node to this function in addition to the lock.
+ * If the lock has already been acquired, then this will proceed to spin
+ * on this node->locked until the previous lock holder sets the node->locked
+ * in mcs_spin_unlock().
+ *
* We don't inline mcs_spin_lock() so that perf can correctly account for the
* time spent in this lock function.
*/
@@ -41,8 +47,11 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)

prev = xchg(lock, node);
if (likely(prev == NULL)) {
- /* Lock acquired */
- node->locked = 1;
+ /* Lock acquired, don't need to set node->locked to 1
+ * as lock owner and other contenders won't check this value.
+ * If a debug mode is needed to audit lock status, then
+ * set node->locked value here.
+ */
return;
}
ACCESS_ONCE(prev->next) = node;
@@ -55,6 +64,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
arch_mutex_cpu_relax();
}

+/*
+ * Releases the lock. The caller should pass in the corresponding node that
+ * was used to acquire the lock.
+ */
static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
struct mcs_spinlock *next = ACCESS_ONCE(node->next);
@@ -63,7 +76,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
/*
* Release the lock by setting it to NULL
*/
- if (cmpxchg(lock, node, NULL) == node)
+ if (likely(cmpxchg(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
while (!(next = ACCESS_ONCE(node->next)))

Tim Chen, Jan 20, 2014, 8:30:03 PM:

From: Peter Zijlstra <pet...@infradead.org>

This patch allows each architecture to add its own assembly-optimized
arch_mcs_spin_lock_contended and arch_mcs_spinlock_uncontended for the
MCS lock and unlock functions.

Signed-off-by: Tim Chen <tim.c...@linux.intel.com>
---
include/asm-generic/mcs_spinlock.h | 13 +++++++++++++
include/linux/mcs_spinlock.h | 2 ++
31 files changed, 44 insertions(+), 2 deletions(-)
create mode 100644 include/asm-generic/mcs_spinlock.h

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index f01fb50..14cbbbc 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += clkdev.h
generic-y += exec.h
generic-y += trace_clock.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 9ae21c1..c0773a5 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -48,3 +48,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index c38b58c..c68cfdd 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -34,3 +34,4 @@ generic-y += timex.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 519f89f..24a3c10 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -51,3 +51,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 658001b..466e13d 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -18,3 +18,4 @@ generic-y += sections.h
generic-y += topology.h
generic-y += trace_clock.h
generic-y += xor.h
+generic-y += mcs_spinlock.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index f2b4347..0bd1c5c 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -45,3 +45,4 @@ generic-y += unaligned.h
generic-y += user.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index fc0b3c3..21d7100 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -57,3 +57,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 199b1a9..c571cc1 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -13,3 +13,4 @@ generic-y += trace_clock.h
generic-y += vga.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 74742dc..ccca92e 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
generic-y += exec.h
generic-y += trace_clock.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index ada843c..553077d 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -55,3 +55,4 @@ generic-y += ucontext.h
generic-y += unaligned.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index f93ee08..25aed55 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -4,4 +4,4 @@ generic-y += exec.h
generic-y += kvm_para.h
generic-y += trace_clock.h
generic-y += preempt.h
-generic-y += vtime.h
\ No newline at end of file
+generic-y += vtime.hgeneric-y += mcs_spinlock.h
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 2b58c5f..d64fdd1 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += exec.h
generic-y += module.h
generic-y += trace_clock.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index a5d27f2..1f4d44c 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -32,3 +32,4 @@ generic-y += types.h
generic-y += word-at-a-time.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 84d0c1d..ae0ae6e 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -53,3 +53,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index a824265..6eb70bd 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -5,3 +5,4 @@ generic-y += exec.h
generic-y += trace_clock.h
generic-y += syscalls.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 1acbb8b..c718d63 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -14,3 +14,4 @@ generic-y += trace_clock.h
generic-y += preempt.h
generic-y += ucontext.h
generic-y += xor.h
+generic-y += mcs_spinlock.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 032143e..1393ae5 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += clkdev.h
generic-y += exec.h
generic-y += trace_clock.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index da1951a..7e049d3 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -69,3 +69,4 @@ generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 34b0be4..ebe1649 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -6,3 +6,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
poll.h xor.h clkdev.h exec.h
generic-y += trace_clock.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index d8f9d2f..426001b 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -3,4 +3,4 @@ generic-y += clkdev.h
generic-y += rwsem.h
generic-y += trace_clock.h
generic-y += preempt.h
-generic-y += vtime.h
\ No newline at end of file
+generic-y += vtime.hgeneric-y += mcs_spinlock.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 7a5288f..8508913 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -3,3 +3,4 @@
generic-y += clkdev.h
generic-y += trace_clock.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index fe7471e..8e39afc 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -6,3 +6,4 @@ generic-y += clkdev.h
generic-y += trace_clock.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 231efbb..1aed131 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -35,3 +35,4 @@ generic-y += trace_clock.h
generic-y += ucontext.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index bf39066..8843299 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -17,3 +17,4 @@ generic-y += trace_clock.h
generic-y += types.h
generic-y += word-at-a-time.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 22f3bd1..152fc48 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -39,3 +39,4 @@ generic-y += trace_clock.h
generic-y += types.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index fdde187..620d729 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
generic-y += switch_to.h clkdev.h
generic-y += trace_clock.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 00045cb..cd7822c 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -61,3 +61,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 7f66985..a8fee07 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,3 +5,4 @@ genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h

generic-y += clkdev.h
+generic-y += mcs_spinlock.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 228d6ae..9653e5c 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -29,3 +29,4 @@ generic-y += topology.h
generic-y += trace_clock.h
generic-y += xor.h
generic-y += preempt.h
+generic-y += mcs_spinlock.h
diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h
new file mode 100644
index 0000000..10cd4ff
--- /dev/null
+++ b/include/asm-generic/mcs_spinlock.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_MCS_SPINLOCK_H
+#define __ASM_MCS_SPINLOCK_H
+
+/*
+ * Architectures can define their own:
+ *
+ * arch_mcs_spin_lock_contended(l)
+ * arch_mcs_spin_unlock_contended(l)
+ *
+ * See kernel/locking/mcs_spinlock.c.
+ */
+
+#endif /* __ASM_MCS_SPINLOCK_H */
diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index d54bb23..164298a 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -12,6 +12,8 @@
#ifndef __LINUX_MCS_SPINLOCK_H
#define __LINUX_MCS_SPINLOCK_H

+#include <asm/mcs_spinlock.h>
+
struct mcs_spinlock {
struct mcs_spinlock *next;
int locked; /* 1 if lock acquired */

Peter Zijlstra, Jan 21, 2014, 5:20:01 AM:

On Mon, Jan 20, 2014 at 05:24:31PM -0800, Tim Chen wrote:
> +EXPORT_SYMBOL_GPL(mcs_spin_lock);
> +EXPORT_SYMBOL_GPL(mcs_spin_unlock);

Do we really need the EXPORTs? The only user so far is mutex and that's
core code. The other planned users are rwsems and rwlocks, for both it
would be in the slow path, which is also core code.

We should generally only add EXPORTs once there's a need.

Peter Zijlstra, Jan 21, 2014, 5:20:02 AM:

On Mon, Jan 20, 2014 at 05:24:28PM -0800, Tim Chen wrote:
> @@ -41,8 +47,11 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>
> prev = xchg(lock, node);
> if (likely(prev == NULL)) {
> - /* Lock acquired */
> - node->locked = 1;
> + /* Lock acquired, don't need to set node->locked to 1
> + * as lock owner and other contenders won't check this value.
> + * If a debug mode is needed to audit lock status, then
> + * set node->locked value here.
> + */

Fail in comment style.

> return;
> }
> ACCESS_ONCE(prev->next) = node;

Peter Zijlstra, Jan 21, 2014, 5:30:02 AM:

On Mon, Jan 20, 2014 at 05:24:28PM -0800, Tim Chen wrote:
> From: Jason Low <jason...@hp.com>
>
> Remove unnecessary operation to assign locked status to 1 if lock is
> acquired without contention as this value will not be checked by lock
> holder again and other potential lock contenders will not be looking at
> their own lock status.

Ha, read that again :-)

Peter Zijlstra, Jan 21, 2014, 5:30:02 AM:

On Mon, Jan 20, 2014 at 05:24:39PM -0800, Tim Chen wrote:
> diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
> index f01fb50..14cbbbc 100644
> --- a/arch/alpha/include/asm/Kbuild
> +++ b/arch/alpha/include/asm/Kbuild
> @@ -4,3 +4,4 @@ generic-y += clkdev.h
> generic-y += exec.h
> generic-y += trace_clock.h
> generic-y += preempt.h
> +generic-y += mcs_spinlock.h

m < p

> --- a/arch/ia64/include/asm/Kbuild
> +++ b/arch/ia64/include/asm/Kbuild
> @@ -4,4 +4,4 @@ generic-y += exec.h
> generic-y += kvm_para.h
> generic-y += trace_clock.h
> generic-y += preempt.h
> -generic-y += vtime.h
> \ No newline at end of file
> +generic-y += vtime.hgeneric-y += mcs_spinlock.h

EOL fail

Tim Chen, Jan 21, 2014, 12:40:01 PM:

On Tue, 2014-01-21 at 11:26 +0100, Peter Zijlstra wrote:
> On Mon, Jan 20, 2014 at 05:24:28PM -0800, Tim Chen wrote:
> > From: Jason Low <jason...@hp.com>
> >
> > Remove unnecessary operation to assign locked status to 1 if lock is
> > acquired without contention as this value will not be checked by lock
> > holder again and other potential lock contenders will not be looking at
> > their own lock status.
should be "lock contenders will not be looking at lock holder's
lock status"

Thanks for catching it.

Tim

Tim Chen, Jan 21, 2014, 5:30:02 PM:

On Tue, 2014-01-21 at 11:20 +0100, Peter Zijlstra wrote:
> On Mon, Jan 20, 2014 at 05:24:39PM -0800, Tim Chen wrote:
> > diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
> > index f01fb50..14cbbbc 100644
> > --- a/arch/alpha/include/asm/Kbuild
> > +++ b/arch/alpha/include/asm/Kbuild
> > @@ -4,3 +4,4 @@ generic-y += clkdev.h
> > generic-y += exec.h
> > generic-y += trace_clock.h
> > generic-y += preempt.h
> > +generic-y += mcs_spinlock.h
>
> m < p
>
> > --- a/arch/ia64/include/asm/Kbuild
> > +++ b/arch/ia64/include/asm/Kbuild
> > @@ -4,4 +4,4 @@ generic-y += exec.h
> > generic-y += kvm_para.h
> > generic-y += trace_clock.h
> > generic-y += preempt.h
> > -generic-y += vtime.h
> > \ No newline at end of file

The "no newline" complaint is apparently caused by the
existing Kbuild file.

Tim