I tried to independently construct a test case that reports when the race
happens (see patch below): it widens the race window in
__rb_rotate_set_parents() while holding a per-node debug spinlock, and makes
vm_area_dup() check whether that lock is held at the moment it copies the
VMA. With my setup it does crash during boot with a default config, so I'm
very certain the tool correctly reported real concurrency here.
> But yes, at least all the rb-tree and list crud should be re-initialized
> for the object after copy.
Ok thanks for the clarification.
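For concreteness, that reinitialization might end up looking roughly like the
below in vm_area_dup() (an untested sketch only; whether RB_CLEAR_NODE() on
shared.rb is sufficient here is my assumption, not something I've verified):

struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
{
	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);

	if (new) {
		*new = *orig;
		INIT_LIST_HEAD(&new->anon_vma_chain);
		/* Hypothetical: drop the rb-tree linkage copied from orig. */
		RB_CLEAR_NODE(&new->shared.rb);
	}
	return new;
}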
-- Marco
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c4159bcc05d9..641981815f60 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2379,7 +2379,7 @@ static int __init eventpoll_init(void)
* We can have many thousands of epitems, so prevent this from
* using an extra cache line on 64-bit (and smaller) CPUs
*/
- BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128);
+// BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128);
/* Allocates slab cache used to allocate "struct epitem" items */
epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 1fd61a9af45c..02c33439bbeb 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -20,11 +20,14 @@
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
struct rb_node {
unsigned long __rb_parent_color;
struct rb_node *rb_right;
struct rb_node *rb_left;
+ spinlock_t debug_spinlock;
+ atomic_t race_count;
} __attribute__((aligned(sizeof(long))));
/* The alignment might seem pointless, but allegedly CRIS needs it */
@@ -71,6 +74,7 @@ static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
+ spin_lock_init(&node->debug_spinlock);
*rb_link = node;
}
@@ -80,6 +84,7 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
+ spin_lock_init(&node->debug_spinlock);
rcu_assign_pointer(*rb_link, node);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index bcdf53125210..5894bc339875 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/stackleak.h>
+#include <linux/delay.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -356,6 +357,10 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (new) {
+ if (spin_is_locked(&orig->shared.rb.debug_spinlock)) {
+ atomic_inc(&orig->shared.rb.race_count);
+ pr_err("race in %s\n", __func__);
+ }
*new = *orig;
INIT_LIST_HEAD(&new->anon_vma_chain);
}
diff --git a/lib/rbtree.c b/lib/rbtree.c
index abc86c6a3177..89526c90d29f 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -11,6 +11,7 @@
#include <linux/rbtree_augmented.h>
#include <linux/export.h>
+#include <linux/delay.h>
/*
* red-black trees properties:
http://en.wikipedia.org/wiki/Rbtree
@@ -76,6 +77,15 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
struct rb_root *root, int color)
{
struct rb_node *parent = rb_parent(old);
+ unsigned long flags;
+ int race_count;
+
+ spin_lock_irqsave(&new->debug_spinlock, flags);
+ race_count = atomic_read(&new->race_count);
+ udelay(100);
+ BUG_ON(race_count != atomic_read(&new->race_count));
+ spin_unlock_irqrestore(&new->debug_spinlock, flags);
+
new->__rb_parent_color = old->__rb_parent_color;
rb_set_parent_color(old, new, color);
__rb_change_child(old, new, parent, root);
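For reference, the same detection pattern can be sketched in plain user-space
C with pthreads (all names hypothetical; writer() stands in for
__rb_rotate_set_parents() and copier() for vm_area_dup(), and
pthread_spin_trylock() only approximates spin_is_locked(), which in the
kernel peeks at the lock without taking it). Compile with -pthread; the
assert is expected to fire almost immediately once a copy overlaps the
writer's widened window, mirroring the BUG_ON() above:

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static struct {
	pthread_spinlock_t debug_lock;	/* mirrors debug_spinlock */
	atomic_int race_count;		/* mirrors race_count */
} node;

static atomic_bool done;

/* Tree-writer side: hold the lock across an artificially widened window;
 * if the counter moved while we held it, a copier ran concurrently. */
static void *writer(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100; i++) {
		pthread_spin_lock(&node.debug_lock);
		int before = atomic_load(&node.race_count);
		usleep(1000);		/* widened window, like udelay(100) */
		assert(before == atomic_load(&node.race_count));
		pthread_spin_unlock(&node.debug_lock);
	}
	atomic_store(&done, true);
	return NULL;
}

/* Copier side: if the lock is held at copy time, the (unlocked) struct
 * copy would race a rotation -- count and report it. */
static void *copier(void *arg)
{
	(void)arg;
	while (!atomic_load(&done)) {
		if (pthread_spin_trylock(&node.debug_lock) == 0) {
			pthread_spin_unlock(&node.debug_lock);
		} else {
			atomic_fetch_add(&node.race_count, 1);
			fprintf(stderr, "race in %s\n", __func__);
		}
		/* ...the unlocked struct copy would happen here... */
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_spin_init(&node.debug_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_create(&a, NULL, writer, NULL);
	pthread_create(&b, NULL, copier, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}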