
[PATCH tip/core/rcu 0/13] rcu: add lockdep checking, doc update, dyntick GP acceleration


Paul E. McKenney

Feb 11, 2010, 7:10:02 PM
Hello!

The first 11 patches extend lockdep to check for proper protection
of rcu_dereference(), as described in http://lwn.net/Articles/371986/,
and apply these lockdep extensions in a number of areas in the kernel.
More such changes are likely still required, as I am limited to the
instances found on the systems I have access to.

The twelfth patch is a documentation update, and the last patch
accelerates grace periods when the current CPU is the last
non-dyntick-idle CPU in the system, which is important for some
multi-core battery-powered devices.
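
For readers new to the API: rcu_dereference_check() takes the
RCU-protected pointer plus a boolean expression stating the conditions
under which the access is legal, and lockdep complains when that
expression evaluates to false. A minimal sketch of the pattern (the
structure and lock names below are invented for illustration):

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct bar;

struct foo {
	struct bar *bar_ptr;	/* RCU-protected; updated under foo_lock. */
};

static DEFINE_SPINLOCK(foo_lock);

static struct bar *foo_get_bar(struct foo *f)
{
	/*
	 * Legal in an RCU read-side critical section or while holding
	 * foo_lock; anything else draws a lockdep complaint.
	 */
	return rcu_dereference_check(f->bar_ptr,
				     rcu_read_lock_held() ||
				     lockdep_is_held(&foo_lock));
}

The patches below apply this same pattern to existing rcu_dereference()
call sites, naming whatever lock or reference actually protects each one.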

b/Documentation/RCU/00-INDEX | 2
b/Documentation/RCU/RTFP.txt | 6 +
b/Documentation/RCU/checklist.txt | 34 ++++++----
b/Documentation/RCU/lockdep.txt | 67 ++++++++++++++++++++
b/Documentation/RCU/whatisRCU.txt | 14 ++--
b/fs/file.c | 2
b/fs/proc/array.c | 2
b/fs/proc/base.c | 6 +
b/include/linux/cgroup.h | 5 +
b/include/linux/cpumask.h | 14 ++++
b/include/linux/cred.h | 2
b/include/linux/fdtable.h | 9 +-
b/include/linux/lockdep.h | 4 +
b/include/linux/rculist.h | 14 ++--
b/include/linux/rculist_nulls.h | 5 -
b/include/linux/rcupdate.h | 126 ++++++++++++++++++++++++++++++++++----
b/include/linux/rtnetlink.h | 3
b/include/linux/srcu.h | 87 +++++++++++++++++++++++++-
b/include/net/addrconf.h | 4 -
b/init/Kconfig | 16 ++++
b/init/main.c | 2
b/kernel/cgroup.c | 12 +++
b/kernel/exit.c | 14 +++-
b/kernel/fork.c | 1
b/kernel/lockdep.c | 19 +++++
b/kernel/notifier.c | 6 -
b/kernel/pid.c | 2
b/kernel/rcupdate.c | 10 +++
b/kernel/rcutorture.c | 12 +++
b/kernel/rcutree.c | 5 -
b/kernel/rcutree_plugin.h | 70 ++++++++++++++++++++-
b/kernel/sched.c | 12 ++-
b/kernel/srcu.c | 50 +++++++++------
b/lib/Kconfig.debug | 12 +++
b/lib/debug_locks.c | 2
b/lib/idr.c | 9 +-
b/lib/radix-tree.c | 25 +++----
b/net/core/dev.c | 2
b/net/core/filter.c | 6 -
b/net/core/rtnetlink.c | 8 ++
b/net/core/sock.c | 3
b/net/decnet/dn_route.c | 14 ++--
b/net/ipv4/route.c | 14 ++--
b/net/packet/af_packet.c | 3
b/security/keys/gc.c | 3
b/security/keys/keyring.c | 5 -
include/linux/rcupdate.h | 45 ++++++++++---
include/linux/srcu.h | 9 ++
48 files changed, 657 insertions(+), 140 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Paul E. McKenney

Feb 11, 2010, 7:10:01 PM
Apply lockdep-ified RCU primitives to key_gc_keyring() and
keyring_destroy().

Cc: David Howells <dhow...@redhat.com>
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
security/keys/gc.c | 3 ++-
security/keys/keyring.c | 4 +++-
2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/security/keys/gc.c b/security/keys/gc.c
index 4770be3..1990231 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -77,7 +77,8 @@ static bool key_gc_keyring(struct key *keyring, time_t limit)
goto dont_gc;

/* scan the keyring looking for dead keys */
- klist = rcu_dereference(keyring->payload.subscriptions);
+ klist = rcu_dereference_check(keyring->payload.subscriptions,
+ lockdep_is_held(&key_serial_lock));
if (!klist)
goto dont_gc;

diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 8ec0274..e814d21 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -151,7 +151,9 @@ static void keyring_destroy(struct key *keyring)
write_unlock(&keyring_name_lock);
}

- klist = rcu_dereference(keyring->payload.subscriptions);
+ klist = rcu_dereference_check(keyring->payload.subscriptions,
+ rcu_read_lock_held() ||
+ atomic_read(&keyring->usage) == 0);
if (klist) {
for (loop = klist->nkeys - 1; loop >= 0; loop--)
key_put(klist->keys[loop]);
--
1.6.6

Paul E. McKenney

Feb 11, 2010, 7:10:02 PM
Because the radix tree is used with many different locking designs, we
cannot do any effective checking without changing the radix-tree APIs.
It might make sense to do this later, but only if the RCU lockdep checking
proves itself sufficiently valuable.
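
For context, the new rcu_dereference_raw() provides the same ordering
guarantees as rcu_dereference() but performs no lockdep checking at all,
making it the documented escape hatch when the protection cannot be
expressed. A rough sketch of the distinction (illustrative only, not
part of the patch):

#include <linux/rcupdate.h>
#include <linux/radix-tree.h>

/*
 * The caller supplies whatever locking the tree requires, so a lookup
 * cannot name a specific lock for lockdep to verify. Using
 * rcu_dereference_raw() documents that the checking is the caller's
 * responsibility rather than lockdep's.
 */
static void *example_peek_root(struct radix_tree_root *root)
{
	return rcu_dereference_raw(root->rnode);
}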

Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---

lib/radix-tree.c | 24 ++++++++++++------------
1 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 92cdd99..6b9670d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -364,7 +364,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
unsigned int height, shift;
struct radix_tree_node *node, **slot;

- node = rcu_dereference(root->rnode);
+ node = rcu_dereference_raw(root->rnode);
if (node == NULL)
return NULL;

@@ -384,7 +384,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
do {
slot = (struct radix_tree_node **)
(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
- node = rcu_dereference(*slot);
+ node = rcu_dereference_raw(*slot);
if (node == NULL)
return NULL;

@@ -568,7 +568,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
if (!root_tag_get(root, tag))
return 0;

- node = rcu_dereference(root->rnode);
+ node = rcu_dereference_raw(root->rnode);
if (node == NULL)
return 0;

@@ -602,7 +602,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
BUG_ON(ret && saw_unset_tag);
return !!ret;
}
- node = rcu_dereference(node->slots[offset]);
+ node = rcu_dereference_raw(node->slots[offset]);
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
@@ -711,7 +711,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
}

shift -= RADIX_TREE_MAP_SHIFT;
- slot = rcu_dereference(slot->slots[i]);
+ slot = rcu_dereference_raw(slot->slots[i]);
if (slot == NULL)
goto out;
}
@@ -758,7 +758,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long cur_index = first_index;
unsigned int ret;

- node = rcu_dereference(root->rnode);
+ node = rcu_dereference_raw(root->rnode);
if (!node)
return 0;

@@ -787,7 +787,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
slot = *(((void ***)results)[ret + i]);
if (!slot)
continue;
- results[ret + nr_found] = rcu_dereference(slot);
+ results[ret + nr_found] = rcu_dereference_raw(slot);
nr_found++;
}
ret += nr_found;
@@ -826,7 +826,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
unsigned long cur_index = first_index;
unsigned int ret;

- node = rcu_dereference(root->rnode);
+ node = rcu_dereference_raw(root->rnode);
if (!node)
return 0;

@@ -915,7 +915,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
}
}
shift -= RADIX_TREE_MAP_SHIFT;
- slot = rcu_dereference(slot->slots[i]);
+ slot = rcu_dereference_raw(slot->slots[i]);
if (slot == NULL)
break;
}
@@ -951,7 +951,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
if (!root_tag_get(root, tag))
return 0;

- node = rcu_dereference(root->rnode);
+ node = rcu_dereference_raw(root->rnode);
if (!node)
return 0;

@@ -980,7 +980,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
slot = *(((void ***)results)[ret + i]);
if (!slot)
continue;
- results[ret + nr_found] = rcu_dereference(slot);
+ results[ret + nr_found] = rcu_dereference_raw(slot);
nr_found++;
}
ret += nr_found;
@@ -1020,7 +1020,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
if (!root_tag_get(root, tag))
return 0;

- node = rcu_dereference(root->rnode);
+ node = rcu_dereference_raw(root->rnode);
if (!node)
return 0;

--
1.6.6

Paul E. McKenney

Feb 11, 2010, 7:10:03 PM
Update the rcu_dereference() usages to take advantage of the new
lockdep-based checking.

Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---

include/linux/cgroup.h | 5 ++++-
include/linux/cred.h | 2 +-
init/main.c | 2 ++
kernel/cgroup.c | 12 ++++++++++++
kernel/exit.c | 14 +++++++++++---
kernel/fork.c | 1 +
kernel/notifier.c | 6 +++---
kernel/pid.c | 2 +-
kernel/sched.c | 11 ++++++++---
9 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0008dee..c9bbcb2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -28,6 +28,7 @@ struct css_id;
extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_lock(void);
+extern int cgroup_lock_is_held(void);
extern bool cgroup_lock_live_group(struct cgroup *cgrp);
extern void cgroup_unlock(void);
extern void cgroup_fork(struct task_struct *p);
@@ -486,7 +487,9 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
static inline struct cgroup_subsys_state *task_subsys_state(
struct task_struct *task, int subsys_id)
{
- return rcu_dereference(task->cgroups->subsys[subsys_id]);
+ return rcu_dereference_check(task->cgroups->subsys[subsys_id],
+ rcu_read_lock_held() ||
+ cgroup_lock_is_held());
}

static inline struct cgroup* task_cgroup(struct task_struct *task,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4e3387a..4db09f8 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -280,7 +280,7 @@ static inline void put_cred(const struct cred *_cred)
* task or by holding tasklist_lock to prevent it from being unlinked.
*/
#define __task_cred(task) \
- ((const struct cred *)(rcu_dereference((task)->real_cred)))
+ ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock))))

/**
* get_task_cred - Get another task's objective credentials
diff --git a/init/main.c b/init/main.c
index dac44a9..ea6280c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -416,7 +416,9 @@ static noinline void __init_refok rest_init(void)
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+ rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+ rcu_read_unlock();
unlock_kernel();

/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1fbcc74..1b1373c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -166,6 +166,18 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
*/
static int need_forkexit_callback __read_mostly;

+#ifdef CONFIG_PROVE_LOCKING
+int cgroup_lock_is_held(void)
+{
+ return lockdep_is_held(&cgroup_mutex);
+}
+#else /* #ifdef CONFIG_PROVE_LOCKING */
+int cgroup_lock_is_held(void)
+{
+ return mutex_is_locked(&cgroup_mutex);
+}
+#endif /* #else #ifdef CONFIG_PROVE_LOCKING */
+
/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
diff --git a/kernel/exit.c b/kernel/exit.c
index 546774a..45ed043 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,9 @@ static void __exit_signal(struct task_struct *tsk)
BUG_ON(!sig);
BUG_ON(!atomic_read(&sig->count));

- sighand = rcu_dereference(tsk->sighand);
+ sighand = rcu_dereference_check(tsk->sighand,
+ rcu_read_lock_held() ||
+ lockdep_is_held(&tasklist_lock));
spin_lock(&sighand->siglock);

posix_cpu_timers_exit(tsk);
@@ -170,8 +172,10 @@ void release_task(struct task_struct * p)
repeat:
tracehook_prepare_release_task(p);
/* don't need to get the RCU readlock here - the process is dead and
- * can't be modifying its own credentials */
+ * can't be modifying its own credentials. But shut RCU-lockdep up */
+ rcu_read_lock();
atomic_dec(&__task_cred(p)->user->processes);
+ rcu_read_unlock();

proc_flush_task(p);

@@ -473,9 +477,11 @@ static void close_files(struct files_struct * files)
/*
* It is safe to dereference the fd table without RCU or
* ->file_lock because this is the last reference to the
- * files structure.
+ * files structure. But use RCU to shut RCU-lockdep up.
*/
+ rcu_read_lock();
fdt = files_fdtable(files);
+ rcu_read_unlock();
for (;;) {
unsigned long set;
i = j * __NFDBITS;
@@ -521,10 +527,12 @@ void put_files_struct(struct files_struct *files)
* at the end of the RCU grace period. Otherwise,
* you can free files immediately.
*/
+ rcu_read_lock();
fdt = files_fdtable(files);
if (fdt != &files->fdtab)
kmem_cache_free(files_cachep, files);
free_fdtable(fdt);
+ rcu_read_unlock();
}
}

diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b..e01ec3e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,6 +86,7 @@ int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;

__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+EXPORT_SYMBOL_GPL(tasklist_lock);

int nr_processes(void)
{
diff --git a/kernel/notifier.c b/kernel/notifier.c
index acd24e7..2488ba7 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -78,10 +78,10 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
int ret = NOTIFY_DONE;
struct notifier_block *nb, *next_nb;

- nb = rcu_dereference(*nl);
+ nb = rcu_dereference_raw(*nl);

while (nb && nr_to_call) {
- next_nb = rcu_dereference(nb->next);
+ next_nb = rcu_dereference_raw(nb->next);

#ifdef CONFIG_DEBUG_NOTIFIERS
if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
@@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
* racy then it does not matter what the result of the test
* is, we re-check the list after having taken the lock anyway:
*/
- if (rcu_dereference(nh->head)) {
+ if (rcu_dereference_raw(nh->head)) {
down_read(&nh->rwsem);
ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
nr_calls);
diff --git a/kernel/pid.c b/kernel/pid.c
index 2e17c9c..b08e697 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
- first = rcu_dereference(pid->tasks[type].first);
+ first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}
diff --git a/kernel/sched.c b/kernel/sched.c
index c535cc4..ad419d9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -645,6 +645,11 @@ static inline int cpu_of(struct rq *rq)
#endif
}

+#define for_each_domain_rd(p) \
+ rcu_dereference_check((p), \
+ rcu_read_lock_sched_held() || \
+ lockdep_is_held(&sched_domains_mutex))
+
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
@@ -653,7 +658,7 @@ static inline int cpu_of(struct rq *rq)
* preempt-disabled sections.
*/
#define for_each_domain(cpu, __sd) \
- for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+ for (__sd = for_each_domain_rd(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)

#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() (&__get_cpu_var(runqueues))
@@ -1531,7 +1536,7 @@ static unsigned long target_load(int cpu, int type)

static struct sched_group *group_of(int cpu)
{
- struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+ struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);

if (!sd)
return NULL;
@@ -4877,7 +4882,7 @@ static void run_rebalance_domains(struct softirq_action *h)

static inline int on_null_domain(int cpu)
{
- return !rcu_dereference(cpu_rq(cpu)->sd);
+ return !rcu_dereference_sched(cpu_rq(cpu)->sd);
}

/*
--
1.6.6

Peter Zijlstra

Feb 14, 2010, 5:20:02 AM
On Thu, 2010-02-11 at 16:00 -0800, Paul E. McKenney wrote:
> + first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
> if (first)
> result = hlist_entry(first, struct task_struct, pids[(type)].node);
> }

I've seen that particular combination a few times in this patch; would
it make sense to create rcu_dereference_task()?
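
(A wrapper along those lines might look like the following hypothetical
sketch; no such helper exists in the posted series:)

#define rcu_dereference_task(p) \
	rcu_dereference_check((p), \
			      rcu_read_lock_held() || \
			      lockdep_is_held(&tasklist_lock))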

> diff --git a/kernel/sched.c b/kernel/sched.c
> index c535cc4..ad419d9 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -645,6 +645,11 @@ static inline int cpu_of(struct rq *rq)
> #endif
> }
>
> +#define for_each_domain_rd(p) \
> + rcu_dereference_check((p), \
> + rcu_read_lock_sched_held() || \
> + lockdep_is_held(&sched_domains_mutex))
> +

Would rcu_dereference_rd() not be a better name?

Paul E. McKenney

Feb 14, 2010, 12:50:02 PM
On Sun, Feb 14, 2010 at 11:12:12AM +0100, Peter Zijlstra wrote:
> On Thu, 2010-02-11 at 16:00 -0800, Paul E. McKenney wrote:
> > + first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
> > if (first)
> > result = hlist_entry(first, struct task_struct, pids[(type)].node);
> > }
>
> I've seen that particular combination a few times in this patch; would
> it make sense to create rcu_dereference_task()?
>
> > diff --git a/kernel/sched.c b/kernel/sched.c
> > index c535cc4..ad419d9 100644
> > --- a/kernel/sched.c
> > +++ b/kernel/sched.c
> > @@ -645,6 +645,11 @@ static inline int cpu_of(struct rq *rq)
> > #endif
> > }
> >
> > +#define for_each_domain_rd(p) \
> > + rcu_dereference_check((p), \
> > + rcu_read_lock_sched_held() || \
> > + lockdep_is_held(&sched_domains_mutex))
> > +
>
> Would rcu_dereference_rd() not be a better name?

We are probably going to need per-subsystem name spaces, so how about
rcu_dereference_check_sched_domain()? Again, if agreeable, will send
updated patch stack.

Thanx, Paul

------------------------------------------------------------------------

sched: better name for for_each_domain_rd

As suggested by Peter Zijlstra, give for_each_domain_rd() a better
name, one containing "rcu_dereference", given that it is but a wrapper
for rcu_dereference_check(). The name
rcu_dereference_check_sched_domain() does that and provides a separate
per-subsystem name space.

Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---

diff --git a/kernel/sched.c b/kernel/sched.c
index ad419d9..478fc7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -645,7 +645,7 @@ static inline int cpu_of(struct rq *rq)
#endif
}

-#define for_each_domain_rd(p) \
+#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
rcu_read_lock_sched_held() || \
lockdep_is_held(&sched_domains_mutex))
@@ -658,7 +658,7 @@ static inline int cpu_of(struct rq *rq)
 * preempt-disabled sections.
 */
#define for_each_domain(cpu, __sd) \
- for (__sd = for_each_domain_rd(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)

#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() (&__get_cpu_var(runqueues))
