Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
include/linux/rtnetlink.h | 3 +++
include/net/addrconf.h | 4 +++-
net/core/dev.c | 2 +-
net/core/filter.c | 6 +++---
net/core/rtnetlink.c | 8 ++++++++
net/core/sock.c | 3 ++-
net/decnet/dn_route.c | 14 +++++++-------
net/ipv4/route.c | 14 +++++++-------
net/packet/af_packet.c | 2 +-
9 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 05330fc..5c52fa4 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -735,6 +735,9 @@ extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
+#ifdef CONFIG_PROVE_LOCKING
+extern int lockdep_rtnl_is_held(void);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
extern void rtnetlink_init(void);
extern void __rtnl_unlock(void);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 0f7c378..45375b4 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -177,7 +177,9 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb);
static inline struct inet6_dev *
__in6_dev_get(struct net_device *dev)
{
- return rcu_dereference(dev->ip6_ptr);
+ return rcu_dereference_check(dev->ip6_ptr,
+ rcu_read_lock_held() ||
+ lockdep_rtnl_is_held());
}
static inline struct inet6_dev *
diff --git a/net/core/dev.c b/net/core/dev.c
index be9924f..0d0ff82 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2041,7 +2041,7 @@ gso:
rcu_read_lock_bh();
txq = dev_pick_tx(dev, skb);
- q = rcu_dereference(txq->qdisc);
+ q = rcu_dereference_bh(txq->qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
diff --git a/net/core/filter.c b/net/core/filter.c
index 08db7b9..3541aa4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -86,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
return err;
rcu_read_lock_bh();
- filter = rcu_dereference(sk->sk_filter);
+ filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
unsigned int pkt_len = sk_run_filter(skb, filter->insns,
filter->len);
@@ -521,7 +521,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
}
rcu_read_lock_bh();
- old_fp = rcu_dereference(sk->sk_filter);
+ old_fp = rcu_dereference_bh(sk->sk_filter);
rcu_assign_pointer(sk->sk_filter, fp);
rcu_read_unlock_bh();
@@ -536,7 +536,7 @@ int sk_detach_filter(struct sock *sk)
struct sk_filter *filter;
rcu_read_lock_bh();
- filter = rcu_dereference(sk->sk_filter);
+ filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
rcu_assign_pointer(sk->sk_filter, NULL);
sk_filter_delayed_uncharge(sk, filter);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 794bcb8..4c7d3f6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -89,6 +89,14 @@ int rtnl_is_locked(void)
}
EXPORT_SYMBOL(rtnl_is_locked);
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_rtnl_is_held(void)
+{
+ return lockdep_is_held(&rtnl_mutex);
+}
+EXPORT_SYMBOL(lockdep_rtnl_is_held);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+
static struct rtnl_link *rtnl_msg_handlers[NPROTO];
static inline int rtm_msgindex(int msgtype)
diff --git a/net/core/sock.c b/net/core/sock.c
index e1f6f22..305cba4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1073,7 +1073,8 @@ static void __sk_free(struct sock *sk)
if (sk->sk_destruct)
sk->sk_destruct(sk);
- filter = rcu_dereference(sk->sk_filter);
+ filter = rcu_dereference_check(sk->sk_filter,
+ atomic_read(&sk->sk_wmem_alloc) == 0);
if (filter) {
sk_filter_uncharge(sk, filter);
rcu_assign_pointer(sk->sk_filter, NULL);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a032840..a7bf03c 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1155,8 +1155,8 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
if (!(flags & MSG_TRYHARD)) {
rcu_read_lock_bh();
- for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt;
- rt = rcu_dereference(rt->u.dst.dn_next)) {
+ for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
+ rt = rcu_dereference_bh(rt->u.dst.dn_next)) {
if ((flp->fld_dst == rt->fl.fld_dst) &&
(flp->fld_src == rt->fl.fld_src) &&
(flp->mark == rt->fl.mark) &&
@@ -1618,9 +1618,9 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (h > s_h)
s_idx = 0;
rcu_read_lock_bh();
- for(rt = rcu_dereference(dn_rt_hash_table[h].chain), idx = 0;
+ for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
rt;
- rt = rcu_dereference(rt->u.dst.dn_next), idx++) {
+ rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) {
if (idx < s_idx)
continue;
skb_dst_set(skb, dst_clone(&rt->u.dst));
@@ -1654,12 +1654,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
rcu_read_lock_bh();
- rt = dn_rt_hash_table[s->bucket].chain;
+ rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
if (rt)
break;
rcu_read_unlock_bh();
}
- return rcu_dereference(rt);
+ return rt;
}
static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
@@ -1674,7 +1674,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
rcu_read_lock_bh();
rt = dn_rt_hash_table[s->bucket].chain;
}
- return rcu_dereference(rt);
+ return rcu_dereference_bh(rt);
}
static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e446496..3476b3b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
if (!rt_hash_table[st->bucket].chain)
continue;
rcu_read_lock_bh();
- r = rcu_dereference(rt_hash_table[st->bucket].chain);
+ r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
while (r) {
if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
r->rt_genid == st->genid)
return r;
- r = rcu_dereference(r->u.dst.rt_next);
+ r = rcu_dereference_bh(r->u.dst.rt_next);
}
rcu_read_unlock_bh();
}
@@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
rcu_read_lock_bh();
r = rt_hash_table[st->bucket].chain;
}
- return rcu_dereference(r);
+ return rcu_dereference_bh(r);
}
static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -2687,8 +2687,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
rcu_read_lock_bh();
- for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
- rth = rcu_dereference(rth->u.dst.rt_next)) {
+ for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
+ rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
if (rth->fl.fl4_dst == flp->fl4_dst &&
rth->fl.fl4_src == flp->fl4_src &&
rth->fl.iif == 0 &&
@@ -3006,8 +3006,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (!rt_hash_table[h].chain)
continue;
rcu_read_lock_bh();
- for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
- rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
+ for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
+ rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
continue;
if (rt_is_expired(rt))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f126d18..939471e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -508,7 +508,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
struct sk_filter *filter;
rcu_read_lock_bh();
- filter = rcu_dereference(sk->sk_filter);
+ filter = rcu_dereference_bh(sk->sk_filter);
if (filter != NULL)
res = sk_run_filter(skb, filter->insns, filter->len);
rcu_read_unlock_bh();
--
1.6.6
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Cc: Alexander Viro <vi...@zeniv.linux.org.uk>
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
fs/file.c | 2 +-
fs/proc/array.c | 2 ++
fs/proc/base.c | 6 +++++-
include/linux/fdtable.h | 8 ++++++--
4 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index 87e1290..38039af 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -478,7 +478,7 @@ repeat:
error = fd;
#if 1
/* Sanity check */
- if (rcu_dereference(fdt->fd[fd]) != NULL) {
+ if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
rcu_assign_pointer(fdt->fd[fd], NULL);
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 13b5d07..18e20fe 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -270,7 +270,9 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
blocked = p->blocked;
collect_sigign_sigcatch(p, &ignored, &caught);
num_threads = atomic_read(&p->signal->count);
+ rcu_read_lock(); /* FIXME: is this correct? */
qsize = atomic_read(&__task_cred(p)->user->sigpending);
+ rcu_read_unlock();
qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
unlock_task_sighand(p, &flags);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18d5cc6..db19ca8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1095,8 +1095,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
if (!capable(CAP_AUDIT_CONTROL))
return -EPERM;
- if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
+ rcu_read_lock();
+ if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
+ rcu_read_unlock();
return -EPERM;
+ }
+ rcu_read_unlock();
if (count >= PAGE_SIZE)
count = PAGE_SIZE - 1;
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index a2ec74b..144412f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,7 +57,11 @@ struct files_struct {
struct file * fd_array[NR_OPEN_DEFAULT];
};
-#define files_fdtable(files) (rcu_dereference((files)->fdt))
+#define files_fdtable(files) \
+ (rcu_dereference_check((files)->fdt, \
+ rcu_read_lock_held() || \
+ lockdep_is_held(&(files)->file_lock) || \
+ atomic_read(&files->count) == 1))
struct file_operations;
struct vfsmount;
@@ -78,7 +82,7 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in
struct fdtable *fdt = files_fdtable(files);
if (fd < fdt->max_fds)
- file = rcu_dereference(fdt->fd[fd]);
+ file = rcu_dereference_check(fdt->fd[fd], rcu_read_lock_held() || lockdep_is_held(&files->file_lock) || atomic_read(&files->count) == 1);
return file;
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
include/linux/lockdep.h | 4 ++++
include/linux/rcupdate.h | 4 ++--
kernel/lockdep.c | 18 ++++++++++++++++++
3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9ccf0e2..10206a8 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -534,4 +534,8 @@ do { \
# define might_lock_read(lock) do { } while (0)
#endif
+#ifdef CONFIG_PROVE_RCU
+extern void lockdep_rcu_dereference(const char *file, const int line);
+#endif
+
#endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 839d296..1a4de31 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -182,8 +182,8 @@ static inline int rcu_read_lock_sched_held(void)
*/
#define rcu_dereference_check(p, c) \
({ \
- if (debug_locks) \
- WARN_ON_ONCE(!(c)); \
+ if (debug_locks && !(c)) \
+ lockdep_rcu_dereference(__FILE__, __LINE__); \
rcu_dereference_raw(p); \
})
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5feaddc..a44c1b2 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3809,3 +3809,21 @@ void lockdep_sys_exit(void)
lockdep_print_held_locks(curr);
}
}
+
+void lockdep_rcu_dereference(const char *file, const int line)
+{
+ struct task_struct *curr = current;
+
+ if (!debug_locks_off())
+ return;
+ printk("\n==============================================\n");
+ printk( "[ BUG: Unsafe rcu_dereference_check() usage! ]\n");
+ printk( "----------------------------------------------\n");
+ printk("%s:%d invoked rcu_dereference_check() without protection!\n",
+ file, line);
+ printk("\nother info that might help us debug this:\n\n");
+ lockdep_print_held_locks(curr);
+ printk("\nstack backtrace:\n");
+ dump_stack();
+}
+EXPORT_SYMBOL_GPL(lockdep_rcu_dereference);
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 5051209..96b7c00 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -864,9 +864,12 @@ Revised:
}
@phdthesis{MathieuDesnoyersPhD
-, title = "Low-impact Operating System Tracing"
+, title = "Low-Impact Operating System Tracing"
, author = "Mathieu Desnoyers"
, school = "Ecole Polytechnique de Montr\'{e}al"
, month = "December"
, year = 2009
+,note="Available:
+\url{http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12-v24.pdf}
+[Viewed December 9, 2009]"
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
lib/idr.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/lib/idr.c b/lib/idr.c
index 1cac726..4877fce 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -502,7 +502,7 @@ void *idr_find(struct idr *idp, int id)
int n;
struct idr_layer *p;
- p = rcu_dereference(idp->top);
+ p = rcu_dereference_raw(idp->top);
if (!p)
return NULL;
n = (p->layer+1) * IDR_BITS;
@@ -517,7 +517,7 @@ void *idr_find(struct idr *idp, int id)
while (n > 0 && p) {
n -= IDR_BITS;
BUG_ON(n != p->layer*IDR_BITS);
- p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+ p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
}
return((void *)p);
}
@@ -550,7 +550,7 @@ int idr_for_each(struct idr *idp,
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = rcu_dereference(idp->top);
+ p = rcu_dereference_raw(idp->top);
max = 1 << n;
id = 0;
@@ -558,7 +558,7 @@ int idr_for_each(struct idr *idp,
while (n > 0 && p) {
n -= IDR_BITS;
*paa++ = p;
- p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+ p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
}
if (p) {
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
include/linux/rcupdate.h | 41 ++++++++++++++++++++++++++++++++++-------
include/linux/srcu.h | 8 ++++++++
2 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e3d37ef..839d296 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -184,12 +184,12 @@ static inline int rcu_read_lock_sched_held(void)
({ \
if (debug_locks) \
WARN_ON_ONCE(!(c)); \
- rcu_dereference(p); \
+ rcu_dereference_raw(p); \
})
#else /* #ifdef CONFIG_PROVE_RCU */
-#define rcu_dereference_check(p, c) rcu_dereference(p)
+#define rcu_dereference_check(p, c) rcu_dereference_raw(p)
#endif /* #else #ifdef CONFIG_PROVE_RCU */
@@ -325,22 +325,49 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
/**
- * rcu_dereference - fetch an RCU-protected pointer in an
- * RCU read-side critical section. This pointer may later
- * be safely dereferenced.
+ * rcu_dereference_raw - fetch an RCU-protected pointer
+ *
+ * The caller must be within some flavor of RCU read-side critical
+ * section, or must be otherwise preventing the pointer from changing,
+ * for example, by holding an appropriate lock. This pointer may later
+ * be safely dereferenced. It is the caller's responsibility to have
+ * done the right thing, as this primitive does no checking of any kind.
*
* Inserts memory barriers on architectures that require them
* (currently only the Alpha), and, more importantly, documents
* exactly which pointers are protected by RCU.
*/
-
-#define rcu_dereference(p) ({ \
+#define rcu_dereference_raw(p) ({ \
typeof(p) _________p1 = ACCESS_ONCE(p); \
smp_read_barrier_depends(); \
(_________p1); \
})
/**
+ * rcu_dereference - fetch an RCU-protected pointer, checking for RCU
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define rcu_dereference(p) \
+ rcu_dereference_check(p, rcu_read_lock_held())
+
+/**
+ * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define rcu_dereference_bh(p) \
+ rcu_dereference_check(p, rcu_read_lock_bh_held())
+
+/**
+ * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define rcu_dereference_sched(p) \
+ rcu_dereference_check(p, rcu_read_lock_sched_held())
+
+/**
* rcu_assign_pointer - assign (publicize) a pointer to a newly
* initialized structure that will be dereferenced by RCU read-side
* critical sections. Returns the value assigned.
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index adbe167..3084f80 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -106,6 +106,14 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/**
+ * srcu_dereference - fetch SRCU-protected pointer with checking
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define srcu_dereference(p, sp) \
+ rcu_dereference_check(p, srcu_read_lock_held(sp))
+
+/**
* srcu_read_lock - register a new reader for an SRCU-protected structure.
* @sp: srcu_struct in which to register the new reader.
*
This patch therefore repeatedly invokes the RCU grace-period machinery
in order to force any needed grace periods to complete quickly. It does
so a limited number of times in order to prevent starvation by an RCU
callback function that might pass itself to call_rcu().
However, if any CPU other than the current one is not in dyntick-idle
mode, fall back to simply checking (with fix to bug noted by Lai
Jiangshan). Also, take advantage of last grace-period forcing, the
opportunity to do so noted by Steve Rostedt. And apply simplified #ifdef
condition suggested by Frederic Weisbecker.
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
include/linux/cpumask.h | 14 +++++++++
init/Kconfig | 16 +++++++++++
kernel/rcutree.c | 5 +--
kernel/rcutree_plugin.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 101 insertions(+), 3 deletions(-)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d77b547..dbcee76 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -143,6 +143,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
#define for_each_cpu(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_not(cpu, mask) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define for_each_cpu_and(cpu, mask, and) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
#else
@@ -203,6 +205,18 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
(cpu) < nr_cpu_ids;)
/**
+ * for_each_cpu_not - iterate over every cpu in a complemented mask
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_not(cpu, mask) \
+ for ((cpu) = -1; \
+ (cpu) = cpumask_next_zero((cpu), (mask)), \
+ (cpu) < nr_cpu_ids;)
+
+/**
* for_each_cpu_and - iterate over every cpu in both masks
* @cpu: the (optionally unsigned) integer iterator
* @mask: the first cpumask pointer
diff --git a/init/Kconfig b/init/Kconfig
index d95ca7c..42bf914 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -396,6 +396,22 @@ config RCU_FANOUT_EXACT
Say N if unsure.
+config RCU_FAST_NO_HZ
+ bool "Accelerate last non-dyntick-idle CPU's grace periods"
+ depends on TREE_RCU && NO_HZ && SMP
+ default n
+ help
+ This option causes RCU to attempt to accelerate grace periods
+ in order to allow the final CPU to enter dynticks-idle state
+ more quickly. On the other hand, this option increases the
+ overhead of the dynticks-idle checking, particularly on systems
+ with large numbers of CPUs.
+
+ Say Y if energy efficiency is critically important, particularly
+ if you have relatively few CPUs.
+
+ Say N if you are unsure.
+
config TREE_RCU_TRACE
def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU )
select DEBUG_FS
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 099a255..29d88c0 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1550,10 +1550,9 @@ static int rcu_pending(int cpu)
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
- * 1 if so. This function is part of the RCU implementation; it is -not-
- * an exported member of the RCU API.
+ * 1 if so.
*/
-int rcu_needs_cpu(int cpu)
+static int rcu_needs_cpu_quick_check(int cpu)
{
/* RCU callbacks either ready or pending? */
return per_cpu(rcu_sched_data, cpu).nxtlist ||
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index e77cdf3..a825666 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -906,3 +906,72 @@ static void __init __rcu_init_preempt(void)
}
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we have preemptible RCU, just check whether this CPU needs
+ * any flavor of RCU. Do not chew up lots of CPU cycles with preemption
+ * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
+ */
+int rcu_needs_cpu(int cpu)
+{
+ return rcu_needs_cpu_quick_check(cpu);
+}
+
+#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#define RCU_NEEDS_CPU_FLUSHES 5
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we are not supporting preemptible RCU, attempt to accelerate
+ * any current grace periods so that RCU no longer needs this CPU, but
+ * only if all other CPUs are already in dynticks-idle mode. This will
+ * allow the CPU cores to be powered down immediately, as opposed to after
+ * waiting many milliseconds for grace periods to elapse.
+ */
+int rcu_needs_cpu(int cpu)
+{
+ int c = 1;
+ int i;
+ int thatcpu;
+
+ /* Don't bother unless we are the last non-dyntick-idle CPU. */
+ for_each_cpu_not(thatcpu, nohz_cpu_mask)
+ if (thatcpu != cpu)
+ return rcu_needs_cpu_quick_check(cpu);
+
+ /* Try to push remaining RCU-sched and RCU-bh callbacks through. */
+ for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) {
+ c = 0;
+ if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+ rcu_sched_qs(cpu);
+ force_quiescent_state(&rcu_sched_state, 0);
+ __rcu_process_callbacks(&rcu_sched_state,
+ &per_cpu(rcu_sched_data, cpu));
+ c = !!per_cpu(rcu_sched_data, cpu).nxtlist;
+ }
+ if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+ rcu_bh_qs(cpu);
+ force_quiescent_state(&rcu_bh_state, 0);
+ __rcu_process_callbacks(&rcu_bh_state,
+ &per_cpu(rcu_bh_data, cpu));
+ c = !!per_cpu(rcu_bh_data, cpu).nxtlist;
+ }
+ }
+
+ /* If RCU callbacks are still pending, RCU still needs this CPU. */
+ return c;
+}
+
+#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
include/linux/rculist.h | 14 +++++++-------
include/linux/rculist_nulls.h | 4 ++--
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 1bf0f70..779d707 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -208,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
#define list_entry_rcu(ptr, type, member) \
- container_of(rcu_dereference(ptr), type, member)
+ container_of(rcu_dereference_raw(ptr), type, member)
/**
* list_first_entry_rcu - get the first element from a list
@@ -225,9 +225,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
list_entry_rcu((ptr)->next, type, member)
#define __list_for_each_rcu(pos, head) \
- for (pos = rcu_dereference((head)->next); \
+ for (pos = rcu_dereference_raw((head)->next); \
pos != (head); \
- pos = rcu_dereference(pos->next))
+ pos = rcu_dereference_raw(pos->next))
/**
* list_for_each_entry_rcu - iterate over rcu list of given type
@@ -257,9 +257,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
* as long as the traversal is guarded by rcu_read_lock().
*/
#define list_for_each_continue_rcu(pos, head) \
- for ((pos) = rcu_dereference((pos)->next); \
+ for ((pos) = rcu_dereference_raw((pos)->next); \
prefetch((pos)->next), (pos) != (head); \
- (pos) = rcu_dereference((pos)->next))
+ (pos) = rcu_dereference_raw((pos)->next))
/**
* list_for_each_entry_continue_rcu - continue iteration over list of given type
@@ -418,10 +418,10 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
* as long as the traversal is guarded by rcu_read_lock().
*/
#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
- for (pos = rcu_dereference((head)->first); \
+ for (pos = rcu_dereference_raw((head)->first); \
pos && ({ prefetch(pos->next); 1; }) && \
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
- pos = rcu_dereference(pos->next))
+ pos = rcu_dereference_raw(pos->next))
#endif /* __KERNEL__ */
#endif
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 589a409..b70ffe5 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -101,10 +101,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
*
*/
#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
- for (pos = rcu_dereference((head)->first); \
+ for (pos = rcu_dereference_raw((head)->first); \
(!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
- pos = rcu_dereference(pos->next))
+ pos = rcu_dereference_raw(pos->next))
#endif
#endif
This set of patches is inspired by earlier work by Peter Zijlstra and
Thomas Gleixner, and takes the following approach:
o Set up separate lockdep classes for RCU, RCU-bh, and RCU-sched.
o Set up separate lockdep classes for each instance of SRCU.
o Create primitives that check for being in an RCU read-side
critical section. These return exact answers if lockdep is
fully enabled, but if unsure, report being in an RCU read-side
critical section. (We want to avoid false positives!)
The primitives are:
For RCU: rcu_read_lock_held(void)
For RCU-bh: rcu_read_lock_bh_held(void)
For RCU-sched: rcu_read_lock_sched_held(void)
For SRCU: srcu_read_lock_held(struct srcu_struct *sp)
o Add rcu_dereference_check(), which takes a second argument
in which one places a boolean expression based on the above
primitives and/or lockdep_is_held().
o A new kernel configuration parameter, CONFIG_PROVE_RCU, enables
rcu_dereference_check(). This depends on CONFIG_PROVE_LOCKING,
and should be quite helpful during the transition period while
CONFIG_PROVE_RCU-unaware patches are in flight.
The existing rcu_dereference() primitive does no checking, but upcoming
patches will change that.
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
include/linux/rcupdate.h | 126 ++++++++++++++++++++++++++++++++++++++++++----
include/linux/srcu.h | 87 +++++++++++++++++++++++++++++++-
kernel/rcupdate.c | 10 ++++
kernel/rcutorture.c | 12 ++++-
kernel/srcu.c | 50 ++++++++++++------
lib/Kconfig.debug | 12 ++++
lib/debug_locks.c | 1 +
7 files changed, 267 insertions(+), 31 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 24440f4..e3d37ef 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -78,14 +78,120 @@ extern void rcu_init(void);
} while (0)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
extern struct lockdep_map rcu_lock_map;
-# define rcu_read_acquire() \
- lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_acquire() \
+ lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
-#else
-# define rcu_read_acquire() do { } while (0)
-# define rcu_read_release() do { } while (0)
-#endif
+
+extern struct lockdep_map rcu_bh_lock_map;
+# define rcu_read_acquire_bh() \
+ lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_)
+
+extern struct lockdep_map rcu_sched_lock_map;
+# define rcu_read_acquire_sched() \
+ lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release_sched() \
+ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
+
+/**
+ * rcu_read_lock_held - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
+ * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise.
+ */
+static inline int rcu_read_lock_held(void)
+{
+ if (debug_locks)
+ return lock_is_held(&rcu_lock_map);
+ return 1;
+}
+
+/**
+ * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
+ * an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an RCU-bh read-side critical section unless it can
+ * prove otherwise.
+ */
+static inline int rcu_read_lock_bh_held(void)
+{
+ if (debug_locks)
+ return lock_is_held(&rcu_bh_lock_map);
+ return 1;
+}
+
+/**
+ * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in an
+ * RCU-sched read-side critical section. In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an RCU-sched read-side critical section unless it
+ * can prove otherwise. Note that disabling of preemption (including
+ * disabling irqs) counts as an RCU-sched read-side critical section.
+ */
+static inline int rcu_read_lock_sched_held(void)
+{
+ int lockdep_opinion = 0;
+
+ if (debug_locks)
+ lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
+ return lockdep_opinion || preempt_count() != 0;
+}
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+# define rcu_read_acquire() do { } while (0)
+# define rcu_read_release() do { } while (0)
+# define rcu_read_acquire_bh() do { } while (0)
+# define rcu_read_release_bh() do { } while (0)
+# define rcu_read_acquire_sched() do { } while (0)
+# define rcu_read_release_sched() do { } while (0)
+
+static inline int rcu_read_lock_held(void)
+{
+ return 1;
+}
+
+static inline int rcu_read_lock_bh_held(void)
+{
+ return 1;
+}
+
+static inline int rcu_read_lock_sched_held(void)
+{
+ return preempt_count() != 0;
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_PROVE_RCU
+
+/**
+ * rcu_dereference_check - rcu_dereference with debug checking
+ *
+ * Do an rcu_dereference(), but check that the context is correct.
+ * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to
+ * ensure that the rcu_dereference_check() executes within an RCU
+ * read-side critical section. It is also possible to check for
+ * locks being held, for example, by using lockdep_is_held().
+ */
+#define rcu_dereference_check(p, c) \
+ ({ \
+ if (debug_locks) \
+ WARN_ON_ONCE(!(c)); \
+ rcu_dereference(p); \
+ })
+
+#else /* #ifdef CONFIG_PROVE_RCU */
+
+#define rcu_dereference_check(p, c) rcu_dereference(p)
+
+#endif /* #else #ifdef CONFIG_PROVE_RCU */
/**
* rcu_read_lock - mark the beginning of an RCU read-side critical section.
@@ -160,7 +266,7 @@ static inline void rcu_read_lock_bh(void)
{
__rcu_read_lock_bh();
__acquire(RCU_BH);
- rcu_read_acquire();
+ rcu_read_acquire_bh();
}
/*
@@ -170,7 +276,7 @@ static inline void rcu_read_lock_bh(void)
*/
static inline void rcu_read_unlock_bh(void)
{
- rcu_read_release();
+ rcu_read_release_bh();
__release(RCU_BH);
__rcu_read_unlock_bh();
}
@@ -188,7 +294,7 @@ static inline void rcu_read_lock_sched(void)
{
preempt_disable();
__acquire(RCU_SCHED);
- rcu_read_acquire();
+ rcu_read_acquire_sched();
}
/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -205,7 +311,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
*/
static inline void rcu_read_unlock_sched(void)
{
- rcu_read_release();
+ rcu_read_release_sched();
__release(RCU_SCHED);
preempt_enable();
}
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4765d97..adbe167 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -35,6 +35,9 @@ struct srcu_struct {
int completed;
struct srcu_struct_array *per_cpu_ref;
struct mutex mutex;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
};
#ifndef CONFIG_PREEMPT
@@ -43,12 +46,92 @@ struct srcu_struct {
#define srcu_barrier()
#endif /* #else #ifndef CONFIG_PREEMPT */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+ struct lock_class_key *key);
+
+#define init_srcu_struct(sp) \
+({ \
+ static struct lock_class_key __srcu_key; \
+ \
+ __init_srcu_struct((sp), #sp, &__srcu_key); \
+})
+
+# define srcu_read_acquire(sp) \
+ lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define srcu_read_release(sp) \
+ lock_release(&(sp)->dep_map, 1, _THIS_IP_)
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
int init_srcu_struct(struct srcu_struct *sp);
+
+# define srcu_read_acquire(sp) do { } while (0)
+# define srcu_read_release(sp) do { } while (0)
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
void cleanup_srcu_struct(struct srcu_struct *sp);
-int srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
-void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
+int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
+void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
void synchronize_srcu(struct srcu_struct *sp);
void synchronize_srcu_expedited(struct srcu_struct *sp);
long srcu_batches_completed(struct srcu_struct *sp);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+/**
+ * srcu_read_lock_held - might we be in SRCU read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
+ * an SRCU read-side critical section. In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an SRCU read-side critical section unless it can
+ * prove otherwise.
+ */
+static inline int srcu_read_lock_held(struct srcu_struct *sp)
+{
+ if (debug_locks)
+ return lock_is_held(&sp->dep_map);
+ return 1;
+}
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+static inline int srcu_read_lock_held(struct srcu_struct *sp)
+{
+ return 1;
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * srcu_read_lock - register a new reader for an SRCU-protected structure.
+ * @sp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section. Note that SRCU read-side
+ * critical sections may be nested.
+ */
+static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
+{
+ int retval = __srcu_read_lock(sp);
+
+ srcu_read_acquire(sp);
+ return retval;
+}
+
+/**
+ * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
+ * @sp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit an SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
+ __releases(sp)
+{
+ srcu_read_release(sp);
+ __srcu_read_unlock(sp, idx);
+}
+
#endif
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 9b7fd47..033cb55 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -50,6 +50,16 @@ static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
+
+static struct lock_class_key rcu_bh_lock_key;
+struct lockdep_map rcu_bh_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
+EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
+
+static struct lock_class_key rcu_sched_lock_key;
+struct lockdep_map rcu_sched_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
+EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
#endif
/*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index adda92b..5f43f30 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -796,7 +796,11 @@ static void rcu_torture_timer(unsigned long unused)
idx = cur_ops->readlock();
completed = cur_ops->completed();
- p = rcu_dereference(rcu_torture_current);
+ p = rcu_dereference_check(rcu_torture_current,
+ rcu_read_lock_held() ||
+ rcu_read_lock_bh_held() ||
+ rcu_read_lock_sched_held() ||
+ srcu_read_lock_held(&srcu_ctl));
if (p == NULL) {
/* Leave because rcu_torture_writer is not yet underway */
cur_ops->readunlock(idx);
@@ -853,7 +857,11 @@ rcu_torture_reader(void *arg)
}
idx = cur_ops->readlock();
completed = cur_ops->completed();
- p = rcu_dereference(rcu_torture_current);
+ p = rcu_dereference_check(rcu_torture_current,
+ rcu_read_lock_held() ||
+ rcu_read_lock_bh_held() ||
+ rcu_read_lock_sched_held() ||
+ srcu_read_lock_held(&srcu_ctl));
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
cur_ops->readunlock(idx);
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 31b275b..bde4295 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -34,6 +34,30 @@
#include <linux/smp.h>
#include <linux/srcu.h>
+static int init_srcu_struct_fields(struct srcu_struct *sp)
+{
+ sp->completed = 0;
+ mutex_init(&sp->mutex);
+ sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+ return sp->per_cpu_ref ? 0 : -ENOMEM;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /* Don't re-initialize a lock while it is held. */
+ debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+ lockdep_init_map(&sp->dep_map, name, key, 0);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+ return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
/**
* init_srcu_struct - initialize a sleep-RCU structure
* @sp: structure to initialize.
@@ -44,13 +68,12 @@
*/
int init_srcu_struct(struct srcu_struct *sp)
{
- sp->completed = 0;
- mutex_init(&sp->mutex);
- sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
- return (sp->per_cpu_ref ? 0 : -ENOMEM);
+ return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
/*
* srcu_readers_active_idx -- returns approximate number of readers
* active on the specified rank of per-CPU counters.
@@ -100,15 +123,12 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-/**
- * srcu_read_lock - register a new reader for an SRCU-protected structure.
- * @sp: srcu_struct in which to register the new reader.
- *
+/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Must be called from process context.
* Returns an index that must be passed to the matching srcu_read_unlock().
*/
-int srcu_read_lock(struct srcu_struct *sp)
+int __srcu_read_lock(struct srcu_struct *sp)
{
int idx;
@@ -120,26 +140,22 @@ int srcu_read_lock(struct srcu_struct *sp)
preempt_enable();
return idx;
}
-EXPORT_SYMBOL_GPL(srcu_read_lock);
+EXPORT_SYMBOL_GPL(__srcu_read_lock);
-/**
- * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
- * @sp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock().
- *
+/*
* Removes the count for the old reader from the appropriate per-CPU
* element of the srcu_struct. Note that this may well be a different
* CPU than that which was incremented by the corresponding srcu_read_lock().
* Must be called from process context.
*/
-void srcu_read_unlock(struct srcu_struct *sp, int idx)
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
preempt_disable();
srcu_barrier(); /* ensure compiler won't misorder critical section. */
per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
preempt_enable();
}
-EXPORT_SYMBOL_GPL(srcu_read_unlock);
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6bf97d1..6af20a8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -499,6 +499,18 @@ config PROVE_LOCKING
For more details, see Documentation/lockdep-design.txt.
+config PROVE_RCU
+ bool "RCU debugging: prove RCU correctness"
+ depends on PROVE_LOCKING
+ default n
+ help
+ This feature enables lockdep extensions that check for correct
+ use of RCU APIs. This is currently under development. Say Y
+ if you want to debug RCU usage or help work on the PROVE_RCU
+ feature.
+
+ Say N if you are unsure.
+
config LOCKDEP
bool
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b117..5bf0020 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -23,6 +23,7 @@
* shut up after that.
*/
int debug_locks = 1;
+EXPORT_SYMBOL_GPL(debug_locks);
/*
* The locking-testsuite uses <debug_locks_silent> to get a
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
Documentation/RCU/00-INDEX | 2 +
Documentation/RCU/checklist.txt | 34 +++++++++++--------
Documentation/RCU/lockdep.txt | 67 +++++++++++++++++++++++++++++++++++++++
Documentation/RCU/whatisRCU.txt | 13 +++++---
4 files changed, 97 insertions(+), 19 deletions(-)
create mode 100644 Documentation/RCU/lockdep.txt
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 0a27ea9..71b6f50 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -6,6 +6,8 @@ checklist.txt
- Review Checklist for RCU Patches
listRCU.txt
- Using RCU to Protect Read-Mostly Linked Lists
+lockdep.txt
+ - RCU and lockdep checking
NMI-RCU.txt
- Using RCU to Protect Dynamic NMI Handlers
rcubarrier.txt
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 767cf06..cbc180f 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -127,10 +127,14 @@ over a rather long period of time, but improvements are always welcome!
perfectly legal (if redundant) for update-side code to
use rcu_dereference() and the "_rcu()" list-traversal
primitives. This is particularly useful in code that
- is common to readers and updaters. However, neither
- rcu_dereference() nor the "_rcu()" list-traversal
- primitives can substitute for a good concurrency design
- coordinating among multiple updaters.
+ is common to readers and updaters. However, lockdep
+ will complain if you access rcu_dereference() outside
+ of an RCU read-side critical section. See lockdep.txt
+ to learn what to do about this.
+
+ Of course, neither rcu_dereference() nor the "_rcu()"
+ list-traversal primitives can substitute for a good
+ concurrency design coordinating among multiple updaters.
b. If the list macros are being used, the list_add_tail_rcu()
and list_add_rcu() primitives must be used in order
@@ -249,7 +253,9 @@ over a rather long period of time, but improvements are always welcome!
must be protected by appropriate update-side locks. RCU
read-side critical sections are delimited by rcu_read_lock()
and rcu_read_unlock(), or by similar primitives such as
- rcu_read_lock_bh() and rcu_read_unlock_bh().
+ rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case
+ the matching rcu_dereference() primitive must be used in order
+ to keep lockdep happy, in this case, rcu_dereference_bh().
The reason that it is permissible to use RCU list-traversal
primitives when the update-side lock is held is that doing so
@@ -302,15 +308,15 @@ over a rather long period of time, but improvements are always welcome!
not the case, a self-spawning RCU callback would prevent the
victim CPU from ever going offline.)
-14. SRCU (srcu_read_lock(), srcu_read_unlock(), synchronize_srcu(),
- and synchronize_srcu_expedited()) may only be invoked from
- process context. Unlike other forms of RCU, it -is- permissible
- to block in an SRCU read-side critical section (demarked by
- srcu_read_lock() and srcu_read_unlock()), hence the "SRCU":
- "sleepable RCU". Please note that if you don't need to sleep
- in read-side critical sections, you should be using RCU rather
- than SRCU, because RCU is almost always faster and easier to
- use than is SRCU.
+14. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(),
+ synchronize_srcu(), and synchronize_srcu_expedited()) may only
+ be invoked from process context. Unlike other forms of RCU, it
+ -is- permissible to block in an SRCU read-side critical section
+ (demarked by srcu_read_lock() and srcu_read_unlock()), hence the
+ "SRCU": "sleepable RCU". Please note that if you don't need
+ to sleep in read-side critical sections, you should be using
+ RCU rather than SRCU, because RCU is almost always faster and
+ easier to use than is SRCU.
Also unlike other forms of RCU, explicit initialization
and cleanup is required via init_srcu_struct() and
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
new file mode 100644
index 0000000..fe24b58
--- /dev/null
+++ b/Documentation/RCU/lockdep.txt
@@ -0,0 +1,67 @@
+RCU and lockdep checking
+
+All flavors of RCU have lockdep checking available, so that lockdep is
+aware of when each task enters and leaves any flavor of RCU read-side
+critical section. Each flavor of RCU is tracked separately (but note
+that this is not the case in 2.6.32 and earlier). This allows lockdep's
+tracking to include RCU state, which can sometimes help when debugging
+deadlocks and the like.
+
+In addition, RCU provides the following primitives that check lockdep's
+state:
+
+ rcu_read_lock_held() for normal RCU.
+ rcu_read_lock_bh_held() for RCU-bh.
+ rcu_read_lock_sched_held() for RCU-sched.
+ srcu_read_lock_held() for SRCU.
+
+These functions are conservative, and will therefore return 1 if they
+aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
+This prevents things like WARN_ON(!rcu_read_lock_held()) from giving false
+positives when lockdep is disabled.
+
+In addition, a separate kernel config parameter CONFIG_PROVE_RCU enables
+checking of rcu_dereference() primitives:
+
+ rcu_dereference(p):
+ Check for RCU read-side critical section.
+ rcu_dereference_bh(p):
+ Check for RCU-bh read-side critical section.
+ rcu_dereference_sched(p):
+ Check for RCU-sched read-side critical section.
+ srcu_dereference(p, sp):
+ Check for SRCU read-side critical section.
+ rcu_dereference_check(p, c):
+ Use explicit check expression "c".
+ rcu_dereference_raw(p)
+ Don't check. (Use sparingly, if at all.)
+
+The rcu_dereference_check() check expression can be any boolean
+expression, but would normally include one of the rcu_read_lock_held()
+family of functions and a lockdep expression. However, any boolean
+expression can be used. For a moderately ornate example, consider
+the following:
+
+ file = rcu_dereference_check(fdt->fd[fd],
+ rcu_read_lock_held() ||
+ lockdep_is_held(&files->file_lock) ||
+ atomic_read(&files->count) == 1);
+
+This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
+and, if CONFIG_PROVE_RCU is configured, verifies that this expression
+is used in:
+
+1. An RCU read-side critical section, or
+2. with files->file_lock held, or
+3. on an unshared files_struct.
+
+In case (1), the pointer is picked up in an RCU-safe manner for vanilla
+RCU read-side critical sections, in case (2) the ->file_lock prevents
+any change from taking place, and finally, in case (3) the current task
+is the only task accessing the file_struct, again preventing any change
+from taking place.
+
+There are currently only "universal" versions of the rcu_assign_pointer()
+and RCU list-/tree-traversal primitives, which do not (yet) check for
+being in an RCU read-side critical section. In the future, separate
+versions of these primitives might be created.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 469a58b..1dc00ee 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -323,15 +323,17 @@ used as follows:
Defer Protect
a. synchronize_rcu() rcu_read_lock() / rcu_read_unlock()
- call_rcu()
+ call_rcu() rcu_dereference()
b. call_rcu_bh() rcu_read_lock_bh() / rcu_read_unlock_bh()
+ rcu_dereference_bh()
c. synchronize_sched() rcu_read_lock_sched() / rcu_read_unlock_sched()
preempt_disable() / preempt_enable()
local_irq_save() / local_irq_restore()
hardirq enter / hardirq exit
NMI enter / NMI exit
+ rcu_dereference_sched()
These three mechanisms are used as follows:
@@ -781,9 +783,8 @@ Linux-kernel source code, but it helps to have a full list of the
APIs, since there does not appear to be a way to categorize them
in docbook. Here is the list, by category.
-RCU pointer/list traversal:
+RCU list traversal:
- rcu_dereference
list_for_each_entry_rcu
hlist_for_each_entry_rcu
hlist_nulls_for_each_entry_rcu
@@ -809,7 +810,7 @@ RCU: Critical sections Grace period Barrier
rcu_read_lock synchronize_net rcu_barrier
rcu_read_unlock synchronize_rcu
- synchronize_rcu_expedited
+ rcu_dereference synchronize_rcu_expedited
call_rcu
@@ -817,7 +818,7 @@ bh: Critical sections Grace period Barrier
rcu_read_lock_bh call_rcu_bh rcu_barrier_bh
rcu_read_unlock_bh synchronize_rcu_bh
- synchronize_rcu_bh_expedited
+ rcu_dereference_bh synchronize_rcu_bh_expedited
sched: Critical sections Grace period Barrier
@@ -826,12 +827,14 @@ sched: Critical sections Grace period Barrier
rcu_read_unlock_sched call_rcu_sched
[preempt_disable] synchronize_sched_expedited
[and friends]
+ rcu_dereference_sched
SRCU: Critical sections Grace period Barrier
srcu_read_lock synchronize_srcu N/A
srcu_read_unlock synchronize_srcu_expedited
+ srcu_dereference
SRCU: Initialization/cleanup
init_srcu_struct
Acked-by: Eric Dumazet <eric.d...@gmail.com>
CC to netdev and David Miller, network maintainer.
Acked-by: Eric Dumazet <eric.d...@gmail.com>
Thanks Paul, great work !
--
> Le jeudi 11 f??vrier 2010 ?? 16:00 -0800, Paul E. McKenney a ??crit :
> > Update rcu_dereference() primitives to use new lockdep-based checking.
> > The rcu_dereference() in __in6_dev_get() may be protected either by
> > rcu_read_lock() or RTNL, per Eric Dumazet. The rcu_dereference()
> > in __sk_free() is protected by the fact that it is never reached if an
> > update could change it. Check for this by using rcu_dereference_check()
> > to verify that the struct sock's ->sk_wmem_alloc counter is zero.
> >
> > Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
>
> CC to netdev and David Miller, network maintainer.
>
> Acked-by: Eric Dumazet <eric.d...@gmail.com>
>
> Thanks Paul, great work !
Dave, does this look good to you too? Cannot pick up the rest of the patchset
without these checks/annotations into the RCU tree as there's too many
warnings triggering in the networking code. So it's an all-or-nothing
patchset in that regard.
Thanks,
Ingo
Isn't there a bug? Looks like data pointed to by rt should be
protected by RCU, but the rcu_read_lock is withdrawn before access.
Best Regards,
Michał Mirosław
Not really a bug, since we exit from dn_rt_cache_get_first() with
rcu_read_lock_bh()
We call the unlock only if NULL is returned, and rcu_dereference(NULL)
can be done in any context.
Paul had to move the rcu_dereference() so that no lockdep warning
triggers for rcu_dereference(NULL), its more a cleanup than a bug fix.
I'm almost thinking you can do something smart here and not replicate
that condition :-)
;-)
How about like the following? If this is agreeable, I will re-send the
patch stack with this included.
Thanx, Paul
------------------------------------------------------------------------
vfs: abstract rcu_dereference_check for files-fdtable use
Create an rcu_dereference_check_fdtable() that encapsulates the
rcu_dereference_check() condition for fcheck_files() use. This has
the beneficial side-effect of getting rid of a very long line.
Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
---
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 144412f..013dc52 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,11 +57,14 @@ struct files_struct {
struct file * fd_array[NR_OPEN_DEFAULT];
};
-#define files_fdtable(files) \
- (rcu_dereference_check((files)->fdt, \
+#define rcu_dereference_check_fdtable(files, fdtfd) \
+ (rcu_dereference_check((fdtfd), \
rcu_read_lock_held() || \
lockdep_is_held(&(files)->file_lock) || \
- atomic_read(&files->count) == 1))
+ atomic_read(&(files)->count) == 1))
+
+#define files_fdtable(files) \
+ (rcu_dereference_check_fdtable((files), (files)->fdt))
struct file_operations;
struct vfsmount;
@@ -82,7 +85,7 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in
struct fdtable *fdt = files_fdtable(files);
if (fd < fdt->max_fds)
- file = rcu_dereference_check(fdt->fd[fd], rcu_read_lock_held() || lockdep_is_held(&files->file_lock) || atomic_read(&files->count) == 1);
+ file = rcu_dereference_check_fdtable(files, fdt->fd[fd]);
return file;
>
> * Eric Dumazet <eric.d...@gmail.com> wrote:
>
>> Le jeudi 11 f??vrier 2010 ?? 16:00 -0800, Paul E. McKenney a ??crit :
>> > Update rcu_dereference() primitives to use new lockdep-based checking.
>> > The rcu_dereference() in __in6_dev_get() may be protected either by
>> > rcu_read_lock() or RTNL, per Eric Dumazet. The rcu_dereference()
>> > in __sk_free() is protected by the fact that it is never reached if an
>> > update could change it. Check for this by using rcu_dereference_check()
>> > to verify that the struct sock's ->sk_wmem_alloc counter is zero.
>> >
>> > Signed-off-by: Paul E. McKenney <pau...@linux.vnet.ibm.com>
>>
>> CC to netdev and David Miller, network maintainer.
>>
>> Acked-by: Eric Dumazet <eric.d...@gmail.com>
>>
>> Thanks Paul, great work !
>
> Dave, does this look good to you too? Cannot pick up the rest of the patchset
> without these checks/annotations into the RCU tree as there's too many
> warnings triggering in the networking code. So it's an all-or-nothing
> patchset in that regard.
Looks good:
Acked-by: David S. Miller <da...@davemloft.net>