diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4e3387a..944fdfc 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -267,7 +267,7 @@ static inline void put_cred(const struct cred *_cred)
* Access the subjective credentials of the current task.
*/
#define current_cred() \
- (current->cred)
+ __rcu_dereference(current->cred)
/**
* __task_cred - Access a task's objective credentials
@@ -356,7 +356,7 @@ static inline void put_cred(const struct cred *_cred)
#define current_cred_xxx(xxx) \
({ \
- current->cred->xxx; \
+ __rcu_dereference(current->cred)->xxx; \
})
#define current_uid() (current_cred_xxx(uid))
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 038e16f..52a33eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1361,9 +1361,9 @@ struct task_struct {
struct list_head cpu_timers[3];
/* process credentials */
- const struct cred *real_cred; /* objective and real subjective task
+ const struct cred __rcu *real_cred; /* objective and real subjective task
* credentials (COW) */
- const struct cred *cred; /* effective (overridable) subjective task
+ const struct cred __rcu *cred; /* effective (overridable) subjective task
* credentials (COW) */
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
diff --git a/kernel/cred.c b/kernel/cred.c
index 1ed8ca1..eed586d 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -171,8 +171,8 @@ void __put_cred(struct cred *cred)
cred->magic = CRED_MAGIC_DEAD;
cred->put_addr = __builtin_return_address(0);
#endif
- BUG_ON(cred == current->cred);
- BUG_ON(cred == current->real_cred);
+ BUG_ON(cred == __rcu_dereference(current->cred));
+ BUG_ON(cred == __rcu_dereference(current->real_cred));
call_rcu(&cred->rcu, put_cred_rcu);
}
@@ -183,23 +183,22 @@ EXPORT_SYMBOL(__put_cred);
*/
void exit_creds(struct task_struct *tsk)
{
- struct cred *cred;
+ struct cred *cred = (struct cred *)rcu_dereference(tsk->cred);
+ struct cred *real_cred = (struct cred *)rcu_dereference(tsk->real_cred);
- kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
- atomic_read(&tsk->cred->usage),
- read_cred_subscribers(tsk->cred));
+ kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid,
+ real_cred, cred, atomic_read(&cred->usage),
+ read_cred_subscribers(cred));
- cred = (struct cred *) tsk->real_cred;
tsk->real_cred = NULL;
validate_creds(cred);
alter_cred_subscribers(cred, -1);
put_cred(cred);
- cred = (struct cred *) tsk->cred;
tsk->cred = NULL;
- validate_creds(cred);
- alter_cred_subscribers(cred, -1);
- put_cred(cred);
+ validate_creds(real_cred);
+ alter_cred_subscribers(real_cred, -1);
+ put_cred(real_cred);
cred = (struct cred *) tsk->replacement_session_keyring;
if (cred) {
@@ -273,7 +272,7 @@ struct cred *prepare_creds(void)
kdebug("prepare_creds() alloc %p", new);
- old = task->cred;
+ old = rcu_dereference(task->cred);
memcpy(new, old, sizeof(struct cred));
atomic_set(&new->usage, 1);
@@ -415,23 +414,25 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
struct thread_group_cred *tgcred;
#endif
struct cred *new;
+ const struct cred *cred;
int ret;
mutex_init(&p->cred_guard_mutex);
+ cred = rcu_dereference(p->cred);
if (
#ifdef CONFIG_KEYS
- !p->cred->thread_keyring &&
+ !cred->thread_keyring &&
#endif
clone_flags & CLONE_THREAD
) {
- p->real_cred = get_cred(p->cred);
- get_cred(p->cred);
- alter_cred_subscribers(p->cred, 2);
+ rcu_assign_pointer(p->real_cred, get_cred(cred));
+ get_cred(cred);
+ alter_cred_subscribers(cred, 2);
kdebug("share_creds(%p{%d,%d})",
- p->cred, atomic_read(&p->cred->usage),
- read_cred_subscribers(p->cred));
- atomic_inc(&p->cred->user->processes);
+ cred, atomic_read(&cred->usage),
+ read_cred_subscribers(cred));
+ atomic_inc(&cred->user->processes);
return 0;
}
@@ -475,7 +476,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
#endif
atomic_inc(&new->user->processes);
- p->cred = p->real_cred = get_cred(new);
+ rcu_assign_pointer(p->cred, get_cred(new));
+ rcu_assign_pointer(p->real_cred, new);
alter_cred_subscribers(new, 2);
validate_creds(new);
return 0;
@@ -502,13 +504,13 @@ error_put:
int commit_creds(struct cred *new)
{
struct task_struct *task = current;
- const struct cred *old = task->real_cred;
+ const struct cred *old = __rcu_dereference(task->real_cred);
kdebug("commit_creds(%p{%d,%d})", new,
atomic_read(&new->usage),
read_cred_subscribers(new));
- BUG_ON(task->cred != old);
+ BUG_ON(__rcu_dereference(task->cred) != old);
#ifdef CONFIG_DEBUG_CREDENTIALS
BUG_ON(read_cred_subscribers(old) < 2);
validate_creds(old);
@@ -605,7 +607,7 @@ EXPORT_SYMBOL(abort_creds);
*/
const struct cred *override_creds(const struct cred *new)
{
- const struct cred *old = current->cred;
+ const struct cred *old = rcu_dereference(current->cred);
kdebug("override_creds(%p{%d,%d})", new,
atomic_read(&new->usage),
@@ -634,7 +636,7 @@ EXPORT_SYMBOL(override_creds);
*/
void revert_creds(const struct cred *old)
{
- const struct cred *override = current->cred;
+ const struct cred *override = rcu_dereference(current->cred);
kdebug("revert_creds(%p{%d,%d})", old,
atomic_read(&old->usage),
diff --git a/kernel/fork.c b/kernel/fork.c
index f88bd98..ba7489b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1032,10 +1032,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
- if (atomic_read(&p->real_cred->user->processes) >=
+ if (atomic_read(&rcu_dereference(p->real_cred)->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
- p->real_cred->user != INIT_USER)
+ rcu_dereference(p->real_cred)->user != INIT_USER)
goto bad_fork_free;
}
--
1.6.3.3
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index a2ec74b..552a114 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -45,7 +45,7 @@ struct files_struct {
* read mostly part
*/
atomic_t count;
- struct fdtable *fdt;
+ struct fdtable __rcu *fdt;
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f14d925..6aa50e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1282,7 +1282,7 @@ struct task_struct {
struct sched_info sched_info;
#endif
- struct list_head tasks;
+ struct rcu_list_head tasks;
struct plist_node pushable_tasks;
struct mm_struct *mm, *active_mm;
@@ -1340,7 +1340,7 @@ struct task_struct {
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
- struct list_head thread_group;
+ struct rcu_list_head thread_group;
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
@@ -2240,7 +2240,7 @@ static inline struct task_struct *next_thread(const struct task_struct *p)
static inline int thread_group_empty(struct task_struct *p)
{
- return list_empty(&p->thread_group);
+ return list_empty_rcu(&p->thread_group);
}
#define delay_group_leader(p) \
diff --git a/kernel/exit.c b/kernel/exit.c
index 546774a..6d9bbf1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
BUG_ON(!sig);
BUG_ON(!atomic_read(&sig->count));
- sighand = rcu_dereference(tsk->sighand);
+ sighand = tsk->sighand;
spin_lock(&sighand->siglock);
posix_cpu_timers_exit(tsk);
@@ -1180,18 +1180,18 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
if (unlikely(wo->wo_flags & WNOWAIT)) {
int exit_code = p->exit_code;
- int why, status;
+ int why, __status;
get_task_struct(p);
read_unlock(&tasklist_lock);
if ((exit_code & 0x7f) == 0) {
why = CLD_EXITED;
- status = exit_code >> 8;
+ __status = exit_code >> 8;
} else {
why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
- status = exit_code & 0x7f;
+ __status = exit_code & 0x7f;
}
- return wait_noreap_copyout(wo, p, pid, uid, why, status);
+ return wait_noreap_copyout(wo, p, pid, uid, why, __status);
}
/*
@@ -1616,7 +1616,7 @@ repeat:
*/
wo->notask_error = -ECHILD;
if ((wo->wo_type < PIDTYPE_MAX) &&
- (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
+ (!wo->wo_pid || hlist_empty_rcu(&wo->wo_pid->tasks[wo->wo_type])))
goto notask;
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/fork.c b/kernel/fork.c
index ba7489b..7a3bf5b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1230,7 +1230,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* We dont wake it up yet.
*/
p->group_leader = p;
- INIT_LIST_HEAD(&p->thread_group);
+ INIT_LIST_HEAD_RCU(&p->thread_group);
/* Now that the task is set up, run cgroup callbacks if
* necessary. We need to run them before the task is visible
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a8fb30..7eff482 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -52,6 +52,7 @@
#include <linux/threads.h>
#include <linux/timer.h>
#include <linux/rcupdate.h>
+#include <linux/rculist.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/percpu.h>
@@ -239,7 +240,7 @@ static DEFINE_MUTEX(sched_domains_mutex);
struct cfs_rq;
-static LIST_HEAD(task_groups);
+static LIST_HEAD_RCU(task_groups);
/* task group related information */
struct task_group {
@@ -267,11 +268,11 @@ struct task_group {
#endif
struct rcu_head rcu;
- struct list_head list;
+ struct rcu_list_head list;
struct task_group *parent;
- struct list_head siblings;
- struct list_head children;
+ struct rcu_list_head siblings;
+ struct rcu_list_head children;
};
#ifdef CONFIG_USER_SCHED
@@ -418,7 +419,7 @@ struct cfs_rq {
* leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
* list is used during load balance.
*/
- struct list_head leaf_cfs_rq_list;
+ struct rcu_list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
#ifdef CONFIG_SMP
@@ -476,7 +477,7 @@ struct rt_rq {
unsigned long rt_nr_boosted;
struct rq *rq;
- struct list_head leaf_rt_rq_list;
+ struct rcu_list_head leaf_rt_rq_list;
struct task_group *tg;
struct sched_rt_entity *rt_se;
#endif
@@ -547,10 +548,10 @@ struct rq {
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
- struct list_head leaf_cfs_rq_list;
+ struct rcu_list_head leaf_cfs_rq_list;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
- struct list_head leaf_rt_rq_list;
+ struct rcu_list_head leaf_rt_rq_list;
#endif
/*
@@ -9423,7 +9424,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
init_cfs_rq(cfs_rq, rq);
cfs_rq->tg = tg;
if (add)
- list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+ list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
tg->se[cpu] = se;
/* se could be NULL for init_task_group */
@@ -9455,7 +9456,7 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
rt_rq->rt_se = rt_se;
rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
if (add)
- list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+ list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
tg->rt_se[cpu] = rt_se;
if (!rt_se)
@@ -9547,8 +9548,8 @@ void __init sched_init(void)
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_GROUP_SCHED
- list_add(&init_task_group.list, &task_groups);
- INIT_LIST_HEAD(&init_task_group.children);
+ list_add_rcu(&init_task_group.list, &task_groups);
+ INIT_LIST_HEAD_RCU(&init_task_group.children);
#ifdef CONFIG_USER_SCHED
INIT_LIST_HEAD(&root_task_group.children);
@@ -9573,7 +9574,7 @@ void __init sched_init(void)
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
init_task_group.shares = init_task_group_load;
- INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
+ INIT_LIST_HEAD_RCU(&rq->leaf_cfs_rq_list);
#ifdef CONFIG_CGROUP_SCHED
/*
* How much cpu bandwidth does init_task_group get?
@@ -9619,7 +9620,7 @@ void __init sched_init(void)
rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
- INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
+ INIT_LIST_HEAD_RCU(&rq->leaf_rt_rq_list);
#ifdef CONFIG_CGROUP_SCHED
init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
#elif defined CONFIG_USER_SCHED
@@ -10058,7 +10059,7 @@ struct task_group *sched_create_group(struct task_group *parent)
WARN_ON(!parent); /* root should already exist */
tg->parent = parent;
- INIT_LIST_HEAD(&tg->children);
+ INIT_LIST_HEAD_RCU(&tg->children);
list_add_rcu(&tg->siblings, &parent->children);
spin_unlock_irqrestore(&task_group_lock, flags);
The new macros are only to be used in cases where bypassing
the regular accessors has been proven to be correct.
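As a rough illustration of the intended split (a sketch only; the
foo/bar structures and foo_replace() are invented for this example
and are not part of the patch):

	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>

	struct bar {
		int val;
	};

	struct foo {
		struct bar __rcu *cur;	/* updates serialized by lock */
		spinlock_t lock;
	};

	/* reader: needs rcu_dereference(), including its read barrier */
	static int foo_read_val(struct foo *f)
	{
		int val;

		rcu_read_lock();
		val = rcu_dereference(f->cur)->val;
		rcu_read_unlock();
		return val;
	}

	/*
	 * updater: holds f->lock, so cur cannot change under us;
	 * __rcu_dereference() only drops the __rcu annotation for
	 * sparse, no barrier is needed to load the old pointer.
	 */
	static struct bar *foo_replace(struct foo *f, struct bar *new)
	{
		struct bar *old;

		spin_lock(&f->lock);
		old = __rcu_dereference(f->cur);
		rcu_assign_pointer(f->cur, new);
		spin_unlock(&f->lock);
		return old;	/* caller frees it after a grace period */
	}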
Signed-off-by: Arnd Bergmann <ar...@arndb.de>
---
include/linux/compiler.h | 2 ++
include/linux/rcupdate.h | 46 +++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 188fcae..6cc0857 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -10,6 +10,7 @@
# define __force __attribute__((force))
# define __nocast __attribute__((nocast))
# define __iomem __attribute__((noderef, address_space(2)))
+# define __rcu __attribute__((noderef, address_space(3)))
# define __acquires(x) __attribute__((context(x,0,1)))
# define __releases(x) __attribute__((context(x,1,0)))
# define __acquire(x) __context__(x,1)
@@ -25,6 +26,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
# define __force
# define __nocast
# define __iomem
+# define __rcu
# define __chk_user_ptr(x) (void)0
# define __chk_io_ptr(x) (void)0
# define __builtin_warning(x, y...) (1)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 24440f4..644e28c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -40,6 +40,7 @@
#include <linux/seqlock.h>
#include <linux/lockdep.h>
#include <linux/completion.h>
+#include <linux/compiler.h>
/**
* struct rcu_head - callback structure for use with RCU
@@ -225,13 +226,31 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
*
* Inserts memory barriers on architectures that require them
* (currently only the Alpha), and, more importantly, documents
- * exactly which pointers are protected by RCU.
+ * exactly which pointers are protected by RCU and checks that
+ * the pointer is annotated as __rcu.
*/
-
#define rcu_dereference(p) ({ \
- typeof(p) _________p1 = ACCESS_ONCE(p); \
+ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
+ (void) (((typeof (*p) __rcu *)p) == p); \
smp_read_barrier_depends(); \
- (_________p1); \
+ ((typeof(*p) __force __kernel *)(_________p1)); \
+ })
+
+/**
+ * __rcu_dereference - fetch an __rcu pointer outside of a
+ * read-side critical section.
+ *
+ * __rcu_dereference does not contain any barrier but only
+ * converts a __rcu pointer to one that can be dereferenced.
+ * Use this for annotating code that operates on __rcu variables
+ * for checking with sparse in places where you can be sure
+ * that no writers exist, e.g. in a write-side critical section
+ * or in an RCU callback.
+ */
+
+#define __rcu_dereference(p) ({ \
+ (void) (((typeof (*p) __rcu *)p) == p); \
+ ((typeof(*p) __force __kernel *)(p)); \
})
/**
@@ -252,9 +271,26 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
if (!__builtin_constant_p(v) || \
((v) != NULL)) \
smp_wmb(); \
- (p) = (v); \
+ (p) = (typeof(*v) __force __rcu *)(v); \
})
+/**
+ * __rcu_assign_pointer - assign a variable to an __rcu pointer
+ * without barriers.
+ * Using this is almost always a bug.
+ */
+#define __rcu_assign_pointer(p, v) \
+ ({ \
+ (p) = (typeof(*v) __force __rcu *)(v); \
+ })
+
+/**
+ * RCU_INIT_POINTER - initialize an RCU protected member
+ * in a statically allocated data structure.
+ */
+#define RCU_INIT_POINTER(p, v) \
+ p = (typeof(*v) __force __rcu *)(v)
+
/* Infrastructure to implement the synchronize_() primitives. */
struct rcu_synchronize {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a177698..c0d85e3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -569,11 +569,11 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
struct list_head group_entry;
- struct list_head event_entry;
+ struct rcu_list_head event_entry;
struct list_head sibling_list;
int nr_siblings;
struct perf_event *group_leader;
- struct perf_event *output;
+ struct perf_event __rcu *output;
const struct pmu *pmu;
enum perf_event_active_state state;
@@ -634,7 +634,7 @@ struct perf_event {
/* mmap bits */
struct mutex mmap_mutex;
atomic_t mmap_count;
- struct perf_mmap_data *data;
+ struct perf_mmap_data __rcu *data;
/* poll related */
wait_queue_head_t waitq;
@@ -682,7 +682,7 @@ struct perf_event_context {
struct mutex mutex;
struct list_head group_list;
- struct list_head event_list;
+ struct rcu_list_head event_list;
int nr_events;
int nr_active;
int is_active;
@@ -700,7 +700,7 @@ struct perf_event_context {
* These fields let us detect when two contexts have both
* been cloned (inherited) from a common ancestor.
*/
- struct perf_event_context *parent_ctx;
+ struct perf_event_context __rcu *parent_ctx;
u64 parent_gen;
u64 generation;
int pin_count;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52a33eb..f14d925 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1502,7 +1502,7 @@ struct task_struct {
struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
- struct perf_event_context *perf_event_ctxp;
+ struct perf_event_context __rcu *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2ae7409..18701de 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -150,7 +150,7 @@ static void put_ctx(struct perf_event_context *ctx)
{
if (atomic_dec_and_test(&ctx->refcount)) {
if (ctx->parent_ctx)
- put_ctx(ctx->parent_ctx);
+ put_ctx(__rcu_dereference(ctx->parent_ctx));
if (ctx->task)
put_task_struct(ctx->task);
call_rcu(&ctx->rcu_head, free_ctx);
@@ -160,7 +160,7 @@ static void put_ctx(struct perf_event_context *ctx)
static void unclone_ctx(struct perf_event_context *ctx)
{
if (ctx->parent_ctx) {
- put_ctx(ctx->parent_ctx);
+ put_ctx(__rcu_dereference(ctx->parent_ctx));
ctx->parent_ctx = NULL;
}
}
@@ -1129,8 +1129,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
perf_event_update_userpage(next_event);
}
-#define list_next_entry(pos, member) \
- list_entry(pos->member.next, typeof(*pos), member)
+#define list_next_entry_rcu(pos, member) \
+ list_entry_rcu(pos->member.next, typeof(*pos), member)
static void perf_event_sync_stat(struct perf_event_context *ctx,
struct perf_event_context *next_ctx)
@@ -1142,10 +1142,10 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
update_context_time(ctx);
- event = list_first_entry(&ctx->event_list,
+ event = list_first_entry_rcu(&ctx->event_list,
struct perf_event, event_entry);
- next_event = list_first_entry(&next_ctx->event_list,
+ next_event = list_first_entry_rcu(&next_ctx->event_list,
struct perf_event, event_entry);
while (&event->event_entry != &ctx->event_list &&
@@ -1153,8 +1153,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
__perf_event_sync_stat(event, next_event);
- event = list_next_entry(event, event_entry);
- next_event = list_next_entry(next_event, event_entry);
+ event = list_next_entry_rcu(event, event_entry);
+ next_event = list_next_entry_rcu(next_event, event_entry);
}
}
@@ -1173,7 +1173,7 @@ void perf_event_task_sched_out(struct task_struct *task,
struct task_struct *next, int cpu)
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
- struct perf_event_context *ctx = task->perf_event_ctxp;
+ struct perf_event_context *ctx = rcu_dereference(task->perf_event_ctxp);
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
struct pt_regs *regs;
@@ -1187,7 +1187,7 @@ void perf_event_task_sched_out(struct task_struct *task,
rcu_read_lock();
parent = rcu_dereference(ctx->parent_ctx);
- next_ctx = next->perf_event_ctxp;
+ next_ctx = rcu_dereference(next->perf_event_ctxp);
if (parent && next_ctx &&
rcu_dereference(next_ctx->parent_ctx) == parent) {
/*
@@ -1206,8 +1206,8 @@ void perf_event_task_sched_out(struct task_struct *task,
* XXX do we need a memory barrier of sorts
* wrt to rcu_dereference() of perf_event_ctxp
*/
- task->perf_event_ctxp = next_ctx;
- next->perf_event_ctxp = ctx;
+ __rcu_assign_pointer(task->perf_event_ctxp, next_ctx);
+ __rcu_assign_pointer(next->perf_event_ctxp, ctx);
ctx->task = next;
next_ctx->task = task;
do_switch = 0;
@@ -1329,7 +1329,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
void perf_event_task_sched_in(struct task_struct *task, int cpu)
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
- struct perf_event_context *ctx = task->perf_event_ctxp;
+ struct perf_event_context *ctx = __rcu_dereference(task->perf_event_ctxp);
if (likely(!ctx))
return;
@@ -1470,7 +1470,7 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
return;
cpuctx = &per_cpu(perf_cpu_context, cpu);
- ctx = curr->perf_event_ctxp;
+ ctx = rcu_dereference(curr->perf_event_ctxp);
perf_ctx_adjust_freq(&cpuctx->ctx);
if (ctx)
@@ -1501,7 +1501,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
int enabled = 0;
local_irq_save(flags);
- ctx = task->perf_event_ctxp;
+ ctx = __rcu_dereference(task->perf_event_ctxp);
if (!ctx || !ctx->nr_events)
goto out;
@@ -1591,7 +1591,7 @@ __perf_event_init_context(struct perf_event_context *ctx,
raw_spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
INIT_LIST_HEAD(&ctx->group_list);
- INIT_LIST_HEAD(&ctx->event_list);
+ INIT_LIST_HEAD_RCU(&ctx->event_list);
atomic_set(&ctx->refcount, 1);
ctx->task = task;
}
@@ -2366,7 +2366,7 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
static void perf_mmap_data_release(struct perf_event *event)
{
- struct perf_mmap_data *data = event->data;
+ struct perf_mmap_data *data = __rcu_dereference(event->data);
WARN_ON(atomic_read(&event->mmap_count));
@@ -2387,7 +2387,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
WARN_ON_ONCE(event->ctx->parent_ctx);
if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
- unsigned long size = perf_data_size(event->data);
+ unsigned long size = perf_data_size(__rcu_dereference(event->data));
struct user_struct *user = current_user();
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
@@ -4421,7 +4421,7 @@ perf_event_alloc(struct perf_event_attr *attr,
INIT_LIST_HEAD(&event->child_list);
INIT_LIST_HEAD(&event->group_entry);
- INIT_LIST_HEAD(&event->event_entry);
+ INIT_LIST_HEAD_RCU(&event->event_entry);
INIT_LIST_HEAD(&event->sibling_list);
init_waitqueue_head(&event->waitq);
@@ -4629,7 +4629,7 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
set:
mutex_lock(&event->mmap_mutex);
- old_output = event->output;
+ old_output = __rcu_dereference(event->output);
rcu_assign_pointer(event->output, output_event);
mutex_unlock(&event->mmap_mutex);
@@ -4999,7 +4999,7 @@ void perf_event_exit_task(struct task_struct *child)
* scheduled, so we are now safe from rescheduling changing
* our context.
*/
- child_ctx = child->perf_event_ctxp;
+ child_ctx = __rcu_dereference(child->perf_event_ctxp);
__perf_event_task_sched_out(child_ctx);
/*
@@ -5062,7 +5062,7 @@ again:
*/
void perf_event_free_task(struct task_struct *task)
{
- struct perf_event_context *ctx = task->perf_event_ctxp;
+ struct perf_event_context *ctx = __rcu_dereference(task->perf_event_ctxp);
struct perf_event *event, *tmp;
if (!ctx)
@@ -5160,7 +5160,7 @@ int perf_event_init_task(struct task_struct *child)
}
__perf_event_init_context(child_ctx, child);
- child->perf_event_ctxp = child_ctx;
+ __rcu_assign_pointer(child->perf_event_ctxp, child_ctx);
get_task_struct(child);
}
@@ -5183,13 +5183,13 @@ int perf_event_init_task(struct task_struct *child)
*/
cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
if (cloned_ctx) {
- child_ctx->parent_ctx = cloned_ctx;
+ __rcu_assign_pointer(child_ctx->parent_ctx, cloned_ctx);
child_ctx->parent_gen = parent_ctx->parent_gen;
} else {
- child_ctx->parent_ctx = parent_ctx;
+ __rcu_assign_pointer(child_ctx->parent_ctx, parent_ctx);
child_ctx->parent_gen = parent_ctx->generation;
}
- get_ctx(child_ctx->parent_ctx);
+ get_ctx(__rcu_dereference(child_ctx->parent_ctx));
}
mutex_unlock(&parent_ctx->mutex);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index fee6c2f..f05f5e4 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -49,28 +49,28 @@
struct notifier_block {
int (*notifier_call)(struct notifier_block *, unsigned long, void *);
- struct notifier_block *next;
+ struct notifier_block __rcu *next;
int priority;
};
struct atomic_notifier_head {
spinlock_t lock;
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
struct blocking_notifier_head {
struct rw_semaphore rwsem;
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
struct raw_notifier_head {
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
struct srcu_notifier_head {
struct mutex mutex;
struct srcu_struct srcu;
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \
diff --git a/kernel/notifier.c b/kernel/notifier.c
index acd24e7..c0e8489 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -18,43 +18,43 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
* are layered on top of these, with appropriate locking added.
*/
-static int notifier_chain_register(struct notifier_block **nl,
+static int notifier_chain_register(struct notifier_block __rcu **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
- if (n->priority > (*nl)->priority)
+ if (n->priority > __rcu_dereference(*nl)->priority)
break;
- nl = &((*nl)->next);
+ nl = &__rcu_dereference(*nl)->next;
}
n->next = *nl;
rcu_assign_pointer(*nl, n);
return 0;
}
-static int notifier_chain_cond_register(struct notifier_block **nl,
+static int notifier_chain_cond_register(struct notifier_block __rcu **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
- if ((*nl) == n)
+ if (__rcu_dereference(*nl) == n)
return 0;
- if (n->priority > (*nl)->priority)
+ if (n->priority > __rcu_dereference(*nl)->priority)
break;
- nl = &((*nl)->next);
+ nl = &__rcu_dereference(*nl)->next;
}
n->next = *nl;
rcu_assign_pointer(*nl, n);
return 0;
}
-static int notifier_chain_unregister(struct notifier_block **nl,
+static int notifier_chain_unregister(struct notifier_block __rcu **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
- if ((*nl) == n) {
+ if (__rcu_dereference(*nl) == n) {
rcu_assign_pointer(*nl, n->next);
return 0;
}
- nl = &((*nl)->next);
+ nl = &__rcu_dereference(*nl)->next;
}
return -ENOENT;
}
@@ -71,7 +71,7 @@ static int notifier_chain_unregister(struct notifier_block **nl,
* @returns: notifier_call_chain returns the value returned by the
* last notifier function called.
*/
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
+static int __kprobes notifier_call_chain(struct notifier_block __rcu **nl,
unsigned long val, void *v,
int nr_to_call, int *nr_calls)
{
When not running sparse, the types are defined as aliases for the
original list_head etc., so that working setups which still lack
annotations do not break.
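A minimal usage sketch (the item/registry names are invented for
illustration and are not part of this patch); with sparse the list is
checked against the __rcu rules, without sparse the very same code
compiles to the plain list_head variant:

	#include <linux/rculist.h>
	#include <linux/spinlock.h>

	struct item {
		int id;
		struct rcu_list_head link;	/* plain list_head w/o sparse */
	};

	static LIST_HEAD_RCU(registry);
	static DEFINE_SPINLOCK(registry_lock);

	/* writer side: additions serialized by registry_lock */
	static void item_register(struct item *it)
	{
		spin_lock(&registry_lock);
		list_add_rcu(&it->link, &registry);
		spin_unlock(&registry_lock);
	}

	/* reader side: caller must hold rcu_read_lock() */
	static struct item *item_lookup(int id)
	{
		struct item *it;

		list_for_each_entry_rcu(it, &registry, link)
			if (it->id == id)
				return it;
		return NULL;
	}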
Signed-off-by: Arnd Bergmann <ar...@arndb.de>
---
include/linux/rculist.h | 152 ++++++++++++++++++++++++++++++++++------------
1 files changed, 112 insertions(+), 40 deletions(-)
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 1bf0f70..dfbc6ea 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -9,16 +9,67 @@
#include <linux/list.h>
#include <linux/rcupdate.h>
+#ifdef __CHECKER__
+struct rcu_list_head {
+ struct rcu_list_head __rcu *next;
+ struct rcu_list_head *prev;
+};
+#define LIST_HEAD_INIT_RCU(name) { (struct rcu_list_head __force __rcu *)&(name), &(name) }
+
+#define LIST_HEAD_RCU(name) \
+ struct rcu_list_head name = LIST_HEAD_INIT_RCU(name)
+
+static inline void INIT_LIST_HEAD_RCU(struct rcu_list_head *list)
+{
+ __rcu_assign_pointer(list->next, list);
+ list->prev = list;
+}
+
+struct rcu_hlist_head {
+ struct rcu_hlist_node __rcu *first;
+};
+
+struct rcu_hlist_node {
+ struct rcu_hlist_node __rcu *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT_RCU { .first = (void __rcu __force *)NULL }
+#define HLIST_HEAD_RCU(name) struct rcu_hlist_head name = \
+ { .first = (void __rcu __force *)NULL }
+#define INIT_HLIST_HEAD_RCU(ptr) ((ptr)->first = (void __rcu __force *)NULL)
+static inline void INIT_HLIST_NODE_RCU(struct rcu_hlist_node *h)
+{
+ __rcu_assign_pointer(h->next, NULL);
+ h->pprev = NULL;
+}
+
+#else /* !__CHECKER__ */
+
+#define rcu_list_head list_head
+#define LIST_HEAD_INIT_RCU LIST_HEAD_INIT
+#define LIST_HEAD_RCU LIST_HEAD
+#define INIT_LIST_HEAD_RCU INIT_LIST_HEAD
+
+#define rcu_hlist_head hlist_head
+#define rcu_hlist_node hlist_node
+#define HLIST_HEAD_INIT_RCU HLIST_HEAD_INIT
+#define HLIST_HEAD_RCU HLIST_HEAD
+#define INIT_HLIST_HEAD_RCU INIT_HLIST_HEAD
+#define INIT_HLIST_NODE_RCU INIT_HLIST_NODE
+
+#endif /* !__CHECKER__ */
+
+
/*
* Insert a new entry between two known consecutive entries.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
-static inline void __list_add_rcu(struct list_head *new,
- struct list_head *prev, struct list_head *next)
+static inline void __list_add_rcu(struct rcu_list_head *new,
+ struct rcu_list_head *prev, struct rcu_list_head *next)
{
- new->next = next;
+ __rcu_assign_pointer(new->next, next);
new->prev = prev;
rcu_assign_pointer(prev->next, new);
next->prev = new;
@@ -40,9 +91,9 @@ static inline void __list_add_rcu(struct list_head *new,
* the _rcu list-traversal primitives, such as
* list_for_each_entry_rcu().
*/
-static inline void list_add_rcu(struct list_head *new, struct list_head *head)
+static inline void list_add_rcu(struct rcu_list_head *new, struct rcu_list_head *head)
{
- __list_add_rcu(new, head, head->next);
+ __list_add_rcu(new, head, __rcu_dereference(head->next));
}
/**
@@ -61,8 +112,8 @@ static inline void list_add_rcu(struct list_head *new, struct list_head *head)
* the _rcu list-traversal primitives, such as
* list_for_each_entry_rcu().
*/
-static inline void list_add_tail_rcu(struct list_head *new,
- struct list_head *head)
+static inline void list_add_tail_rcu(struct rcu_list_head *new,
+ struct rcu_list_head *head)
{
__list_add_rcu(new, head->prev, head);
}
@@ -91,13 +142,29 @@ static inline void list_add_tail_rcu(struct list_head *new,
* or call_rcu() must be used to defer freeing until an RCU
* grace period has elapsed.
*/
-static inline void list_del_rcu(struct list_head *entry)
+static inline void __list_del_rcu(struct rcu_list_head *prev, struct rcu_list_head *next)
{
- __list_del(entry->prev, entry->next);
+ next->prev = prev;
+ __rcu_assign_pointer(prev->next, next);
+}
+
+static inline void list_del_rcu(struct rcu_list_head *entry)
+{
+ __list_del_rcu(entry->prev, __rcu_dereference(entry->next));
entry->prev = LIST_POISON2;
}
/**
+ * list_empty_rcu - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty_rcu(const struct rcu_list_head *head)
+{
+ return rcu_dereference(head->next) == head;
+}
+
+
+/**
* hlist_del_init_rcu - deletes entry from hash list with re-initialization
* @n: the element to delete from the hash list.
*
@@ -117,7 +184,7 @@ static inline void list_del_rcu(struct list_head *entry)
* perfectly legal to run concurrently with the _rcu list-traversal
* primitives, such as hlist_for_each_entry_rcu().
*/
-static inline void hlist_del_init_rcu(struct hlist_node *n)
+static inline void hlist_del_init_rcu(struct rcu_hlist_node *n)
{
if (!hlist_unhashed(n)) {
__hlist_del(n);
@@ -133,13 +200,13 @@ static inline void hlist_del_init_rcu(struct hlist_node *n)
* The @old entry will be replaced with the @new entry atomically.
* Note: @old should not be empty.
*/
-static inline void list_replace_rcu(struct list_head *old,
- struct list_head *new)
+static inline void list_replace_rcu(struct rcu_list_head *old,
+ struct rcu_list_head *new)
{
new->next = old->next;
new->prev = old->prev;
- rcu_assign_pointer(new->prev->next, new);
- new->next->prev = new;
+ __rcu_assign_pointer(new->prev->next, new);
+ rcu_dereference(new->next)->prev = new;
old->prev = LIST_POISON2;
}
@@ -160,13 +227,13 @@ static inline void list_replace_rcu(struct list_head *old,
* based on call_rcu() could be created. But only if -really-
* needed -- there is no shortage of RCU API members.
*/
-static inline void list_splice_init_rcu(struct list_head *list,
- struct list_head *head,
+static inline void list_splice_init_rcu(struct rcu_list_head *list,
+ struct rcu_list_head *head,
void (*sync)(void))
{
- struct list_head *first = list->next;
- struct list_head *last = list->prev;
- struct list_head *at = head->next;
+ struct rcu_list_head *first = __rcu_dereference(list->next);
+ struct rcu_list_head *last = list->prev;
+ struct rcu_list_head *at = __rcu_dereference(head->next);
if (list_empty(head))
return;
@@ -192,7 +259,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
* this function.
*/
- last->next = at;
+ __rcu_assign_pointer(last->next, at);
rcu_assign_pointer(head->next, first);
first->prev = head;
at->prev = last;
@@ -200,7 +267,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
/**
* list_entry_rcu - get the struct for this entry
- * @ptr: the &struct list_head pointer.
+ * @ptr: the &struct rcu_list_head pointer.
* @type: the type of the struct this is embedded in.
* @member: the name of the list_struct within the struct.
*
@@ -241,13 +308,13 @@ static inline void list_splice_init_rcu(struct list_head *list,
*/
#define list_for_each_entry_rcu(pos, head, member) \
for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
- prefetch(pos->member.next), &pos->member != (head); \
+ prefetch(__rcu_dereference(pos->member.next)), &pos->member != (head); \
pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
/**
* list_for_each_continue_rcu
- * @pos: the &struct list_head to use as a loop cursor.
+ * @pos: the &struct rcu_list_head to use as a loop cursor.
* @head: the head for your list.
*
* Iterate over an rcu-protected list, continuing after current point.
@@ -294,9 +361,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
* the _rcu list-traversal primitives, such as
* hlist_for_each_entry().
*/
-static inline void hlist_del_rcu(struct hlist_node *n)
+static inline void hlist_del_rcu(struct rcu_hlist_node *n)
{
- __hlist_del(n);
+ __hlist_del((struct hlist_node *)n);
n->pprev = LIST_POISON2;
}
@@ -307,16 +374,16 @@ static inline void hlist_del_rcu(struct hlist_node *n)
*
* The @old entry will be replaced with the @new entry atomically.
*/
-static inline void hlist_replace_rcu(struct hlist_node *old,
- struct hlist_node *new)
+static inline void hlist_replace_rcu(struct rcu_hlist_node *old,
+ struct rcu_hlist_node *new)
{
- struct hlist_node *next = old->next;
+ struct rcu_hlist_node __rcu *next = old->next;
new->next = next;
new->pprev = old->pprev;
rcu_assign_pointer(*new->pprev, new);
if (next)
- new->next->pprev = &new->next;
+ __rcu_dereference(new->next)->pprev = &new->next;
old->pprev = LIST_POISON2;
}
@@ -339,12 +406,12 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
* problems on Alpha CPUs. Regardless of the type of CPU, the
* list-traversal primitive must be guarded by rcu_read_lock().
*/
-static inline void hlist_add_head_rcu(struct hlist_node *n,
- struct hlist_head *h)
+static inline void hlist_add_head_rcu(struct rcu_hlist_node *n,
+ struct rcu_hlist_head *h)
{
- struct hlist_node *first = h->first;
+ struct rcu_hlist_node *first = __rcu_dereference(h->first);
- n->next = first;
+ __rcu_assign_pointer(n->next, first);
n->pprev = &h->first;
rcu_assign_pointer(h->first, n);
if (first)
@@ -369,8 +436,8 @@ static inline void hlist_add_head_rcu(struct hlist_node *n,
* hlist_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs.
*/
-static inline void hlist_add_before_rcu(struct hlist_node *n,
- struct hlist_node *next)
+static inline void hlist_add_before_rcu(struct rcu_hlist_node *n,
+ struct rcu_hlist_node *next)
{
n->pprev = next->pprev;
n->next = next;
@@ -396,8 +463,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
* hlist_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs.
*/
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
- struct hlist_node *n)
+static inline void hlist_add_after_rcu(struct rcu_hlist_node *prev,
+ struct rcu_hlist_node *n)
{
n->next = prev->next;
n->pprev = &prev->next;
@@ -406,12 +473,17 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
n->next->pprev = &n->next;
}
+static inline int hlist_empty_rcu(const struct rcu_hlist_head *h)
+{
+ return !__rcu_dereference(h->first);
+}
+
/**
* hlist_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
- * @pos: the &struct hlist_node to use as a loop cursor.
+ * @pos: the &struct rcu_hlist_node to use as a loop cursor.
* @head: the head for your list.
- * @member: the name of the hlist_node within the struct.
+ * @member: the name of the rcu_hlist_node within the struct.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as hlist_add_head_rcu()
@@ -419,7 +491,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
*/
#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
for (pos = rcu_dereference((head)->first); \
- pos && ({ prefetch(pos->next); 1; }) && \
+ pos && ({ prefetch(__rcu_dereference(pos->next)); 1; }) && \
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
pos = rcu_dereference(pos->next))
diff --git a/kernel/audit.c b/kernel/audit.c
index 5feed23..6115194 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -116,7 +116,7 @@ static atomic_t audit_lost = ATOMIC_INIT(0);
static struct sock *audit_sock;
/* Hash for inode-based rules */
-struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+struct rcu_list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
/* The audit_freelist is a list of pre-allocated audit buffers (if more
* than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
@@ -985,7 +985,7 @@ static int __init audit_init(void)
audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
- INIT_LIST_HEAD(&audit_inode_hash[i]);
+ INIT_LIST_HEAD_RCU(&audit_inode_hash[i]);
return 0;
}
diff --git a/kernel/audit.h b/kernel/audit.h
index 208687b..bd4335c 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -58,7 +58,7 @@ struct audit_tree;
struct audit_chunk;
struct audit_entry {
- struct list_head list;
+ struct rcu_list_head list;
struct rcu_head rcu;
struct audit_krule rule;
};
@@ -71,7 +71,7 @@ extern int audit_ever_enabled;
extern int audit_pid;
#define AUDIT_INODE_BUCKETS 32
-extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+extern struct rcu_list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
static inline int audit_hash_ino(u32 ino)
{
@@ -101,7 +101,7 @@ extern int selinux_audit_rule_update(void);
extern struct mutex audit_filter_mutex;
extern void audit_free_rule_rcu(struct rcu_head *);
-extern struct list_head audit_filter_list[];
+extern struct rcu_list_head audit_filter_list[];
/* audit watch functions */
extern unsigned long audit_watch_inode(struct audit_watch *watch);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 4b05bd9..dfebdbb 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -20,7 +20,7 @@ struct audit_tree {
};
struct audit_chunk {
- struct list_head hash;
+ struct rcu_list_head hash;
struct inotify_watch watch;
struct list_head trees; /* with root here */
int dead;
@@ -121,7 +121,7 @@ static struct audit_chunk *alloc_chunk(int count)
if (!chunk)
return NULL;
- INIT_LIST_HEAD(&chunk->hash);
+ INIT_LIST_HEAD_RCU(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
atomic_long_set(&chunk->refs, 1);
@@ -157,10 +157,10 @@ static void __put_chunk(struct rcu_head *rcu)
}
enum {HASH_SIZE = 128};
-static struct list_head chunk_hash_heads[HASH_SIZE];
+static struct rcu_list_head chunk_hash_heads[HASH_SIZE];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
-static inline struct list_head *chunk_hash(const struct inode *inode)
+static inline struct rcu_list_head *chunk_hash(const struct inode *inode)
{
unsigned long n = (unsigned long)inode / L1_CACHE_BYTES;
return chunk_hash_heads + n % HASH_SIZE;
@@ -169,14 +169,14 @@ static inline struct list_head *chunk_hash(const struct inode *inode)
/* hash_lock is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
- struct list_head *list = chunk_hash(chunk->watch.inode);
+ struct rcu_list_head *list = chunk_hash(chunk->watch.inode);
list_add_rcu(&chunk->hash, list);
}
/* called under rcu_read_lock */
struct audit_chunk *audit_tree_lookup(const struct inode *inode)
{
- struct list_head *list = chunk_hash(inode);
+ struct rcu_list_head *list = chunk_hash(inode);
struct audit_chunk *p;
list_for_each_entry_rcu(p, list, hash) {
@@ -959,7 +959,7 @@ static int __init audit_tree_init(void)
audit_panic("cannot initialize inotify handle for rectree watches");
for (i = 0; i < HASH_SIZE; i++)
- INIT_LIST_HEAD(&chunk_hash_heads[i]);
+ INIT_LIST_HEAD_RCU(&chunk_hash_heads[i]);
return 0;
}
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a706040..0de3cdb 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -44,13 +44,13 @@
*/
/* Audit filter lists, defined in <linux/audit.h> */
-struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
- LIST_HEAD_INIT(audit_filter_list[0]),
- LIST_HEAD_INIT(audit_filter_list[1]),
- LIST_HEAD_INIT(audit_filter_list[2]),
- LIST_HEAD_INIT(audit_filter_list[3]),
- LIST_HEAD_INIT(audit_filter_list[4]),
- LIST_HEAD_INIT(audit_filter_list[5]),
+struct rcu_list_head audit_filter_list[AUDIT_NR_FILTERS] = {
+ LIST_HEAD_INIT_RCU(audit_filter_list[0]),
+ LIST_HEAD_INIT_RCU(audit_filter_list[1]),
+ LIST_HEAD_INIT_RCU(audit_filter_list[2]),
+ LIST_HEAD_INIT_RCU(audit_filter_list[3]),
+ LIST_HEAD_INIT_RCU(audit_filter_list[4]),
+ LIST_HEAD_INIT_RCU(audit_filter_list[5]),
#if AUDIT_NR_FILTERS != 6
#error Fix audit_filter_list initialiser
#endif
@@ -821,10 +821,10 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old,
/* Find an existing audit rule.
* Caller must hold audit_filter_mutex to prevent stale rule data. */
static struct audit_entry *audit_find_rule(struct audit_entry *entry,
- struct list_head **p)
+ struct rcu_list_head **p)
{
struct audit_entry *e, *found = NULL;
- struct list_head *list;
+ struct rcu_list_head *list;
int h;
if (entry->rule.inode_f) {
@@ -834,7 +834,7 @@ static struct audit_entry *audit_find_rule(struct audit_entry *entry,
/* we don't know the inode number, so must walk entire hash */
for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
list = &audit_inode_hash[h];
- list_for_each_entry(e, list, list)
+ list_for_each_entry_rcu(e, list, list)
if (!audit_compare_rule(&entry->rule, &e->rule)) {
found = e;
goto out;
@@ -845,7 +845,7 @@ static struct audit_entry *audit_find_rule(struct audit_entry *entry,
*p = list = &audit_filter_list[entry->rule.listnr];
}
- list_for_each_entry(e, list, list)
+ list_for_each_entry_rcu(e, list, list)
if (!audit_compare_rule(&entry->rule, &e->rule)) {
found = e;
goto out;
@@ -864,7 +864,7 @@ static inline int audit_add_rule(struct audit_entry *entry)
struct audit_entry *e;
struct audit_watch *watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
- struct list_head *list;
+ struct rcu_list_head *list;
int h, err;
#ifdef CONFIG_AUDITSYSCALL
int dont_count = 0;
@@ -947,7 +947,7 @@ static inline int audit_del_rule(struct audit_entry *entry)
struct audit_entry *e;
struct audit_watch *watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
- struct list_head *list;
+ struct rcu_list_head *list;
LIST_HEAD(inotify_list);
int ret = 0;
#ifdef CONFIG_AUDITSYSCALL
@@ -1296,7 +1296,7 @@ int audit_filter_type(int type)
int result = 0;
rcu_read_lock();
- if (list_empty(&audit_filter_list[AUDIT_FILTER_TYPE]))
+ if (list_empty_rcu(&audit_filter_list[AUDIT_FILTER_TYPE]))
goto unlock_and_return;
list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TYPE],
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fc0f928..e4fdd74 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -686,7 +686,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
*/
static enum audit_state audit_filter_syscall(struct task_struct *tsk,
struct audit_context *ctx,
- struct list_head *list)
+ struct rcu_list_head *list)
{
struct audit_entry *e;
enum audit_state state;
@@ -695,7 +695,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
return AUDIT_DISABLED;
rcu_read_lock();
- if (!list_empty(list)) {
+ if (!list_empty_rcu(list)) {
int word = AUDIT_WORD(ctx->major);
int bit = AUDIT_BIT(ctx->major);
@@ -733,9 +733,9 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
int bit = AUDIT_BIT(ctx->major);
struct audit_names *n = &ctx->names[i];
int h = audit_hash_ino((u32)n->ino);
- struct list_head *list = &audit_inode_hash[h];
+ struct rcu_list_head *list = &audit_inode_hash[h];
- if (list_empty(list))
+ if (list_empty_rcu(list))
continue;
list_for_each_entry_rcu(e, list, list) {
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0008dee..832092d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -66,7 +66,7 @@ struct cgroup_subsys_state {
unsigned long flags;
/* ID for this css, if possible */
- struct css_id *id;
+ struct css_id __rcu *id;
};
/* bits in struct cgroup_subsys_state flags field */
@@ -190,7 +190,7 @@ struct cgroup {
struct list_head children; /* my children */
struct cgroup *parent; /* my parent */
- struct dentry *dentry; /* cgroup fs entry, RCU protected */
+ struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
@@ -486,7 +486,7 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
static inline struct cgroup_subsys_state *task_subsys_state(
struct task_struct *task, int subsys_id)
{
- return rcu_dereference(task->cgroups->subsys[subsys_id]);
+ return rcu_dereference(task->cgroups)->subsys[subsys_id];
}
static inline struct cgroup* task_cgroup(struct task_struct *task,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c..038e16f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1489,7 +1489,7 @@ struct task_struct {
#endif
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock */
- struct css_set *cgroups;
+ struct css_set __rcu *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock */
struct list_head cg_list;
#endif
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aa3bee5..e0f379e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -127,7 +127,7 @@ struct css_id {
* is called after synchronize_rcu(). But for safe use, css_is_removed()
* css_tryget() should be used for avoiding race.
*/
- struct cgroup_subsys_state *css;
+ struct cgroup_subsys_state __rcu *css;
/*
* ID of this css.
*/
@@ -605,7 +605,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
* task can't change groups, so the only thing that can happen
* is that it exits and its css is set back to init_css_set.
*/
- css = task->cgroups;
+ css = __rcu_dereference(task->cgroups);
if (css == &init_css_set) {
res = &root->top_cgroup;
} else {
@@ -850,7 +850,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
*
* CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
*/
-DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
@@ -1081,9 +1081,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_sb_opts opts;
+ struct dentry *dentry;
lock_kernel();
- mutex_lock(&cgrp->dentry->d_inode->i_mutex);
+ dentry = __rcu_dereference(cgrp->dentry);
+ mutex_lock(&dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
/* See what subsystems are wanted */
@@ -1116,7 +1118,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
kfree(opts.release_agent);
kfree(opts.name);
mutex_unlock(&cgroup_mutex);
- mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
unlock_kernel();
return ret;
}
@@ -1375,7 +1377,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
root_count++;
sb->s_root->d_fsdata = root_cgrp;
- root->top_cgroup.dentry = sb->s_root;
+ __rcu_assign_pointer(root->top_cgroup.dentry, sb->s_root);
/* Link the top cgroup in this hierarchy into all
* the css_set objects */
@@ -1513,7 +1515,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
int len = dentry->d_name.len;
if ((start -= len) < buf)
return -ENAMETOOLONG;
- memcpy(start, cgrp->dentry->d_name.name, len);
+ memcpy(start, dentry->d_name.name, len);
cgrp = cgrp->parent;
if (!cgrp)
break;
@@ -1559,7 +1561,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
}
task_lock(tsk);
- cg = tsk->cgroups;
+ cg = __rcu_dereference(tsk->cgroups);
get_css_set(cg);
task_unlock(tsk);
/*
@@ -1986,7 +1988,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
struct dentry *parent;
int error = 0;
- parent = cgrp->parent->dentry;
+ parent = __rcu_dereference(cgrp->parent->dentry);
error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
if (!error) {
dentry->d_fsdata = cgrp;
@@ -2030,7 +2032,7 @@ int cgroup_add_file(struct cgroup *cgrp,
struct cgroup_subsys *subsys,
const struct cftype *cft)
{
- struct dentry *dir = cgrp->dentry;
+ struct dentry *dir = __rcu_dereference(cgrp->dentry);
struct dentry *dentry;
int error;
mode_t mode;
@@ -2135,7 +2137,7 @@ static void cgroup_enable_task_cg_lists(void)
* entry won't be deleted though the process has exited.
*/
if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
- list_add(&p->cg_list, &p->cgroups->tasks);
+ list_add(&p->cg_list, &__rcu_dereference(p->cgroups)->tasks);
task_unlock(p);
} while_each_thread(g, p);
write_unlock(&css_set_lock);
@@ -2828,7 +2830,7 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
struct cgroup_subsys *ss;
/* First clear out any existing files */
- cgroup_clear_directory(cgrp->dentry);
+ cgroup_clear_directory(__rcu_dereference(cgrp->dentry));
err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
if (err < 0)
@@ -2852,7 +2854,7 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
* from RCU-read-side without locks.
*/
if (css->id)
- rcu_assign_pointer(css->id->css, css);
+ rcu_assign_pointer(__rcu_dereference(css->id)->css, css);
}
return 0;
@@ -2960,13 +2962,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
goto err_remove;
/* The cgroup directory was pre-locked for us */
- BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
+ BUG_ON(!mutex_is_locked(&__rcu_dereference(cgrp->dentry)->d_inode->i_mutex));
err = cgroup_populate_dir(cgrp);
/* If err < 0, we have a half-filled directory - oh well ;) */
mutex_unlock(&cgroup_mutex);
- mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+ mutex_unlock(&__rcu_dereference(cgrp->dentry)->d_inode->i_mutex);
return 0;
@@ -3164,8 +3166,8 @@ again:
list_del(&cgrp->sibling);
cgroup_unlock_hierarchy(cgrp->root);
- spin_lock(&cgrp->dentry->d_lock);
- d = dget(cgrp->dentry);
+ spin_lock(&__rcu_dereference(cgrp->dentry)->d_lock);
+ d = dget(__rcu_dereference(cgrp->dentry));
spin_unlock(&d->d_lock);
cgroup_d_remove_dir(d);
@@ -3203,7 +3205,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
* need to invoke fork callbacks here. */
- BUG_ON(!list_empty(&init_task.tasks));
+ BUG_ON(!list_empty_rcu(&init_task.tasks));
mutex_init(&ss->hierarchy_mutex);
lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
@@ -3226,7 +3228,7 @@ int __init cgroup_init_early(void)
css_set_count = 1;
init_cgroup_root(&rootnode);
root_count = 1;
- init_task.cgroups = &init_css_set;
+ __rcu_assign_pointer(init_task.cgroups, &init_css_set);
init_css_set_link.cg = &init_css_set;
init_css_set_link.cgrp = dummytop;
@@ -3425,7 +3427,7 @@ void cgroup_fork(struct task_struct *child)
{
task_lock(current);
child->cgroups = current->cgroups;
- get_css_set(child->cgroups);
+ get_css_set(__rcu_dereference(child->cgroups));
task_unlock(current);
INIT_LIST_HEAD(&child->cg_list);
}
@@ -3465,7 +3467,8 @@ void cgroup_post_fork(struct task_struct *child)
write_lock(&css_set_lock);
task_lock(child);
if (list_empty(&child->cg_list))
- list_add(&child->cg_list, &child->cgroups->tasks);
+ list_add(&child->cg_list,
+ &__rcu_dereference(child->cgroups)->tasks);
task_unlock(child);
write_unlock(&css_set_lock);
}
@@ -3532,8 +3535,8 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
/* Reassign the task to the init_css_set. */
task_lock(tsk);
- cg = tsk->cgroups;
- tsk->cgroups = &init_css_set;
+ cg = __rcu_dereference(tsk->cgroups);
+ __rcu_assign_pointer(tsk->cgroups, &init_css_set);
task_unlock(tsk);
if (cg)
put_css_set_taskexit(cg);
@@ -3583,19 +3586,19 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
/* Keep the cgroup alive */
task_lock(tsk);
parent = task_cgroup(tsk, subsys->subsys_id);
- cg = tsk->cgroups;
+ cg = __rcu_dereference(tsk->cgroups);
get_css_set(cg);
task_unlock(tsk);
mutex_unlock(&cgroup_mutex);
/* Now do the VFS work to create a cgroup */
- inode = parent->dentry->d_inode;
+ inode = __rcu_dereference(parent->dentry)->d_inode;
/* Hold the parent directory mutex across this operation to
* stop anyone else deleting the new cgroup */
mutex_lock(&inode->i_mutex);
- dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
+ dentry = lookup_one_len(nodename, __rcu_dereference(parent->dentry), strlen(nodename));
if (IS_ERR(dentry)) {
printk(KERN_INFO
"cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
@@ -3864,7 +3867,7 @@ static void __free_css_id_cb(struct rcu_head *head)
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
- struct css_id *id = css->id;
+ struct css_id *id = __rcu_dereference(css->id);
/* When this is called before css_id initialization, id can be NULL */
if (!id)
return;
@@ -3941,8 +3944,8 @@ static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
return PTR_ERR(newid);
newid->stack[0] = newid->id;
- newid->css = rootcss;
- rootcss->id = newid;
+ __rcu_assign_pointer(newid->css, rootcss);
+ __rcu_assign_pointer(rootcss->id, newid);
return 0;
}
@@ -3957,7 +3960,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
parent_css = parent->subsys[subsys_id];
child_css = child->subsys[subsys_id];
depth = css_depth(parent_css) + 1;
- parent_id = parent_css->id;
+ parent_id = __rcu_dereference(parent_css->id);
child_id = get_new_cssid(ss, depth);
if (IS_ERR(child_id))
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba401fa..42d4851 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2434,7 +2434,7 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk)
{
struct dentry *dentry;
- dentry = task_cs(tsk)->css.cgroup->dentry;
+ dentry = __rcu_dereference(task_cs(tsk)->css.cgroup->dentry);
spin_lock(&cpuset_buffer_lock);
snprintf(cpuset_name, CPUSET_NAME_LEN,
dentry ? (const char *)dentry->d_name.name : "/");
Signed-off-by: Arnd Bergmann <ar...@arndb.de>
---
include/linux/module.h | 4 ++--
kernel/module.c | 20 +++++++++++++-------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/include/linux/module.h b/include/linux/module.h
index 6cb1a3c..94ce22e 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -6,7 +6,7 @@
* Rewritten by Richard Henderson <r...@tamu.edu> Dec 1996
* Rewritten again by Rusty Russell, 2002
*/
-#include <linux/list.h>
+#include <linux/rculist.h>
#include <linux/stat.h>
#include <linux/compiler.h>
#include <linux/cache.h>
@@ -238,7 +238,7 @@ struct module
enum module_state state;
/* Member of list of modules */
- struct list_head list;
+ struct rcu_list_head list;
/* Unique handle for this module */
char name[MODULE_NAME_LEN];
diff --git a/kernel/module.c b/kernel/module.c
index f82386b..d8b7603 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -78,7 +78,7 @@ EXPORT_TRACEPOINT_SYMBOL(module_get);
* (delete uses stop_machine/add uses RCU list operations). */
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
-static LIST_HEAD(modules);
+static LIST_HEAD_RCU(modules);
/* Block module loading/unloading? */
int modules_disabled = 0;
@@ -360,10 +360,12 @@ struct module *find_module(const char *name)
{
struct module *mod;
- list_for_each_entry(mod, &modules, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(mod, &modules, list) {
if (strcmp(mod->name, name) == 0)
return mod;
}
+ rcu_read_unlock();
return NULL;
}
EXPORT_SYMBOL_GPL(find_module);
@@ -544,7 +546,8 @@ static void module_unload_free(struct module *mod)
{
struct module *i;
- list_for_each_entry(i, &modules, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(i, &modules, list) {
struct module_use *use;
list_for_each_entry(use, &i->modules_which_use_me, list) {
@@ -559,6 +562,7 @@ static void module_unload_free(struct module *mod)
}
}
}
+ rcu_read_unlock();
}
#ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -1368,7 +1372,7 @@ static void mod_kobject_remove(struct module *mod)
static int __unlink_module(void *_mod)
{
struct module *mod = _mod;
- list_del(&mod->list);
+ list_del_rcu(&mod->list);
return 0;
}
@@ -2718,7 +2722,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
unsigned int i;
int ret;
- list_for_each_entry(mod, &modules, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(mod, &modules, list) {
for (i = 0; i < mod->num_symtab; i++) {
ret = fn(data, mod->strtab + mod->symtab[i].st_name,
mod, mod->symtab[i].st_value);
@@ -2726,6 +2731,7 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
return ret;
}
}
+ rcu_read_unlock();
return 0;
}
#endif /* CONFIG_KALLSYMS */
@@ -2768,12 +2774,12 @@ static char *module_flags(struct module *mod, char *buf)
static void *m_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&module_mutex);
- return seq_list_start(&modules, *pos);
+ return seq_list_start((struct list_head *)&modules, *pos);
}
static void *m_next(struct seq_file *m, void *p, loff_t *pos)
{
- return seq_list_next(p, &modules, pos);
+ return seq_list_next(p, (struct list_head *)&modules, pos);
}
static void m_stop(struct seq_file *m, void *p)
Signed-off-by: Arnd Bergmann <ar...@arndb.de>
---
include/linux/init_task.h | 6 +++---
include/linux/pid.h | 9 +++++----
kernel/pid.c | 8 ++++----
3 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index abec69b..c9cf902 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -53,9 +53,9 @@ extern struct group_info init_groups;
#define INIT_STRUCT_PID { \
.count = ATOMIC_INIT(1), \
.tasks = { \
- { .first = &init_task.pids[PIDTYPE_PID].node }, \
- { .first = &init_task.pids[PIDTYPE_PGID].node }, \
- { .first = &init_task.pids[PIDTYPE_SID].node }, \
+ { RCU_INIT_POINTER(.first, &init_task.pids[PIDTYPE_PID].node) }, \
+ { RCU_INIT_POINTER(.first, &init_task.pids[PIDTYPE_PGID].node) }, \
+ { RCU_INIT_POINTER(.first, &init_task.pids[PIDTYPE_SID].node) }, \
}, \
.rcu = RCU_HEAD_INIT, \
.level = 0, \
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 49f1c2f..8eb2aa1 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -2,6 +2,7 @@
#define _LINUX_PID_H
#include <linux/rcupdate.h>
+#include <linux/rculist.h>
enum pid_type
{
@@ -51,7 +52,7 @@ struct upid {
/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
int nr;
struct pid_namespace *ns;
- struct hlist_node pid_chain;
+ struct rcu_hlist_node pid_chain;
};
struct pid
@@ -59,7 +60,7 @@ struct pid
atomic_t count;
unsigned int level;
/* lists of tasks that use this pid */
- struct hlist_head tasks[PIDTYPE_MAX];
+ struct rcu_hlist_head tasks[PIDTYPE_MAX];
struct rcu_head rcu;
struct upid numbers[1];
};
@@ -68,7 +69,7 @@ extern struct pid init_struct_pid;
struct pid_link
{
- struct hlist_node node;
+ struct rcu_hlist_node node;
struct pid *pid;
};
@@ -164,7 +165,7 @@ pid_t pid_vnr(struct pid *pid);
#define do_each_pid_task(pid, type, task) \
do { \
- struct hlist_node *pos___; \
+ struct rcu_hlist_node *pos___; \
if ((pid) != NULL) \
hlist_for_each_entry_rcu((task), pos___, \
&(pid)->tasks[type], pids[type].node) {
diff --git a/kernel/pid.c b/kernel/pid.c
index 2e17c9c..871d75e 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -39,7 +39,7 @@
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
-static struct hlist_head *pid_hash;
+static struct rcu_hlist_head *pid_hash;
static unsigned int pidhash_shift = 4;
struct pid init_struct_pid = INIT_STRUCT_PID;
@@ -290,7 +290,7 @@ out_free:
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
- struct hlist_node *elem;
+ struct rcu_hlist_node *elem;
struct upid *pnr;
hlist_for_each_entry_rcu(pnr, elem,
@@ -336,7 +336,7 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
link->pid = new;
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
- if (!hlist_empty(&pid->tasks[tmp]))
+ if (!hlist_empty_rcu(&pid->tasks[tmp]))
return;
free_pid(pid);
@@ -366,7 +366,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result = NULL;
if (pid) {
- struct hlist_node *first;
+ struct rcu_hlist_node *first;
first = rcu_dereference(pid->tasks[type].first);
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
Hrm, wait... dumb question: how can an annotation break compilation? If there is
any way around this, I would prefer that we do without an #ifdef __CHECKER__ if
possible. It invites bugs and implementation mismatches.
Thanks,
Mathieu
--
Mathieu Desnoyers
Operating System Efficiency Consultant
EfficiOS Inc.
http://www.efficios.com
Hrm, I'm not sure about this one. It would be better to use something closer to
the list.h LIST_HEAD_INIT / LIST_HEAD / INIT_LIST_HEAD scheme. The first two are
for static declaration/initialization, while the last one is for runtime
initialization. I fear that your RCU_INIT_POINTER might be semantically
confusing, since it blurs the usual distinction between static and dynamic
initialization.
Thanks,
Mathieu
--
Mathieu Desnoyers
Operating System Efficiency Consultant
EfficiOS Inc.
http://www.efficios.com
The modules list is protected by module_mutex; nothing should be done here.
The problem is that you can not (currently) declare a list_head
to be RCU protected by writing 'struct list_head __rcu list;',
because address space annotations only work on pointers.
The solution I used was to define a new struct rcu_list_head that
has the ->next member annotated as __rcu. Unfortunately, this
means that it is now incompatible with struct list_head from
the compiler's point of view.
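In rough terms, such a head looks like this (a sketch of the idea only,
not the exact definition from the series):
struct rcu_list_head {
	struct rcu_list_head __rcu *next;	/* sparse-checked */
	struct rcu_list_head *prev;
};
so sparse can check every access to ->next, but the type no longer
interchanges with struct list_head.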
We could get rid of the #ifdef by unconditionally defining
the separate rcu_list_head, as long as all users are correctly
converted to it. This does have problems in cases such as the
module code that uses the same list with and without RCU, as
Alexey mentioned.
The alternative would be not to annotate list RCU at all and
__force away the warnings for these so we can still keep the
normal __rcu annotations.
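Roughly speaking, such an accessor could look like this (a sketch only,
modelled on list_entry(); the exact form is hypothetical):
/* Cast the __rcu qualifier away inside the accessor so that plain
 * struct list_head stays unannotated and callers see no warnings. */
#define list_entry_rcu(ptr, type, member) \
	({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \
	 container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \
	})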
Arnd
I only had to use it in one place, INIT_STRUCT_PID (see patch 08/10), and
the approach of LIST_HEAD does not seem to work there. I'd certainly
prefer to write it in a way that cannot be used for dynamic initialization,
but could not think of one.
Arnd
Ok, this is a significant limitation of the list RCU annotation
then: it's not possible to pass the same list to both list_for_each_entry
and list_for_each_entry_rcu with the way I changed the rcu list
definition. It would be possible to do a __list_for_each_entry_rcu
macro that takes an rcu_list_head but does not actually use
rcu_dereference, but I'm not sure whether that's good enough.
Arnd
Hmmm... If the __rcu annotation were visible at runtime, it would be
easy to provide an annotated version of list_for_each_entry_rcu() that
checks for module_mutex being held under lockdep.
Thanx, Paul
Well, if we keep the struct rcu_list_head and make it mandatory for
rcu protected lists, it could be defined as
struct rcu_list_head {
struct list_head head;
#ifdef CONFIG_PROVE_RCU
bool (*check)(void);
#endif
};
#ifdef CONFIG_PROVE_RCU
#define RCU_LIST_HEAD_INIT_CHECK(__head, __check) \
{ .head = LIST_HEAD_INIT((__head).head), .check = (__check) }
#else
#define RCU_LIST_HEAD_INIT_CHECK(__head, __check) { .head = LIST_HEAD_INIT((__head).head) }
#endif
#define RCU_LIST_HEAD_INIT(head) RCU_LIST_HEAD_INIT_CHECK(head,&rcu_read_lock_held)
#define RCU_LIST_HEAD_INIT_BH(head) RCU_LIST_HEAD_INIT_CHECK(head,&rcu_read_lock_bh_held)
#define list_entry_rcu_check(ptr, type, member, check) \
container_of(rcu_dereference_check((void __rcu __force *)(ptr), check), type, member)
#define list_for_each_entry_rcu(pos, __head, member) \
for (pos = list_entry_rcu_check((__head)->head.next, typeof(*pos), \
member, (__head)->check); \
prefetch(pos->member.next), &pos->member != &(__head)->head; \
pos = list_entry_rcu_check(pos->member.next, typeof(*pos), \
member, (__head)->check))
That would let us check all the heads for correct usage, and at the same
time avoid having to annotate all the list entries.
Arnd
Cool!!!
The nice thing about this is that we don't end up with the API explosion
for the RCU list primitives. However, it does require that a given
rcu_list_head have a single synchronization-design rule for all uses.
Of course, if there were multiple rules, one could construct a check
that was simply the union of all the rules, but that would miss some
types of errors.
Of course, if this became a problem, there could be an argument to the
->check function that the normal list_for_each_entry_rcu() defaults to
"no effect".
Or is there a better way to handle this?
Thanx, Paul
One approach would be to use your original sparse-based approach, but
use an rcu_dereference_const(ptr, lockdep_condition) for cases when the
value cannot change, for example, when the update-side lock is held.
This should eliminate most of the false positives, in particular,
eliminate the need for otherwise-useless rcu_read_lock()s -- and also
for the compiler constraints in the normal rcu_dereference().
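As a rough illustration of the idea (the name and the exact form below
are hypothetical, not an existing API):
/* Dereference an __rcu pointer whose value cannot change here, e.g.
 * because the update-side lock is held: no ACCESS_ONCE() or read
 * barrier, just a check of the stated condition plus stripping of
 * the sparse annotation. */
#define rcu_dereference_const(p, c) \
({ \
	WARN_ON_ONCE(!(c)); \
	((typeof(*(p)) __force __kernel *)(p)); \
})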
Your pointer-to-function idea could be a really cool way to handle the
tree algorithms that can be protected either by RCU or by locking.
The tree nodes could have the pointer to check function, and the
current rcu_dereference_raw() calls could be replaced by an invocation
of rcu_dereference_check() that calls the check function. A check
function for an RCU-protected tree would use "rcu_read_lock_held() ||
lockdep_is_held(&update_side_lock)", while a lock-protected tree would
just use "lockdep_is_held(&update_side_lock)".
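For illustration, the two check functions might look like this
(update_side_lock is just a placeholder for whatever lock protects
updates to the tree in question):
static DEFINE_MUTEX(update_side_lock);	/* placeholder update-side lock */
static int rcu_tree_check(void)		/* readers use RCU or the lock */
{
	return rcu_read_lock_held() || lockdep_is_held(&update_side_lock);
}
static int locked_tree_check(void)	/* readers must hold the lock */
{
	return lockdep_is_held(&update_side_lock);
}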
Thoughts?
What would it miss? E.g. having the module code check for
(mutex_is_locked(&module_mutex) || rcu_read_lock_held()) should
cover all cases as far as I can tell.
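Concretely, with the rcu_list_head-plus-check sketch above, the module
code might (hypothetically) end up with something like:
static bool modules_check(void)
{
	return mutex_is_locked(&module_mutex) || rcu_read_lock_held();
}
static struct rcu_list_head modules =
	RCU_LIST_HEAD_INIT_CHECK(modules, modules_check);
and every list_for_each_entry_rcu(mod, &modules, list) would then be
verified against that single rule under CONFIG_PROVE_RCU.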
> > Of course, if this became a problem, there could be an argument to the
> > ->check function that the normal list_for_each_entry_rcu() defaults to
> > "no effect".
I've also been thinking about adding a list_for_each_entry_norcu()
macro that takes an rcu_list_head but then just performs a simple
list_for_each_entry().
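A hypothetical sketch, assuming the rcu_list_head layout proposed above
(a plain list_head embedded as ->head):
/* Iterate an rcu_list_head without RCU accessors, for callers that
 * hold the update-side lock. */
#define list_for_each_entry_norcu(pos, __head, member) \
	list_for_each_entry(pos, &(__head)->head, member)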
> > Or is there a better way to handle this?
>
> One approach would be to use your original sparse-based approach, but
> use an rcu_dereference_const(ptr, lockdep_condition) for cases when the
> value cannot change, for example, when the update-side lock is held.
> This should eliminate most of the false positives, in particular,
> eliminate the need for otherwise-useless rcu_read_lock()s -- and also
> for the compiler constraints in the normal rcu_dereference().
Right.
> Your pointer-to-function idea could be a really cool way to handle the
> tree algorithms that can be protected either by RCU or by locking.
> The tree nodes could have the pointer to check function, and the
> current rcu_dereference_raw() calls could be replaced by an invocation
> of rcu_dereference_check() that calls the check function. A check
> function for an RCU-protected tree would use "rcu_read_lock_held() ||
> lockdep_is_held(&update_side_lock)", while a lock-protected tree would
> just use "lockdep_is_held(&update_side_lock)".
I've postponed that problem for now, and updated my series to split
the rculist annotations from the basic __rcu pointer annotations,
as well as to apply on top of your patches in tip/core/rcu,
see http://git.kernel.org/?p=linux/kernel/git/arnd/playground.git;\
a=shortlog;h=refs/heads/rcu-annotate-tip.
Should we merge the simple annotations in this merge window and
then think about rculist and trees separately?
Arnd
My concern is with single data structures used in different parts of the code
with different update-side locks, perhaps also different flavors of RCU.
Some of the tree data structures in the Linux kernel can be protected by
either locking or RCU, for example.
> > > Of course, if this became a problem, there could be an argument to the
> > > ->check function that the normal list_for_each_entry_rcu() defaults to
> > > "no effect".
>
> I've also been thinking about adding a list_for_each_entry_norcu()
> macro that takes an rcu_list_head but then just performs a simple
> list_for_each_entry().
We might need to do something like this, but if we do, we need to
minimize the need to use it.
> > > Or is there a better way to handle this?
> >
> > One approach would be to use your original sparse-based approach, but
> > use an rcu_dereference_const(ptr, lockdep_condition) for cases when the
> > value cannot change, for example, when the update-side lock is held.
> > This should eliminate most of the false positives, in particular,
> > eliminate the need for otherwise-useless rcu_read_lock()s -- and also
> > for the compiler constraints in the normal rcu_dereference().
>
> Right.
>
> > Your pointer-to-function idea could be a really cool way to handle the
> > tree algorithms that can be protected either by RCU or by locking.
> > The tree nodes could have the pointer to check function, and the
> > current rcu_dereference_raw() calls could be replaced by an invocation
> > of rcu_dereference_check() that calls the check function. A check
> > function for an RCU-protected tree would use "rcu_read_lock_held() ||
> > lockdep_is_held(&update_side_lock)", while a lock-protected tree would
> > just use "lockdep_is_held(&update_side_lock)".
>
> I've postponed that problem for now, and updated my series to split
> the rculist annotations from the basic __rcu pointer annotations,
> as well as to apply on top of your patches in tip/core/rcu,
> see http://git.kernel.org/?p=linux/kernel/git/arnd/playground.git;\
> a=shortlog;h=refs/heads/rcu-annotate-tip.
>
> Should we merge the simple annotations in this merge window and
> then think about rculist and trees separately?
I haven't given up on the possibility of getting the whole thing into
this merge window, but if that is not possible, it would be good to
start on the annotations. Of course, the annotations would need to be
done so that they don't rain false positives on people who are not
actively looking to see them.
Thanx, Paul
[ . . . ]
> > I've postponed that problem for now, and updated my series to split
> > the rculist annotations from the basic __rcu pointer annotations,
> > as well as to apply on top of your patches in tip/core/rcu,
> > see http://git.kernel.org/?p=linux/kernel/git/arnd/playground.git;\
> > a=shortlog;h=refs/heads/rcu-annotate-tip.
At first glance, this looks reasonably sane. I have looked up through
the "scheduler: __rcu annotations" commit.
Some comments:
o The name rcu_dereference_const() makes more sense to me than
does __rcu_dereference(), as it documents when you can safely
use it -- when something is preventing the RCU-protected
pointer in question from changing.
o Uses of __rcu_dereference() in your playground.git that are
safe because some lock is held should be changed to
rcu_dereference_check(), mentioning that lock. Ditto zero
reference counts.
For example, in your first change to put_ctx() in
kernel/perf_event.c, the:
put_ctx(__rcu_dereference(ctx->parent_ctx));
should instead be:
put_ctx(rcu_dereference_check(ctx->parent_ctx,
ctx->refcount == 0));
This does take a bit more space, but very clearly documents
the synchronization design and enables the combination of
sparse and lockdep to enforce it. And yes, this example has
the "if" right above the use, but many other cases are not
so easy to see so quickly. And a future change might well
rearrange the code so that the "if" is a long ways away from
the dereference.
o Whatever we choose for the name of what is __rcu_dereference()
in your tree, uses should be commented, just as things like
smp_mb() are commented. For example:
q = __rcu_dereference(p->next); /* Initialization. */
to indicate that the structure is still being initialized so
that no other CPU or task has access to it.
Again, looks promising!
Thanx, Paul