[PATCH 1/2] AIO: Fix reference counting in io_cancel and aio_cancel_all

Sergey Temerkhanov

unread,

Mar 16, 2010, 7:30:02 AM3/16/10

to

io_cancel() and aio_cancel_all() have inconsistent reference counting
(ki_users field of struct kiocb) which leads to unkillable processes upon
io_cancel() or io_destroy() syscalls. This patch fixes the undesired behavior.

Regards, Sergey Temerkhanov, Cifronic ZAO

aio-fix.patch

Sergey Temerkhanov

unread,

Mar 16, 2010, 7:30:01 AM3/16/10

to

Some functions in fs/aio.c use pr_debug and others use dprintk statements.
This patch makes it uniform.

aio-debug.patch

Jeff Moyer

unread,

Mar 16, 2010, 10:50:03 AM3/16/10

to

Sergey Temerkhanov <temer...@cifronik.ru> writes:

> io_cancel() and aio_cancel_all() have inconsistent reference counting
> (ki_users field of struct kiocb) which leads to unkillable processes upon
> io_cancel() or io_destroy() syscalls. This patch fixes the undesired behavior.

Hi, Sergey,

Thanks for the patch. Would you mind resubmitting it to make it a bit
easier to review? The way you've done things, it is difficult to tell
if you just moved the aio_cancel_all function or if you moved it and
made changes. Please have the patch that moves it separated out from
other changes, or at least mention in the changelog that the function
was unchanged. Next, if you could tell what sorts of testing you've
performed, that would be great. Finally, it would be a good idea to CC
linu...@kvack.org on aio patches.

Thanks!
Jeff

>
> Regards, Sergey Temerkhanov, Cifronic ZAO

> diff -r b290b47c0880 fs/aio.c
> --- a/fs/aio.c Tue Mar 16 13:51:13 2010 +0300
> +++ b/fs/aio.c Tue Mar 16 14:11:36 2010 +0300
> @@ -63,6 +63,8 @@
> static void aio_kick_handler(struct work_struct *);
> static void aio_queue_work(struct kioctx *);
>
> +static void aio_cancel_all(struct kioctx *ctx);
> +
> /* aio_setup
> * Creates the slab caches used by the aio routines, panic on
> * failure as this is done early during the boot sequence.
> @@ -315,33 +317,6 @@
> return ctx;
> }
>
> -/* aio_cancel_all
> - * Cancels all outstanding aio requests on an aio context. Used
> - * when the processes owning a context have all exited to encourage
> - * the rapid destruction of the kioctx.
> - */
> -static void aio_cancel_all(struct kioctx *ctx)
> -{
> - int (*cancel)(struct kiocb *, struct io_event *);
> - struct io_event res;
> - spin_lock_irq(&ctx->ctx_lock);
> - ctx->dead = 1;
> - while (!list_empty(&ctx->active_reqs)) {
> - struct list_head *pos = ctx->active_reqs.next;
> - struct kiocb *iocb = list_kiocb(pos);
> - list_del_init(&iocb->ki_list);
> - cancel = iocb->ki_cancel;
> - kiocbSetCancelled(iocb);
> - if (cancel) {
> - iocb->ki_users++;
> - spin_unlock_irq(&ctx->ctx_lock);
> - cancel(iocb, &res);
> - spin_lock_irq(&ctx->ctx_lock);
> - }
> - }
> - spin_unlock_irq(&ctx->ctx_lock);
> -}
> -
> static void wait_for_all_aios(struct kioctx *ctx)
> {
> struct task_struct *tsk = current;
> @@ -1038,6 +1013,36 @@
> return ret;
> }
>
> +/* aio_cancel_all
> + * Cancels all outstanding aio requests on an aio context. Used
> + * when the processes owning a context have all exited to encourage
> + * the rapid destruction of the kioctx.
> + */
> +static void aio_cancel_all(struct kioctx *ctx)
> +{
> + int (*cancel)(struct kiocb *, struct io_event *);
> + struct io_event res;
> + spin_lock_irq(&ctx->ctx_lock);
> + ctx->dead = 1;
> + while (!list_empty(&ctx->active_reqs)) {
> + struct list_head *pos = ctx->active_reqs.next;
> + struct kiocb *iocb = list_kiocb(pos);
> + list_del_init(&iocb->ki_list);
> + cancel = iocb->ki_cancel;
> + kiocbSetCancelled(iocb);
> + if (cancel) {
> + iocb->ki_users++;
> + spin_unlock_irq(&ctx->ctx_lock);
> + cancel(iocb, &res);
> + spin_lock_irq(&ctx->ctx_lock);
> + dprintk("aio_cancel_all: iocb->ki_users: %d\n", iocb->ki_users);
> + __aio_put_req(ctx, iocb);
> + }
> + __aio_put_req(ctx, iocb);
> + }
> + spin_unlock_irq(&ctx->ctx_lock);
> +}
> +
> /* aio_read_evt
> * Pull an event off of the ioctx's event ring. Returns the number of
> * events fetched (0 or 1 ;-)
> @@ -1787,9 +1792,11 @@
> if (copy_to_user(result, &tmp, sizeof(tmp)))
> ret = -EFAULT;
> }
> + aio_put_req(kiocb);
> } else
> ret = -EINVAL;
>
> + aio_put_req(kiocb);
> put_ioctx(ctx);
>
> return ret;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Jeff Moyer

unread,

Mar 16, 2010, 11:00:02 AM3/16/10

to

Sergey Temerkhanov <temer...@cifronik.ru> writes:

> Some functions in fs/aio.c use pr_debug and others use dprintk statements.
> This patch makes it uniform.

This is a good idea, but I'd rather use pr_debug everywhere, since
(depending on the kernel config) those can be enabled at run-time.

Cheers,
Jeff

> Regards, Sergey Temerkhanov, Cifronic ZAO

> diff -r 5bbe95a762e1 fs/aio.c
> --- a/fs/aio.c Tue Mar 16 14:11:37 2010 +0300
> +++ b/fs/aio.c Tue Mar 16 14:11:44 2010 +0300
> @@ -76,7 +76,7 @@
>
> aio_wq = create_workqueue("aio");
>
> - pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
> + dprintk("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
>
> return 0;
> }
> @@ -221,7 +221,7 @@
> aio_free_ring(ctx);
> mmdrop(ctx->mm);
> ctx->mm = NULL;
> - pr_debug("__put_ioctx: freeing %p\n", ctx);
> + dprintk("__put_ioctx: freeing %p\n", ctx);
> call_rcu(&ctx->rcu_head, ctx_rcu_free);
> }
>
> @@ -247,7 +247,7 @@
> /* Prevent overflows */
> if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
> (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
> - pr_debug("ENOMEM: nr_events too high\n");
> + dprintk("ENOMEM: nr_events too high\n");
> return ERR_PTR(-EINVAL);
> }
>
> @@ -468,6 +468,7 @@
> kfree(req->ki_iovec);
> kmem_cache_free(kiocb_cachep, req);
> ctx->reqs_active--;
> + dprintk("really_put_req: req->ki_users: %d\n", req->ki_users);
>
> if (unlikely(!ctx->reqs_active && ctx->dead))
> wake_up(&ctx->wait);
> @@ -503,12 +504,14 @@
> */
> static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
> {
> - dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
> + dprintk("aio_put(%p): f_count=%ld\n",
> req, atomic_long_read(&req->ki_filp->f_count));
>
> assert_spin_locked(&ctx->ctx_lock);
>
> req->ki_users--;
> + dprintk("__aio_put_req: req: %p, req->ki_users: %d\n",
> + req, req->ki_users);
> BUG_ON(req->ki_users < 0);
> if (likely(req->ki_users))
> return 0;
> @@ -954,6 +957,7 @@
> * cancelled requests don't get events, userland was given one
> * when the event got cancelled.
> */
> + dprintk("kiocbIsCancelled(iocb): %d\n", kiocbIsCancelled(iocb));
> if (kiocbIsCancelled(iocb))
> goto put_rq;
>
> @@ -984,7 +988,7 @@
> put_aio_ring_event(event, KM_IRQ0);
> kunmap_atomic(ring, KM_IRQ1);
>
> - pr_debug("added to ring %p at [%lu]\n", iocb, tail);
> + dprintk("added to ring %p at [%lu]\n", iocb, tail);
>
> /*
> * Check if the user asked us to deliver the result through an
> @@ -1187,6 +1191,7 @@
> }
>
> while (likely(i < nr)) {
> + dprintk("aio: i: %d, nr: %ld\n", i, nr);
> add_wait_queue_exclusive(&ctx->wait, &wait);
> do {
> set_task_state(tsk, TASK_INTERRUPTIBLE);
> @@ -1199,6 +1204,7 @@
> ret = -EINVAL;
> break;
> }
> + dprintk("aio: to.timed_out: %d\n", to.timed_out);
> if (to.timed_out) /* Only check after read evt */
> break;
> /* Try to only show up in io wait if there are ops
> @@ -1294,7 +1300,7 @@
>
> ret = -EINVAL;
> if (unlikely(ctx || nr_events == 0)) {
> - pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
> + dprintk("EINVAL: io_setup: ctx %lu nr_events %u\n",
> ctx, nr_events);
> goto out;
> }
> @@ -1327,7 +1333,7 @@
> io_destroy(ioctx);
> return 0;
> }
> - pr_debug("EINVAL: io_destroy: invalid context id\n");
> + dprintk("EINVAL: io_destroy: invalid context id\n");
> return -EINVAL;
> }
>
> @@ -1586,7 +1592,7 @@
>
> /* enforce forwards compatibility on users */
> if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
> - pr_debug("EINVAL: io_submit: reserve field set\n");
> + dprintk("EINVAL: io_submit: reserve field set\n");
> return -EINVAL;
> }
>
> @@ -1596,7 +1602,7 @@
> (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
> ((ssize_t)iocb->aio_nbytes < 0)
> )) {
> - pr_debug("EINVAL: io_submit: overflow check\n");
> + dprintk("EINVAL: io_submit: overflow check\n");
> return -EINVAL;
> }
>
> @@ -1690,7 +1696,7 @@
>
> ctx = lookup_ioctx(ctx_id);
> if (unlikely(!ctx)) {
> - pr_debug("EINVAL: io_submit: invalid context id\n");
> + dprintk("EINVAL: io_submit: invalid context id\n");
> return -EINVAL;
> }
>
> @@ -1780,7 +1786,7 @@
>
> if (NULL != cancel) {
> struct io_event tmp;
> - pr_debug("calling cancel\n");
> + dprintk("calling cancel\n");
> memset(&tmp, 0, sizeof(tmp));
> tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
> tmp.data = kiocb->ki_user_data;

Сергей Темерханов

unread,

Mar 16, 2010, 12:40:02 PM3/16/10

to

On Tuesday 16 March 2010 17:46:10 Jeff Moyer wrote:
> Sergey Temerkhanov <temer...@cifronik.ru> writes:
> > io_cancel() and aio_cancel_all() have inconsistent reference counting
> > (ki_users field of struct kiocb) which leads to unkillable processes upon
> > io_cancel() or io_destroy() syscalls. This patch fixes the undesired
> > behavior.
>
> Hi, Sergey,
>
> Thanks for the patch. Would you mind resubmitting it to make it a bit
> easier to review? The way you've done things, it is difficult to tell
> if you just moved the aio_cancel_all function or if you moved it and
> made changes. Please have the patch that moves it separated out from
> other changes, or at least mention in the changelog that the function
> was unchanged. Next, if you could tell what sorts of testing you've
> performed, that would be great. Finally, it would be a good idea to CC
> linu...@kvack.org on aio patches.
>
> Thanks!
> Jeff
>

This patch fixes reference counting in io_cancel() and aio_cancel_all()

aio-fix.patch

Сергей Темерханов

unread,

Mar 16, 2010, 12:40:02 PM3/16/10

to

On Tuesday 16 March 2010 17:46:10 Jeff Moyer wrote:

> Sergey Temerkhanov <temer...@cifronik.ru> writes:
> > io_cancel() and aio_cancel_all() have inconsistent reference counting
> > (ki_users field of struct kiocb) which leads to unkillable processes upon
> > io_cancel() or io_destroy() syscalls. This patch fixes the undesired
> > behavior.
>
> Hi, Sergey,
>
> Thanks for the patch. Would you mind resubmitting it to make it a bit
> easier to review? The way you've done things, it is difficult to tell
> if you just moved the aio_cancel_all function or if you moved it and
> made changes. Please have the patch that moves it separated out from
> other changes, or at least mention in the changelog that the function
> was unchanged. Next, if you could tell what sorts of testing you've
> performed, that would be great. Finally, it would be a good idea to CC
> linu...@kvack.org on aio patches.
>
> Thanks!
> Jeff

Well, I've broken the first patch into two. This one just moves aio_cancel_all().

aio-move-cancel-all.patch

Сергей Темерханов

unread,

Mar 16, 2010, 12:40:02 PM3/16/10

to

On Tuesday 16 March 2010 17:46:10 Jeff Moyer wrote:

> Sergey Temerkhanov <temer...@cifronik.ru> writes:
> > io_cancel() and aio_cancel_all() have inconsistent reference counting
> > (ki_users field of struct kiocb) which leads to unkillable processes upon
> > io_cancel() or io_destroy() syscalls. This patch fixes the undesired
> > behavior.
>
> Hi, Sergey,
>
> Thanks for the patch. Would you mind resubmitting it to make it a bit
> easier to review? The way you've done things, it is difficult to tell
> if you just moved the aio_cancel_all function or if you moved it and
> made changes. Please have the patch that moves it separated out from
> other changes, or at least mention in the changelog that the function
> was unchanged. Next, if you could tell what sorts of testing you've
> performed, that would be great. Finally, it would be a good idea to CC
> linu...@kvack.org on aio patches.
>
> Thanks!
> Jeff

And the last one changes dprintk statements to pr_debug

aio-debug.patch

[PATCH 1/2] AIO: Fix reference counting in io_cancel and aio_cancel_all

Sergey Temerkhanov

Sergey Temerkhanov

Jeff Moyer

Jeff Moyer

Сергей Темерханов

Сергей Темерханов

Сергей Темерханов