This patch is also available via HTTP:
http://bogomips.org/libkqueue.git/patch/?id=5e42ef9fb3
This patch is against trunk, but should apply trivially
to the stable branch.
Thanks for libkqueue!
src/common/private.h | 2 +-
src/posix/kevent.c | 8 +++++---
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/common/private.h b/src/common/private.h
index 451b01b..ca51cac 100644
--- a/src/common/private.h
+++ b/src/common/private.h
@@ -143,7 +143,7 @@ struct filter {
struct kqueue {
int kq_id;
struct filter kq_filt[EVFILT_SYSCOUNT];
- fd_set kq_fds, kq_rfds;
+ fd_set kq_fds;
int kq_nfds;
tracing_mutex_t kq_mtx;
volatile uint32_t kq_ref;
diff --git a/src/posix/kevent.c b/src/posix/kevent.c
index 6ebb62e..9051587 100644
--- a/src/posix/kevent.c
+++ b/src/posix/kevent.c
@@ -21,6 +21,8 @@
const struct filter evfilt_proc = EVFILT_NOTIMPL;
+static fd_set __thread kq_rfds;
+
int
posix_kevent_wait(
struct kqueue *kq,
@@ -29,8 +31,8 @@ posix_kevent_wait(
int n;
dbg_puts("waiting for events");
- kq->kq_rfds = kq->kq_fds;
- n = pselect(kq->kq_nfds, &kq->kq_rfds, NULL , NULL, timeout, NULL);
+ kq_rfds = kq->kq_fds;
+ n = pselect(kq->kq_nfds, &kq_rfds, NULL , NULL, timeout, NULL);
if (n < 0) {
if (errno == EINTR) {
dbg_puts("signal caught");
@@ -55,7 +57,7 @@ posix_kevent_copyout(struct kqueue *kq, int nready,
// dbg_printf("eventlist: n = %d nevents = %d", nready, nevents);
filt = &kq->kq_filt[i];
// dbg_printf("pfd[%d] = %d", i, filt->kf_pfd);
- if (FD_ISSET(filt->kf_pfd, &kq->kq_rfds)) {
+ if (FD_ISSET(filt->kf_pfd, &kq_rfds)) {
dbg_printf("pending events for filter %d (%s)", filt->kf_id, filter_name(filt->kf_id));
rv = filt->kf_copyout(filt, eventlist, nevents);
if (rv < 0) {
On 04/21/2012 08:43 PM, Eric Wong wrote:
> The fd_set passed to pselect() may be clobbered if
> multiple threads are parked in pselect(). This can
> lead to lost events or false positives.
> ---
> This seems to solve issues I've been having with a
> multi-threaded application losing events/wakeups.
Thanks for finding this bug and sending a patch. I agree there is a race
condition in posix_kevent_wait() involving the kq_rfds (and the kq_fds) variables.
Unfortunately, we can't just use a global TSD variable for kq_rfds because a
program can call kqueue() multiple times to create multiple kqueue
descriptors. Each kqueue descriptor requires a separate kq_rfds variable,
which is why this is part of the kqueue struct.
There are a couple different ways this could be solved, so I will give it a
try and let you know when it's done.
Regards,
- Mark
Ah, good point! I actually made a different patch against stable. It
didn't apply cleanly against trunk, and I went with the TSD option out
of laziness since it applied to both branches :x
> There are a couple different ways this could be solved, so I will give it a
> try and let you know when it's done.
Anyways, here's my patch against stable:
http://bogomips.org/libkqueue.git/patch/?id=4aaf11fcfe2
(I haven't (and can't) test the changes to the Solaris code)
From 4aaf11fcfe24018f99c58e8d709b040492078ad7 Mon Sep 17 00:00:00 2001
From: Eric Wong <e...@yhbt.net>
Date: Sat, 21 Apr 2012 01:26:33 +0000
Subject: [PATCH] call pselect() with an private, on-stack fd_set
The fd_set passed to pselect() may be clobbered if
multiple threads are parked in pselect().
---
src/common/kevent.c | 5 +++--
src/common/private.h | 6 +++---
src/posix/kevent.c | 10 +++++-----
src/solaris/kevent.c | 4 ++--
4 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/src/common/kevent.c b/src/common/kevent.c
index 883df23..9366be6 100644
--- a/src/common/kevent.c
+++ b/src/common/kevent.c
@@ -238,6 +238,7 @@ kevent(int kqfd, const struct kevent *changelist, int nchanges,
const struct timespec *timeout)
{
struct kqueue *kq;
+ fd_set kq_rfds;
int rv, n, nret;
nret = 0;
@@ -282,7 +283,7 @@ kevent(int kqfd, const struct kevent *changelist, int nchanges,
for (nret = 0; nret == 0;)
{
/* Wait for one or more events. */
- n = kevent_wait(kq, timeout);
+ n = kevent_wait(kq, &kq_rfds, timeout);
if (n < 0) {
dbg_puts("kevent_wait failed");
goto errout;
@@ -292,7 +293,7 @@ kevent(int kqfd, const struct kevent *changelist, int nchanges,
/* Copy the events to the caller */
kqueue_lock(kq);
- nret = kevent_copyout(kq, n, eventlist, nevents);
+ nret = kevent_copyout(kq, &kq_rfds, n, eventlist, nevents);
kqueue_unlock(kq);
}
diff --git a/src/common/private.h b/src/common/private.h
index f0f06a2..bd00311 100644
--- a/src/common/private.h
+++ b/src/common/private.h
@@ -153,7 +153,7 @@ struct filter {
struct kqueue {
int kq_sockfd[2];
struct filter kq_filt[EVFILT_SYSCOUNT];
- fd_set kq_fds, kq_rfds;
+ fd_set kq_fds;
int kq_nfds;
pthread_mutex_t kq_mtx;
#ifdef __sun__
@@ -192,8 +192,8 @@ const char *filter_name(short);
int filter_lower(struct filter *);
int filter_raise(struct filter *);
-int kevent_wait(struct kqueue *, const struct timespec *);
-int kevent_copyout(struct kqueue *, int, struct kevent *, int);
+int kevent_wait(struct kqueue *, fd_set *, const struct timespec *);
+int kevent_copyout(struct kqueue *, fd_set *, int, struct kevent *, int);
void kevent_free(struct kqueue *);
const char *kevent_dump(const struct kevent *);
diff --git a/src/posix/kevent.c b/src/posix/kevent.c
index 8a657c7..ec24b56 100644
--- a/src/posix/kevent.c
+++ b/src/posix/kevent.c
@@ -22,13 +22,13 @@
const struct filter evfilt_proc = EVFILT_NOTIMPL;
int
-kevent_wait(struct kqueue *kq, const struct timespec *timeout)
+kevent_wait(struct kqueue *kq, fd_set *kq_rfds, const struct timespec *timeout)
{
int n;
dbg_puts("waiting for events");
- kq->kq_rfds = kq->kq_fds;
- n = pselect(kq->kq_nfds, &kq->kq_rfds, NULL , NULL, timeout, NULL);
+ *kq_rfds = kq->kq_fds;
+ n = pselect(kq->kq_nfds, kq_rfds, NULL , NULL, timeout, NULL);
if (n < 0) {
if (errno == EINTR) {
dbg_puts("signal caught");
@@ -42,7 +42,7 @@ kevent_wait(struct kqueue *kq, const struct timespec *timeout)
}
int
-kevent_copyout(struct kqueue *kq, int nready,
+kevent_copyout(struct kqueue *kq, fd_set *kq_rfds, int nready,
struct kevent *eventlist, int nevents)
{
struct filter *filt;
@@ -53,7 +53,7 @@ kevent_copyout(struct kqueue *kq, int nready,
// dbg_printf("eventlist: n = %d nevents = %d", nready, nevents);
filt = &kq->kq_filt[i];
// dbg_printf("pfd[%d] = %d", i, filt->kf_pfd);
- if (FD_ISSET(filt->kf_pfd, &kq->kq_rfds)) {
+ if (FD_ISSET(filt->kf_pfd, kq_rfds)) {
dbg_printf("pending events for filter %d (%s)", filt->kf_id, filter_name(filt->kf_id));
rv = filt->kf_copyout(filt, eventlist, nevents);
if (rv < 0) {
diff --git a/src/solaris/kevent.c b/src/solaris/kevent.c
index 811bfa2..e7fe87d 100644
--- a/src/solaris/kevent.c
+++ b/src/solaris/kevent.c
@@ -81,7 +81,7 @@ port_event_dump(port_event_t *evt)
}
int
-kevent_wait(struct kqueue *kq, const struct timespec *timeout)
+kevent_wait(struct kqueue *kq, fd_set *kq_rfds, const struct timespec *timeout)
{
port_event_t *pe = (port_event_t *) pthread_getspecific(kq->kq_port_event);
@@ -110,7 +110,7 @@ kevent_wait(struct kqueue *kq, const struct timespec *timeout)
}
int
-kevent_copyout(struct kqueue *kq, int nready,
+kevent_copyout(struct kqueue *kq, fd_set *kq_rfds, int nready,
struct kevent *eventlist, int nevents)
{
port_event_t *pe = (port_event_t *) pthread_getspecific(kq->kq_port_event);
--
1.7.9.1.290.gbd444
That was one of the ways I was thinking of handling it :)
However, I decided to go with the more conservative approach of holding the
kqueue lock for the duration of the kevent() function call, and dropping it
only when there is a possibility of going to sleep for a long time. That
should protect us against any other potential data races involving the kqueue
struct.
Can you test against r541 of the stable branch and let me know if that fixes
the problem you saw in your application?
Thanks,
- Mark
Good catch.. this should be fixed in r542.
- Mark