André Goddard Rosa (6):
mqueue: remove unneeded info->messages initialization
mqueue: apply mathematics distributivity on mq_bytes calculation
mqueue: simplify do_open() error handling
mqueue: only set error codes if they are really necessary
mqueue: fix typo "failues" -> "failures"
mqueue: fix mq_open() file descriptor leak on user-space processes
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Signed-off-by: André Goddard Rosa <andre....@gmail.com>
---
ipc/mqueue.c | 16 ++++++++++------
1 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 04403fd..2cddf93 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -657,24 +657,28 @@ out:
static struct file *do_open(struct ipc_namespace *ipc_ns,
struct dentry *dentry, int oflag)
{
+ int ret;
const struct cred *cred = current_cred();
static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE };
if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
- return ERR_PTR(-EINVAL);
+ ret = -EINVAL;
+ goto err;
}
if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
- return ERR_PTR(-EACCES);
+ ret = -EACCES;
+ goto err;
}
return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
+
+err:
+ dput(dentry);
+ mntput(ipc_ns->mq_mnt);
+ return ERR_PTR(ret);
}
SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
--
1.7.0.87.g0901d
It reduces code size:
text data bss dec hex filename
9949 72 16 10037 2735 ipc/mqueue-BEFORE.o
9941 72 16 10029 272d ipc/mqueue-AFTER.o
Signed-off-by: André Goddard Rosa <andre....@gmail.com>
---
ipc/mqueue.c | 13 +++++--------
1 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c79bd57..2d76647 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -134,7 +134,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
init_waitqueue_head(&info->wait_q);
INIT_LIST_HEAD(&info->e_wait_q[0].list);
INIT_LIST_HEAD(&info->e_wait_q[1].list);
- info->messages = NULL;
info->notify_owner = NULL;
info->qsize = 0;
info->user = NULL; /* set when all is ok */
@@ -146,6 +145,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
info->attr.mq_msgsize = attr->mq_msgsize;
}
mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+ info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+ if (!info->messages)
+ goto out_inode;
+
mq_bytes = (mq_msg_tblsz +
(info->attr.mq_maxmsg * info->attr.mq_msgsize));
@@ -154,18 +157,12 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
u->mq_bytes + mq_bytes >
p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
spin_unlock(&mq_lock);
+ kfree(info->messages);
goto out_inode;
}
u->mq_bytes += mq_bytes;
spin_unlock(&mq_lock);
- info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
- if (!info->messages) {
- spin_lock(&mq_lock);
- u->mq_bytes -= mq_bytes;
- spin_unlock(&mq_lock);
- goto out_inode;
- }
/* all is ok */
info->user = get_uid(u);
} else if (S_ISDIR(mode)) {
--
1.7.0.87.g0901d
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <mqueue.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/resource.h>
int main(int argc, char *argv[])
{
struct rlimit limit;
char queue_name[] = "/mq_open/bug";
char tmp_name[] = "/tmp/tmp";
int fd, i = 1;
if (getrlimit(RLIMIT_NOFILE, &limit) != 0) {
printf("%s\n", "Failed to get RLIMIT_NOFILE");
return EXIT_FAILURE;
}
printf("Max number of open files is: %d\n", limit.rlim_cur);
while (i <= limit.rlim_cur) {
mqd_t queue;
errno = 0;
queue = mq_open(queue_name, O_CREAT |O_RDWR, S_IRUSR | S_IWUSR
, NULL);
if (queue != (mqd_t)-1) {
/* Success opening mqueue, no leak will happen */
printf("Successfully opened an mqueue[%d]\n", queue);
printf("mq_close(%d) = %d\n", queue, mq_close(queue));
return EXIT_SUCCESS;
}
/* Failed to open mqueue, maybe a leak is happening... */
if (errno == EMFILE)
{
printf("\nRun out of file descriptors");
break;
}
printf("\rLeaking [%d] files?!?!", i++);
fflush(stdout);
usleep(500);
}
/* Double check that no file descriptor is available anymore indeed */
putchar('\n');
errno = 0;
fd = open(tmp_name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) {
printf("open() failed: %s\n", strerror(errno));
if (errno == EMFILE) {
printf("%s\n", "Cannot open new files, fds exhausted");
return EXIT_FAILURE;
}
} else
printf("close(%d) = %d\n", fd, close(fd));
printf("%s\n", "Expected output: kernel is not leaking any fds!");
return EXIT_SUCCESS;
}
## Preparing for testing
$ touch /tmp/tmp
$ gcc -g main_mq_open_fd_leak.c -lrt
## Linux kernel with the fix applied:
$ ./a.out
Max number of open files is: 1024
Leaking [1024] files?!?!
close(3) = 0
Expected output: kernel is not leaking any fds!
## Linux kernel without the fix:
## Shell execution:
$ ./a.out
Max number of open files is: 1024
Leaking [1019] files?!?!
Run out of file descriptors
Segmentation fault
## Valgrind execution:
$ valgrind --track-fds=yes ./a.out
==2895== Memcheck, a memory error detector
==2895== Copyright (C) 2002-2009, and GNU GPL'd, by Julian Seward et al.
==2895== Using Valgrind-3.6.0.SVN and LibVEX; rerun with -h for copyright info
==2895== Command: ./a.out
==2895==
Max number of open files is: 1024
Leaking [1024] files?!?!
open() failed: Too many open files
Cannot open new files, fds exhausted
==2895==
==2895== FILE DESCRIPTORS: 5 open at exit.
==2895== Open file descriptor 13:
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 12:
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 2: /dev/pts/1
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 1: /dev/pts/1
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 0: /dev/pts/1
==2895== <inherited from parent>
==2895==
==2895==
==2895== HEAP SUMMARY:
==2895== in use at exit: 0 bytes in 0 blocks
==2895== total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==2895==
==2895== All heap blocks were freed -- no leaks are possible
==2895==
==2895== For counts of detected and suppressed errors, rerun with: -v
==2895== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
When not running valgrind, user-space program segfaults trying to execute
strerror(errno). With valgrind, it executes successfully and prints the
5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
Signed-off-by: André Goddard Rosa <andre....@gmail.com>
---
ipc/mqueue.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e47c9eb..b6cb064 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -710,7 +710,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
- goto out_err;
+ goto out_putfd;
}
mntget(ipc_ns->mq_mnt);
@@ -749,7 +749,6 @@ out:
mntput(ipc_ns->mq_mnt);
out_putfd:
put_unused_fd(fd);
-out_err:
fd = error;
out_upsem:
mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
--
1.7.0.87.g0901d
Signed-off-by: André Goddard Rosa <andre....@gmail.com>
---
ipc/mqueue.c | 9 +++++----
1 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 2d76647..04403fd 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -261,8 +261,9 @@ static void mqueue_delete_inode(struct inode *inode)
clear_inode(inode);
- mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
- (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+ /* Total amount of bytes accounted for the mqueue */
+ mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *)
+ + info->attr.mq_msgsize);
user = info->user;
if (user) {
spin_lock(&mq_lock);
@@ -601,8 +602,8 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
/* check for overflow */
if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
return 0;
- if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
- (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
+ if ((unsigned long)(attr->mq_maxmsg * (attr->mq_msgsize
+ + sizeof (struct msg_msg *))) <
(unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
return 0;
return 1;
--
1.7.0.87.g0901d
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 906e873..e47c9eb 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1297,7 +1297,7 @@ static int __init init_mqueue_fs(void)
if (mqueue_inode_cachep == NULL)
return -ENOMEM;
- /* ignore failues - they are not fatal */
+ /* ignore failures - they are not fatal */
mq_sysctl_table = mq_register_sysctl_table();
error = register_filesystem(&mqueue_fs_type);
--
1.7.0.87.g0901d
> Fix a file descriptor leak on user-space processes and perform a cleanup,
> reducing the code size:
> text data bss dec hex filename
> 9949 72 16 10037 2735 ipc/mqueue-BEFORE.o
> 9885 72 16 9973 26f5 ipc/mqueue-AFTER.o
>
> Andr__ Goddard Rosa (6):
> mqueue: remove unneeded info->messages initialization
> mqueue: apply mathematics distributivity on mq_bytes calculation
> mqueue: simplify do_open() error handling
> mqueue: only set error codes if they are really necessary
> mqueue: fix typo "failues" -> "failures"
> mqueue: fix mq_open() file descriptor leak on user-space processes
Fixing the leak is far more important than the other five patches, and
we'll want to backport the leak fix into earlier kernels. So the
bugfix patch should have been the first in the series!
So I've reordered the patches in that fashion and shall tag "mqueue:
fix mq_open() file descriptor leak on user-space processes" as needing
-stable backporting.
The patches apply and build OK with that reordering, but please do
double-check it, thanks.
<snip>
>
> When not running valgrind, user-space program segfaults trying to execute
> strerror(errno). With valgrind, it executes successfully and prints the
> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>
> Signed-off-by: André Goddard Rosa <andre....@gmail.com>
> ---
The code has more than just this problem, could you please try
my patch below?
Thanks.
---------------------------->
Clean up the failure path of sys_mq_open().
Reorder the goto labels;
Rename 'upsem' to 'upunlock';
Remove some unused labels;
Fix some wrong goto path.
Signed-off-by: WANG Cong <xiyou.w...@gmail.com>
---
I think it's wrong to move dput after mntput
> Signed-off-by: WANG Cong <xiyou.w...@gmail.com>
>
> ---
>
Oh, this is to say mntget() should be called before lookup_one_len(),
the original code seems wrong again...
How about the one below?
---------
Signed-off-by: WANG Cong <xiyou.w...@gmail.com>
This is definitely wrong,
if (IS_ERR(filp)) {
error = PTR_ERR(filp);
- goto out_putfd;
+ goto out;
}
filp is assigned by do_open or do_create in mqueue.c, take a look at
the code, if do_open/do_create is failed, kernel is already dput &
mntput...
So I think original patch from André is enough....
> ---------
>
> Signed-off-by: WANG Cong <xiyou.w...@gmail.com>
>
If kernel failed to lookup the dentry in mqueue, the fd allocated by
get_unused_fd_flags will be leaked then.
I think this is a good catch ;-)
Acked-by: Xiaotian Feng <xtf...@gmail.com>
Clearly the original code is a piece of sh*t.
Inspired by André Goddard Rosa's patch.
a) do_create() and do_open() should not release the resources which
are not acquired within themselves, it's their caller's work;
b) Fix an fd leak;
c) The goto label 'out_upsem' doesn't make any sense, rename to
'out_unlock';
d) mntget() should be called before looking up dentry within
->mnt->mnt_root;
e) When dealing with failure case, we should release the resources
in a reversed order of acquiring, so reorder the goto labels;
f) Remove some unused labels due to reorder.
Also, this shrinks the binary by 60 bytes:
text data bss dec hex filename
7674 1684 8 9366 2496 ipc/mqueue.o.BEFORE
7614 1684 8 9306 245a ipc/mqueue.o.AFTER
Signed-off-by: WANG Cong <xiyou.w...@gmail.com>
Cc: André Goddard Rosa <andre....@gmail.com>
---
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c79bd57..fdd09da 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -650,8 +650,6 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
out_drop_write:
mnt_drop_write(ipc_ns->mq_mnt);
out:
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
return ERR_PTR(ret);
}
@@ -664,17 +662,11 @@ static struct file *do_open(struct ipc_namespace *ipc_ns,
static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE };
- if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
+ if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
return ERR_PTR(-EINVAL);
- }
- if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
+ if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE]))
return ERR_PTR(-EACCES);
- }
return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
}
@@ -686,7 +678,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
struct file *filp;
char *name;
struct mq_attr attr;
- int fd, error;
+ int fd, error = 0;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
@@ -701,13 +693,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
if (fd < 0)
goto out_putname;
+ mntget(ipc_ns->mq_mnt);
mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
- goto out_err;
+ goto out_unlock;
}
- mntget(ipc_ns->mq_mnt);
if (oflag & O_CREAT) {
if (dentry->d_inode) { /* entry already exists */
@@ -731,24 +723,23 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
if (IS_ERR(filp)) {
error = PTR_ERR(filp);
- goto out_putfd;
+ goto out;
}
fd_install(fd, filp);
- goto out_upsem;
+ goto out_unlock;
out:
dput(dentry);
- mntput(ipc_ns->mq_mnt);
-out_putfd:
- put_unused_fd(fd);
-out_err:
- fd = error;
-out_upsem:
+out_unlock:
mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
+ if (error) {
+ mntput(ipc_ns->mq_mnt);
+ put_unused_fd(fd);
+ }
out_putname:
putname(name);
- return fd;
+ return error;
}
SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
On Thu, Feb 25, 2010 at 10:40 AM, Am�rico Wang <xiyou.w...@gmail.com> wrote:
>
> Fix bad code in ipc/mqueue.c.
>
> Inspired by Andr� Goddard Rosa's �patch.
>
> a) do_create() and do_open() should not release the resources which
> � are not acquired within themselves, it's their caller's work;
>
> b) Fix an fd leak;
>
> c) The goto label 'out_upsem' doesn't make any sense, rename to
> � 'out_unlock';
>
> d) mntget() should be called before looking up dentry within
> � ->mnt->mnt_root;
>
> e) When dealing with failure case, we should release the resources
> � in a reversed order of acquiring, so reorder the goto labels;
>
> f) Remove some unused labels due to reorder.
>
> Also, this shrinks the binary by 60 bytes:
>
> � text � � � data � � bss � � � dec � � � �hex filename
> � 7674 � � � 1684 � � � �8 � � �9366 � � �2496 ipc/mqueue.o.BEFORE
> � 7614 � � � 1684 � � � �8 � � �9306 � � �245a ipc/mqueue.o.AFTER
>
>
> Signed-off-by: WANG Cong <xiyou.w...@gmail.com>
> Cc: Andr� Goddard Rosa <andre....@gmail.com>
I have some questions below:
Inside do_open() and do_create() on mqueue.c, we call
dentry_open()/__dentry_open().
If dentry_open() fails, it'll automatically call:
dput(dentry);
mntput(mnt);
Then we'll go here and set "error":
> � � � �if (IS_ERR(filp)) {
> � � � � � � � �error = PTR_ERR(filp);
> - � � � � � � � goto out_putfd;
> + � � � � � � � goto out;
> � � � �}
>
And finally here:
> �out:
> � � � �dput(dentry);
> - � � � mntput(ipc_ns->mq_mnt);
> -out_putfd:
> - � � � put_unused_fd(fd);
> -out_err:
> - � � � fd = error;
> -out_upsem:
> +out_unlock:
> � � � �mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
> + � � � if (error) {
> + � � � � � � � mntput(ipc_ns->mq_mnt);
> + � � � � � � � put_unused_fd(fd);
> + � � � }
Is it ok to call:
dput(dentry);
mntput(ipc_ns->mq_mnt);
multiple times?
I also do not see where you set "error" to "fd":
> - � � � return fd;
> + � � � return error;
Am I missing something?
Could you please base your patch on top of mine?
Thank you,
Andr�
On Wed, Feb 24, 2010 at 7:01 PM, Andrew Morton
<ak...@linux-foundation.org> wrote:
> On Tue, 23 Feb 2010 04:04:22 -0300 Andr__ Goddard Rosa <andre....@gmail.com> wrote:
>
>> Fix a file descriptor leak on user-space processes and perform a cleanup,
>> reducing the code size:
>> text � �data � � bss � � dec � � hex filename
>> 9949 � � �72 � � �16 � 10037 � �2735 ipc/mqueue-BEFORE.o
>> 9885 � � �72 � � �16 � �9973 � �26f5 ipc/mqueue-AFTER.o
>>
>> Andr__ Goddard Rosa (6):
>> � mqueue: remove unneeded info->messages initialization
>> � mqueue: apply mathematics distributivity on mq_bytes calculation
>> � mqueue: simplify do_open() error handling
>> � mqueue: only set error codes if they are really necessary
>> � mqueue: fix typo "failues" -> "failures"
>> � mqueue: fix mq_open() file descriptor leak on user-space processes
>
> Fixing the leak is far more important than the other five patches, and
> we'll want to backport the leak fix into earlier kernels. �So the
> bugfix patch should have been the first in the series!
Sure, thank you for that, I'll consider your good advice.
> So I've reordered the patches in that fashion and shall tag "mqueue:
> fix mq_open() file descriptor leak on user-space processes" as needing
> -stable backporting.
>
> The patches apply and build OK with that reordering, but please do
> double-check it, thanks.
>
I have double checked and they look good; thanks for the follow-up
patch pleasing checkpatch.
:)
Do I need to send another patch adding the Acked-by's or is it done by
email system automatically?
Best regards,
Andr�
Oh, I trusted the current code too much, clearly this needs to be fixed
too. I already checked the 14 callers of dentry_open(), and will send
out a patchset to fix this tomorrow. (And the mqueue part will be based
on your patch.)
Thanks!
Sorry, NAK. Resources are either consumed by opened file (i.e. struct
file now is the holder of mnt/dentry) or released; in either case,
caller has given them up. Your variant is broken in its current form
and will be more complicated than existing code if you fix it.
> b) Fix an fd leak;
Fixed by original patch.
> c) The goto label 'out_upsem' doesn't make any sense, rename to
> 'out_unlock';
>
> d) mntget() should be called before looking up dentry within
> ->mnt->mnt_root;
Not really, since ->mnt doesn't change (and remains pinned) for the
entire lifetime of ipc_ns.
> e) When dealing with failure case, we should release the resources
> in a reversed order of acquiring, so reorder the goto labels;
>
> f) Remove some unused labels due to reorder.
I wouldn't say it's cleaner that way.
Anyway, I've applied the patchset as posted; if you want to do something
else, do that on top of it. I really object against your (a), the rest
is more or less a matter of taste. (a) is just plain wrong - we want
uniform behaviour from the caller's POV and it means that mnt/dentry should
always be given up from caller's POV. Either moved into new struct file,
or dropped.