shm_open support

38 views
Skip to first unread message

Aaron Burns

unread,
Jan 28, 2021, 3:47:00 PMJan 28
to gVisor Users [Public]
Hi,

I have an application that uses shm_open and it runs properly in a regular docker container.  When I try and run it with gVisor, I get a function not implemented error.
Can anybody tell me if this is expected or should this work?  I've set up a simple program that just tries to open a simple shared memory object for debugging this.

Some details:
docker --version
Docker version 18.06.1-ce, build e68fc7a
uname -a
Linux aburns-Precision-T7600 4.15.0-134-generic #138-Ubuntu SMP Fri Jan 15 10:52:18 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
runsc --version
runsc version release-20210125.0
spec: 1.0.1-dev

an strace from a regular docker container:
lum@bd5415fdd22f:/home/aburns$ strace ./a.out
execve("./a.out", ["./a.out"], 0x7ffe87995830 /* 12 vars */) = 0
brk(NULL)                               = 0x55e6292e8000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=24286, ...}) = 0
mmap(NULL, 24286, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f08a4273000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/librt.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\"\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0644, st_size=31680, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f08a4271000
mmap(NULL, 2128864, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f08a3e48000
mprotect(0x7f08a3e4f000, 2093056, PROT_NONE) = 0
mmap(0x7f08a404e000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f08a404e000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2030928, ...}) = 0
mmap(NULL, 4131552, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f08a3a57000
mprotect(0x7f08a3c3e000, 2097152, PROT_NONE) = 0
mmap(0x7f08a3e3e000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f08a3e3e000
mmap(0x7f08a3e44000, 15072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f08a3e44000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000b\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=144976, ...}) = 0
mmap(NULL, 2221184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f08a3838000
mprotect(0x7f08a3852000, 2093056, PROT_NONE) = 0
mmap(0x7f08a3a51000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7f08a3a51000
mmap(0x7f08a3a53000, 13440, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f08a3a53000
close(3)                                = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f08a426e000
arch_prctl(ARCH_SET_FS, 0x7f08a426e740) = 0
mprotect(0x7f08a3e3e000, 16384, PROT_READ) = 0
mprotect(0x7f08a3a51000, 4096, PROT_READ) = 0
mprotect(0x7f08a404e000, 4096, PROT_READ) = 0
mprotect(0x55e6291dc000, 4096, PROT_READ) = 0
mprotect(0x7f08a4279000, 4096, PROT_READ) = 0
munmap(0x7f08a4273000, 24286)           = 0
set_tid_address(0x7f08a426ea10)         = 19
set_robust_list(0x7f08a426ea20, 24)     = 0
rt_sigaction(SIGRTMIN, {sa_handler=0x7f08a383dcb0, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x7f08a384a980}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f08a383dd50, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f08a384a980}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
statfs("/dev/shm/", {f_type=TMPFS_MAGIC, f_bsize=4096, f_blocks=16384, f_bfree=16384, f_bavail=16384, f_files=4111579, f_ffree=4111578, f_fsid={val=[0, 0]}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_NOSUID|ST_NODEV|ST_NOEXEC|ST_RELATIME}) = 0
futex(0x7f08a3a56370, FUTEX_WAKE_PRIVATE, 2147483647) = 0
openat(AT_FDCWD, "/dev/shm/shmtest", O_RDWR|O_CREAT|O_NOFOLLOW|O_CLOEXEC, 0660) = 3
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
brk(NULL)                               = 0x55e6292e8000
brk(0x55e629309000)                     = 0x55e629309000
write(1, "shared memory name:/shmtest", 27shared memory name:/shmtest) = 27
exit_group(0)                           = ?
+++ exited with 0 +++

a strace from a gVisor based container:
lum@b23db930e835:/home/aburns$ strace ./a.out 
execve("./a.out", ["./a.out"], 0x7fe5203c8760 /* 12 vars */) = 0
brk(NULL)                               = 0x55c17f3a3000
uname({sysname="Linux", nodename="b23db930e835", ...}) = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=24286, ...}) = 0
mmap(NULL, 24286, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fa1f3758000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/librt.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\"\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0644, st_size=31680, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa1f3756000
mmap(NULL, 2128864, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fa1f3000000
mprotect(0x7fa1f3007000, 2093056, PROT_NONE) = 0
mmap(0x7fa1f3206000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7fa1f3206000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2030928, ...}) = 0
mmap(NULL, 4131552, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fa1f2c00000
mprotect(0x7fa1f2de7000, 2097152, PROT_NONE) = 0
mmap(0x7fa1f2fe7000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7fa1f2fe7000
mmap(0x7fa1f2fed000, 15072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fa1f2fed000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000b\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=144976, ...}) = 0
mmap(NULL, 2221184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fa1f2800000
mprotect(0x7fa1f281a000, 2093056, PROT_NONE) = 0
mmap(0x7fa1f2a19000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7fa1f2a19000
mmap(0x7fa1f2a1b000, 13440, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fa1f2a1b000
close(3)                                = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa1f3753000
arch_prctl(ARCH_SET_FS, 0x7fa1f3753740) = 0
mprotect(0x7fa1f2fe7000, 16384, PROT_READ) = 0
mprotect(0x7fa1f2a19000, 4096, PROT_READ) = 0
mprotect(0x7fa1f3206000, 4096, PROT_READ) = 0
mprotect(0x55c17f3a1000, 4096, PROT_READ) = 0
mprotect(0x7fa1f3629000, 4096, PROT_READ) = 0
munmap(0x7fa1f3758000, 24286)           = 0
set_tid_address(0x7fa1f3753a10)         = 19
set_robust_list(0x7fa1f3753a20, 24)     = 0
rt_sigaction(SIGRTMIN, {sa_handler=0x7fa1f2805cb0, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x7fa1f2812980}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7fa1f2805d50, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7fa1f2812980}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
statfs("/dev/shm/", {f_type=OVERLAYFS_SUPER_MAGIC, f_bsize=4096, f_blocks=2251799813685247, f_bfree=2251799813685247, f_bavail=2251799813685247, f_files=0, f_ffree=0, f_fsid={val=[0, 0]}, f_namelen=255, f_frsize=4096}) = 0
brk(NULL)                               = 0x55c17f3a3000
brk(0x55c17f3c4000)                     = 0x55c17f3c4000
openat(AT_FDCWD, "/proc/mounts", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "none / overlayfs rw 0 0\nnone /pr"..., 4096) = 244
read(3, "", 4096)                       = 0
close(3)                                = 0
futex(0x7fa1f2a1e370, FUTEX_WAKE_PRIVATE, 2147483647) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0, 0), ...}) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
write(1, "Error:Function not implemented\n", 31Error:Function not implemented
) = 31
exit_group(0)                           = ?
+++ exited with 0 +++


Does anyone have any thoughts?

Regards,
Aaron


Ayush Ranjan

unread,
Jan 28, 2021, 4:05:56 PMJan 28
to Aaron Burns, gVisor Users [Public]
Hey there!
Looks like this bug was fixed in our next generation Virtual Filesystem (VFS2). (Note that the runtimes used below are configured differently, vfs invokes runsc with VFS1 and vfs-vfs2-d invokes runsc with VFS2)

VFS1 logs:

$ docker run --runtime=vfs -it ubuntu bash

root@e92dada520f1:/# stat -f -c "%T" /dev/shm

overlayfs


VFS2 logs:

$ docker run --runtime=vfs-vfs2-d -it ubuntu bash

root@f704d73c7a6a:/# stat -f -c "%T" /dev/shm

tmpfs

Could you try switching to VFS2 and try?

https://groups.google.com/g/gvisor-users/c/rP-RCBDFKoY has instructions on how to enable VFS2. Along with this bug fix, you will also get some performance benefits for free!

--
You received this message because you are subscribed to the Google Groups "gVisor Users [Public]" group.
To unsubscribe from this group and stop receiving emails from it, send an email to gvisor-users...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/gvisor-users/e7d31b7d-1f9f-44c6-bac7-a30a0283fb13n%40googlegroups.com.


--
Ayush Ranjan
Software Engineer | Google Cloud

Ayush Ranjan

unread,
Jan 28, 2021, 4:40:30 PMJan 28
to gVisor Users [Public]
Looks like glibc requires the /dev/shm mount to be of type "tmpfs" or "shm" (see [1]).
From your gVisor logs, you can see that statfs(/dev/shm) returns the overlayfs type, which seems to be causing the issue.

In VFS2, /dev/shm is a tmpfs mount.

Aaron Burns

unread,
Jan 28, 2021, 5:12:09 PMJan 28
to gVisor Users [Public]
Hi!

Happy to try your suggestion.

I've added --vfs2 as a runtime argument in my docker configuration file (/etc/docker/daemon.json) - I think that's all that's needed, please correct me if I'm wrong.

I now see tmpfs for /dev/shm, so it seems like I'm now using VFS2:
lum@fcf82448f99e:/$ stat -f -c "%T" /dev/shm
tmpfs


Unfortunately, I can't seem to run my test program anymore
lum@fcf82448f99e:/home/aburns$ ./shmtest 
Error:Permission denied

Am I doing something silly?  


Aaron

Ayush Ranjan

unread,
Jan 28, 2021, 5:55:17 PMJan 28
to gVisor Users [Public]
Hmm, do you have access to the strace logs from your container?
If you clone gVisor repo and run `make dev`, it will automatically configure runtimes for you. It will configure a runtime named {RUNTIME_NAME}-vfs2-d (with debug options).
Upon using that runtime, strace logs will be dumped in /tmp/{RUNTIME_NAME}/logs/. You would want to take the sandbox logs from there and see which syscall is failing with EACCES ("Permission denied").

Alternatively, if you could share (a simplified version of) shmtest, then I could have a look.

Aaron Burns

unread,
Jan 28, 2021, 6:17:21 PMJan 28
to gVisor Users [Public]
I'll have a go at building it from the repo  - I installed gvisor from the instructions under "Install latest release" here: https://gvisor.dev/docs/user_guide/install/

Below is the code for the shmtest binary:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>


/*
 * Minimal reproducer: create (or open) the POSIX shared memory object
 * "/shmtest" with shm_open() and report whether the call succeeded.
 *
 * Output (stdout):
 *   "shared memory name:/shmtest"  when shm_open() succeeds
 *   "Error:<strerror text>"        when shm_open() fails
 *
 * Returns EXIT_SUCCESS when the object was opened, EXIT_FAILURE otherwise,
 * so the result is also visible in the process exit status.
 */
int main(void)
{
    char lclFileName[256];
    int fd;

    /* Build the object name; snprintf bounds the write to the buffer. */
    snprintf(lclFileName, sizeof(lclFileName), "/%s", "shmtest");

    fd = shm_open(lclFileName, O_RDWR | O_CREAT, 0660);
    if (fd == -1) {
        /* Save errno before any later library call can overwrite it. */
        int savedErrno = errno;

        /*
         * strerror() is used instead of strerror_r(): the XSI and GNU
         * variants of strerror_r() have different return types and the GNU
         * one may leave the caller's buffer untouched, so ignoring its
         * return value can print garbage.
         */
        printf("Error:%s\n", strerror(savedErrno));
        return EXIT_FAILURE;
    }

    printf("shared memory name:%s\n", lclFileName);

    /*
     * The descriptor is not closed explicitly and no shm_unlink() is issued
     * here; the descriptor is released when the process exits.
     */
    return EXIT_SUCCESS;
}


Thanks for the help!

Aaron


Ayush Ranjan

unread,
Jan 28, 2021, 8:20:29 PMJan 28
to gVisor Users [Public]
It works for me.

Steps I took:
- Put your test code in shm.c
- Create the following Dockerfile:
```
FROM ubuntu
COPY shm.c .
RUN apt-get update && apt-get install -y build-essential
```
- Built it into an image called `shm`
- Run with VFS2 container and compare with runc:
```
~/playground$ docker run --runtime=vfs-vfs2-d -it shm bash
root@4f4a7f367fd9:/# gcc shm.c -o shm -lrt
root@4f4a7f367fd9:/# ./shm
shared memory name:/shmtest
root@4f4a7f367fd9:/# exit
exit
~/playground$ docker run -it shm bash
root@f14cd7391108:/# gcc shm.c -o shm -lrt
root@f14cd7391108:/# ./shm
shared memory name:/shmtest
root@f14cd7391108:/# exit
exit
```

RE your permission error: be sure to check the permissions on the executable. Maybe try chmod +x executable?
Your test script does seem to work with VFS2 for me. (I am also using a pretty recent build).

Aaron Burns

unread,
Jan 28, 2021, 8:46:01 PMJan 28
to gVisor Users [Public]
Thanks, that's great.  I do see a permissions mismatch with /dev/shm and my binary.  Once I get that sorted, I should be on to the next problem :)

Thank you very much for your help.

Aaron

Reply all
Reply to author
Forward
0 new messages