[RFC 1] libc: replace syscall call with function calls using a macro

20 views
Skip to first unread message

Waldemar Kozaczuk

unread,
Aug 10, 2020, 1:02:33 AM8/10/20
to osv...@googlegroups.com, Waldemar Kozaczuk
In order to minimize copying musl source files that invoke syscall()
into libc/ folder, we employ following scheme that works in simple cases:
single .c files uses single syscall type.

The scheme involves new header file - libc/syscall_to_function.h -
that defines simple macros specific to each source file
that override syscall() or __syscall() function call.

So instead of copying and modifying musl file,
we instead compile with --include option pointing to the
new syscall_to_function.h header and passing extra
preprocessor parameter that specifies specic syscall type.
The former is done to avoid collisions of same macro name
definitions in single header file.

Please nore it is RFC so this patch for example does
not delete files from libc/ folder as the final patch should.

Please also note commented out generic macro attempt
that does not work as macro definition can not have
nested preprocessor directives (preprocessor runs once).

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
Makefile | 10 +++++++---
libc/aliases.ld | 3 +++
libc/syscall_to_function.h | 24 ++++++++++++++++++++++++
3 files changed, 34 insertions(+), 3 deletions(-)
create mode 100644 libc/syscall_to_function.h

diff --git a/Makefile b/Makefile
index 78590d40..f6eb21e0 100644
--- a/Makefile
+++ b/Makefile
@@ -1341,6 +1341,7 @@ musl += multibyte/wcstombs.o
musl += multibyte/wctob.o
musl += multibyte/wctomb.o

+#SHOULD it go away?
$(out)/libc/multibyte/mbsrtowcs.o: CFLAGS += -Imusl/src/multibyte

musl += network/htonl.o
@@ -1417,10 +1418,12 @@ libc += stdio/__fopen_rb_ca.o
libc += stdio/__fprintf_chk.o
libc += stdio/__lockfile.o
musl += stdio/__overflow.o
-libc += stdio/__stdio_close.o
+musl += stdio/__stdio_close.o
+$(out)/musl/src/stdio/__stdio_close.o: CFLAGS += -D__OSV_SYS_close_TO_close --include libc/syscall_to_function.h
musl += stdio/__stdio_exit.o
libc += stdio/__stdio_read.o
-libc += stdio/__stdio_seek.o
+musl += stdio/__stdio_seek.o
+$(out)/musl/src/stdio/__stdio_seek.o: CFLAGS += -D__OSV_SYS_lseek_TO_lseek --include libc/syscall_to_function.h
libc += stdio/__stdio_write.o
libc += stdio/__stdout_write.o
musl += stdio/__string_read.o
@@ -1454,7 +1457,8 @@ musl += stdio/fputwc.o
musl += stdio/fputws.o
musl += stdio/fread.o
libc += stdio/__fread_chk.o
-libc += stdio/freopen.o
+musl += stdio/freopen.o
+$(out)/musl/src/stdio/freopen.o: CFLAGS += -D__OSV_SYS_fcntl_TO_fcntl --include libc/syscall_to_function.h
musl += stdio/fscanf.o
musl += stdio/fseek.o
musl += stdio/fsetpos.o
diff --git a/libc/aliases.ld b/libc/aliases.ld
index a44b7572..ba5e87ff 100644
--- a/libc/aliases.ld
+++ b/libc/aliases.ld
@@ -23,3 +23,6 @@ __pow_finite = pow;
__finite = finite;
__finitef = finitef;
__finitel = finitel;
+
+/* unistd */
+__dup3 = dup3;
diff --git a/libc/syscall_to_function.h b/libc/syscall_to_function.h
new file mode 100644
index 00000000..68922cbf
--- /dev/null
+++ b/libc/syscall_to_function.h
@@ -0,0 +1,24 @@
+#include <bits/syscall.h>
+#include <unistd.h>
+
+#ifdef __OSV_SYS_close_TO_close
+#define syscall(syscall_number, fd) close(fd)
+#endif
+
+#ifdef __OSV_SYS_lseek_TO_lseek
+#define syscall(syscall_number, file, off, whence) lseek(file, off, whence)
+#endif
+
+#ifdef __OSV_SYS_fcntl_TO_fcntl
+#define syscall(syscall_number, fd, cmd, args) fcntl(fd, cmd, args)
+#define __syscall(syscall_number, fd, cmd, args) fcntl(fd, cmd, args)
+#endif
+
+/*
+#define syscall(syscall_number, ...) \
+ #if syscall_number == SYS_close \
+ close(___VA_ARGS__) \
+ #else \
+ #error Unsupported syscall number! \
+ #endif
+*/
--
2.25.1

Waldemar Kozaczuk

unread,
Aug 10, 2020, 1:02:35 AM8/10/20
to osv...@googlegroups.com, Waldemar Kozaczuk
In more complicated situations when single musl file
calls syscall() for 2 or more system call types (like stdio/tmpnam.o),
we cannot have single macro that will solve it like previous patch
did.

In those cases we fallback to statically inlined functions
defined for each syscall type. The defined variadic syscall/__syscall
macro then delegates to relevant inlined function by concatenating
'inlined_' and syscall number constant name that is a 1st parameter to a macro.

Please note that "__attribute__((always_inline))" guarantees
that the calls to those functions are indeed inlines and do not incur
any extra function call overhead as it was checked by disassembling tmpnam.o.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
Makefile | 3 ++-
libc/syscall_to_function.h | 15 +++++++++++++++
2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f6eb21e0..1a1ef54b 100644
--- a/Makefile
+++ b/Makefile
@@ -1508,7 +1508,8 @@ musl += stdio/swprintf.o
musl += stdio/swscanf.o
musl += stdio/tempnam.o
libc += stdio/tmpfile.o
-libc += stdio/tmpnam.o
+musl += stdio/tmpnam.o
+$(out)/musl/src/stdio/tmpnam.o: CFLAGS += -D__OSV_SYS_inline_tmpnam --include libc/syscall_to_function.h
musl += stdio/ungetc.o
musl += stdio/ungetwc.o
musl += stdio/vasprintf.o
diff --git a/libc/syscall_to_function.h b/libc/syscall_to_function.h
index 68922cbf..a9f9afed 100644
--- a/libc/syscall_to_function.h
+++ b/libc/syscall_to_function.h
@@ -14,6 +14,21 @@
#define __syscall(syscall_number, fd, cmd, args) fcntl(fd, cmd, args)
#endif

+#ifdef __OSV_SYS_inline_tmpnam
+#include <time.h>
+__attribute__((always_inline)) static inline long inlined_SYS_clock_gettime(clockid_t c, struct timespec *t, int x)
+{
+ return clock_gettime(c, t);
+}
+
+__attribute__((always_inline)) static inline long inlined_SYS_access(const char *p, int i)
+{
+ return access(p, i);
+}
+
+#define __syscall(sys_number, ...) (inlined_##sys_number(__VA_ARGS__))
+#endif
+
/*
#define syscall(syscall_number, ...) \
#if syscall_number == SYS_close \
--
2.25.1

Nadav Har'El

unread,
Aug 10, 2020, 1:58:51 AM8/10/20
to Waldemar Kozaczuk, Osv Dev
On Mon, Aug 10, 2020 at 8:02 AM Waldemar Kozaczuk <jwkoz...@gmail.com> wrote:
In more complicated situations when single musl file
calls syscall() for 2 or more system call types (like stdio/tmpnam.o),
we cannot have single macro that will solve it like previous patch
did.

In those cases we fallback to statically inlined functions
defined for each syscall type. The defined variadic syscall/__syscall
macro then delegates to relevant inlined function by concatenating
'inlined_' and syscall number constant name that is a 1st parameter to a macro.

Please note that "__attribute__((always_inline))" guarantees
that the calls to those functions are indeed inlines and do not incur
any extra function call overhead as it was checked by disassembling tmpnam.o.

So is there any downside to this approach? Can't we use *just* this approach, without that extra  -D thing?
If it worked, and we won't need the "-D" things, we can add the "--include ..." thing to *all* musl source files, instead of picking them out individually.

Also, please begin syscall_to_function.h with a comment describing what it does - that it is for compiling
unmodified Musl files.
A thought: Do we really need this to be an actual C function - maybe this can also be a macro?
My thinking is that maybe you can get around the need to specify all these parameter types for every system call and can just use a macro to copy them.
On the other hand, it's not criticial - there is not a very large number of system calls, and after you write a dozen of them or so, you'd probably be done.


+
+__attribute__((always_inline)) static inline long inlined_SYS_access(const char *p, int i)
+{
+    return access(p, i);
+}
+
+#define __syscall(sys_number, ...) (inlined_##sys_number(__VA_ARGS__))
+#endif
+
 /*
 #define syscall(syscall_number, ...) \
   #if syscall_number == SYS_close \
--
2.25.1

--
You received this message because you are subscribed to the Google Groups "OSv Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+u...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20200810050225.13067-2-jwkozaczuk%40gmail.com.

Waldek Kozaczuk

unread,
Aug 10, 2020, 11:50:39 AM8/10/20
to OSv Development
On Monday, August 10, 2020 at 1:58:51 AM UTC-4 Nadav Har'El wrote:
On Mon, Aug 10, 2020 at 8:02 AM Waldemar Kozaczuk <jwkoz...@gmail.com> wrote:
In more complicated situations when single musl file
calls syscall() for 2 or more system call types (like stdio/tmpnam.o),
we cannot have single macro that will solve it like previous patch
did.

In those cases we fallback to statically inlined functions
defined for each syscall type. The defined variadic syscall/__syscall
macro then delegates to relevant inlined function by concatenating
'inlined_' and syscall number constant name that is a 1st parameter to a macro.

Please note that "__attribute__((always_inline))" guarantees
that the calls to those functions are indeed inlines and do not incur
any extra function call overhead as it was checked by disassembling tmpnam.o.

So is there any downside to this approach? Can't we use *just* this approach, without that extra  -D thing?
If it worked, and we won't need the "-D" things, we can add the "--include ..." thing to *all* musl source files, instead of picking them out individually.
I agree. I like it as it will let us automatically catch any new/changed musl source files that current syscall_to_function translation macros do not support as we upgrade musl.

Also, please begin syscall_to_function.h with a comment describing what it does - that it is for compiling
unmodified Musl files.

Indeed using macros works beautifully which I was a bit surprised because one #define uses another #define at the same time which I thought preprocessor would not support:

#include <bits/syscall.h>

#ifdef __OSV_SYS_TO_FUNCTION_tmpnam
#include <unistd.h>
#define __OSV_TO_FUNCTION_SYS_clock_gettime(c, t, x) (clock_gettime(c, t))
#define __OSV_TO_FUNCTION_SYS_access(p, i) (access(p, i))
#endif

#define __syscall(sys_number, ...) (__OSV_TO_FUNCTION_##sys_number(__VA_ARGS__))
#define syscall(sys_number, ...) (__OSV_TO_FUNCTION_##sys_number(__VA_ARGS__))
#define syscall_cp(sys_number, ...) (__OSV_TO_FUNCTION_##sys_number(__VA_ARGS__))

I am really pleased it works and should solve all types of translations in 12 musl files where this would apply. I actually went on a rabbit hole trying to define macro with #ifdef/if an so on until I realized we can use powerful ## concatenation operator. In cases where this simple scheme will not work, we can always fallback to inlined functions.

There is one thing to discuss. While I agree that we should use ' --include libc/syscall_to_function.h' for all musl source and have compiler detect any undefined individual translation macros (like __OSV_TO_FUNCTION_SYS_clock_gettime) which is really nice feature, at the same time I am torn whether we should make all translation macros enabled by default for all musl sources OR enabled by the #idef switch per file (like #ifdef __OSV_SYS_TO_FUNCTION_tmpnam). I am afraid that sometimes we will need to add specific #include ... for relevant functions, we translate to and that might pollute compiling by causing some collisions. Also adding option like 'CFLAGS += -D__OSV_SYS_inline_tmpnam' to specific files (there are 12 of them) in the makefile also documents which musl files are affected by this translation mechanism which might be helpful to understand how all this magic happens, no?

So another version of syscal_to_function.h could look like this:

#include <bits/syscall.h>

#ifdef __OSV_SYS_close_TO_close
#include <unistd.h>
#define __OSV_TO_FUNCTION_SYS_close(fd) (close(fd))
#endif

#ifdef __OSV_SYS_lseek_TO_lseek
#include <unistd.h>
#define __OSV_TO_FUNCTION_SYS_lseek(file, off, whence) (lseek(file, off, whence))
#endif

#ifdef __OSV_SYS_fcntl_TO_fcntl
#include <unistd.h>
#define __OSV_TO_FUNCTION_SYS_fcntl(fd, cmd, args) (fcntl(fd, cmd, args))
#endif

#ifdef __OSV_SYS_inline_tmpnam
#include <unistd.h>
#define __OSV_TO_FUNCTION_SYS_clock_gettime(c, t, x) (clock_gettime(c, t))
#define __OSV_TO_FUNCTION_SYS_access(p, i) (access(p, i))
#endif

#define __syscall(sys_number, ...) (__OSV_TO_FUNCTION_##sys_number(__VA_ARGS__))
#define syscall(sys_number, ...) (__OSV_TO_FUNCTION_##sys_number(__VA_ARGS__))

Thoughts?

BTW I tried to apply ' --include libc/syscall_to_function.h' for all musl sources files in makefile but then I get these compilation errors which are due to some conflicts.

Building into build/release.x64
  GEN gen/include/osv/version.h
  CC musl/src/misc/get_current_dir_name.c
  CC musl/src/misc/nftw.c
  CC musl/src/multibyte/mbsinit.c
In file included from <command-line>:
include/api/unistd.h:179:6: error: ISO C requires a named argument before ‘...’
  179 | long syscall(long, ...);
      |      ^~~~~~~
include/api/unistd.h:180:6: error: ISO C requires a named argument before ‘...’
  180 | long __syscall(long, ...);
      |      ^~~~~~~~~
make: *** [Makefile:336: build/release.x64/musl/src/misc/get_current_dir_name.o] Error 1
make: *** Waiting for unfinished jobs....
  CC musl/src/multibyte/mbsnrtowcs.c
In file included from <command-line>:
include/api/unistd.h:179:6: error: ISO C requires a named argument before ‘...’
  179 | long syscall(long, ...);
      |      ^~~~~~~
include/api/unistd.h:180:6: error: ISO C requires a named argument before ‘...’
  180 | long __syscall(long, ...);
      |      ^~~~~~~~~
make: *** [Makefile:336: build/release.x64/musl/src/misc/nftw.o] Error 1
make failed. Exiting from build script

That possibly has to do with the fact that some of the files includes include/api/unistd.h which defined both syscall and __syscall() functions like so:

#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
#define L_SET 0
#define L_INCR 1
#define L_XTND 2
int brk(void *);
void *sbrk(intptr_t);
pid_t vfork(void);
int vhangup(void);
int chroot(const char *);
int getpagesize(void);
int getdtablesize(void);
int sethostname(const char *, size_t);
int getdomainname(char *, size_t);
int setdomainname(const char *, size_t);
int setgroups(size_t, const gid_t *);
char *getpass(const char *);
int daemon(int, int);
void setusershell(void);
void endusershell(void);
char *getusershell(void);
int acct(const char *);
long syscall(long, ...);
long __syscall(long, ...);
#endif

Unless there is a clean way to fix it, we may need to use --include for only specific musl sources. I also need to verify if this new scheme will work for other 8 out of 12 files under libc/

Waldek

Nadav Har'El

unread,
Aug 11, 2020, 3:36:16 AM8/11/20
to Waldek Kozaczuk, OSv Development
On Mon, Aug 10, 2020 at 6:50 PM Waldek Kozaczuk <jwkoz...@gmail.com> wrote:

There is one thing to discuss. While I agree that we should use ' --include libc/syscall_to_function.h' for all musl source and have compiler detect any undefined individual translation macros (like __OSV_TO_FUNCTION_SYS_clock_gettime) which is really nice feature, at the same time I am torn whether we should make all translation macros enabled by default for all musl sources OR enabled by the #idef switch per file (like #ifdef __OSV_SYS_TO_FUNCTION_tmpnam). I am afraid that sometimes we will need to add specific #include ... for relevant functions, we translate to and that might pollute compiling by causing some collisions.

A #define for syscall() may cause problems for including <unistd.h>. Are there any other problems?
If this is the only problem, then we can easily avoid it: In the top of libc/syscall_to_function.h, do a "#include <unistd.h>". Before creating a syscall macro. That's it - unistd.h is protected from being loaded again with an ifndef, so other code cannot include it again. Yes, it adds a silly include for unistd.h for source files that didn't need it, but I think that's ok (hopefully).

Also adding option like 'CFLAGS += -D__OSV_SYS_inline_tmpnam' to specific files (there are 12 of them) in the makefile also documents which musl files are affected by this translation mechanism which might be helpful to understand how all this magic happens, no?

I think it only makes it harder. I think it would be even nicer to have just one "--include libc/syscall_to_function.h" in the Makefile with a comment in front of it explaining why, or at worst, 12 copies of this for 12 individual source files - but not *also* -D things that makes everything longer and harder to remember how to do it next time we need to add another such change.


BTW I tried to apply ' --include libc/syscall_to_function.h' for all musl sources files in makefile but then I get these compilation errors which are due to some conflicts.

Building into build/release.x64
  GEN gen/include/osv/version.h
  CC musl/src/misc/get_current_dir_name.c
  CC musl/src/misc/nftw.c
  CC musl/src/multibyte/mbsinit.c
In file included from <command-line>:
include/api/unistd.h:179:6: error: ISO C requires a named argument before ‘...’
  179 | long syscall(long, ...);
      |      ^~~~~~~

Of course, this is the unistd.h problem I mentioned above, and proposed a solution.

Reply all
Reply to author
Forward
0 new messages