Hi,
I wrote a simple program that calls the gettid syscall 50 million times in a loop to measure the cost of a syscall. I would like to know whether the test program makes sense and how it could be improved and, more importantly, whether it correctly measures what I want to measure.
Here is the program:
#define _GNU_SOURCE
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#include <syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * OSv builds only: declare gettid() by hand so the direct-call loop in
 * main() has a prototype to call.
 * NOTE(review): presumably the headers used for the OSv build do not expose
 * a gettid() prototype -- confirm against the OSv header set.
 */
#ifdef __OSV__
long gettid();
#endif
/*
 * Issue one raw gettid system call using the architecture's syscall
 * instruction (no libc wrapper) and assert that the result is non-negative.
 *
 * On architectures other than x86_64 and aarch64 no instruction is emitted
 * and tid keeps its initial value of 0, so the assertion trivially holds.
 */
void call_gettid_syscall()
{
    unsigned long syscall_nr = __NR_gettid;
    long tid = 0;

#if defined(__x86_64__)
    /*
     * The syscall number is passed in rax and the result comes back in rax,
     * so both operands use the "a" constraint instead of bouncing through
     * memory. The syscall instruction itself overwrites rcx and r11, so they
     * must be declared as clobbered. "memory" plus volatile keep the
     * compiler from reordering, caching across, or deleting the call.
     */
    __asm__ __volatile__ ("syscall"
                          : "=a" (tid)
                          : "a" (syscall_nr)
                          : "rcx", "r11", "memory");
#elif defined(__aarch64__)
    /*
     * The syscall number is passed in x8 and the result comes back in x0.
     * Binding locals to those registers lets the compiler set them up
     * without extra mov instructions inside the asm.
     */
    register unsigned long nr_reg __asm__ ("x8") = syscall_nr;
    register long ret_reg __asm__ ("x0");

    __asm__ __volatile__ ("svc #0"
                          : "=r" (ret_reg)
                          : "r" (nr_reg)
                          : "memory");
    tid = ret_reg;
#endif

    assert(tid >= 0);
}
/*
 * Return a timestamp in nanoseconds, intended for measuring elapsed time by
 * subtracting two readings.
 *
 * Uses the monotonic clock when the headers expose it, so the difference
 * between two readings is not affected by wall-clock adjustments and has
 * nanosecond rather than microsecond granularity. Falls back to
 * gettimeofday() when CLOCK_MONOTONIC is not available at compile time.
 */
uint64_t nstime()
{
    const uint64_t ns_per_sec = 1000000000;

#ifdef CLOCK_MONOTONIC
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * ns_per_sec + (uint64_t)ts.tv_nsec;
#else
    const uint64_t ns_per_usec = 1000;
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return (uint64_t)tv.tv_sec * ns_per_sec + (uint64_t)tv.tv_usec * ns_per_usec;
#endif
}
/*
 * Print one result line: average nanoseconds per call (integer division),
 * total elapsed seconds, and a label naming the measured variant.
 *
 * start/end are nstime() readings taken around the loop; count is the
 * number of iterations the loop executed.
 */
static void report_elapsed(uint64_t start, uint64_t end, long count, const char *label)
{
    uint64_t elapsed = end - start;

    /* Cast so the argument matches %llu regardless of how uint64_t is defined. */
    printf("%llu ns elapsed %.2f sec %s\n",
           (unsigned long long)(elapsed / (uint64_t)count),
           elapsed / 1000000000.0,
           label);
}

/*
 * Benchmark driver: times 50 million raw gettid syscalls, then 50 million
 * calls to gettid() through its ordinary function symbol, and prints the
 * average per-call cost and total time for each.
 */
int main(int argc, char **argv)
{
    const long count = 50000000;
    long loop;
    uint64_t start;
    uint64_t end;

    (void)argc;
    (void)argv;

    /* Pass 1: raw syscall instruction. */
    loop = count;
    start = nstime();
    while (loop--) {
        call_gettid_syscall();
    }
    end = nstime();
    report_elapsed(start, end, count, " gettid syscall");

    /*
     * Pass 2: plain function call to gettid().
     * NOTE(review): whether this avoids a kernel transition depends on the
     * platform's gettid() implementation -- confirm per target.
     */
    loop = count;
    start = nstime();
    while (loop--) {
        long tid = gettid();
        assert(tid >= 0);
    }
    end = nstime();
    report_elapsed(start, end, count, " gettid local");

    return 0;
}
Here are the results on OSv (3 runs) with single cpu:
./scripts/run.py -e /tests/misc-syscall-perf.so -c 1
106 ns elapsed 5.33 sec gettid syscall
2 ns elapsed 0.14 sec gettid local
107 ns elapsed 5.38 sec gettid syscall
2 ns elapsed 0.15 sec gettid local
107 ns elapsed 5.37 sec gettid syscall
2 ns elapsed 0.15 sec gettid local
and on Linux:
taskset -c 1-1 /tmp/misc-syscall-perf
367 ns elapsed 18.38 sec gettid syscall
367 ns elapsed 18.37 sec gettid local
367 ns elapsed 18.39 sec gettid syscall
366 ns elapsed 18.32 sec gettid local
367 ns elapsed 18.40 sec gettid syscall
366 ns elapsed 18.30 sec gettid local
So clearly the local function calls on OSv are the winners, but even the syscalls on OSv are roughly 3.4 times faster than on Linux (367 ns vs. 107 ns per call). BTW, I did not see much difference with the static executable on OSv.
Regards,
Waldek