From: Waldemar Kozaczuk <
jwkoz...@gmail.com>
Committer: WALDEMAR KOZACZUK <
jwkoz...@gmail.com>
Branch: master
Add strace-like functionality
This PR implements simple strace-like mechanism described by #1263.
In essence, it adds a new structure trace_log that holds a fixed size (64K)
array of pointers to trace_records and acts as a ring buffer between threads
capturing active tracepoints (producers) and new thread strace (single consumer)
that prints them to the console. The write pointer - write_offset - is atomic that
rolls over every 64K and the strace thread follows it and tries to print
each trace (see print_trace).
The strace can be activated by adding "--strace" kernel command line argument.
./scripts/firecracker.py -e '--trace=vfs* --strace /hello'
OSv v0.57.0-75-gb7150705
init 0 0.001666500 vfs_mkdir("/dev" 0755)
init 0 0.001685758 vfs_mkdir_ret()
init 0 0.001698722 vfs_dup(0)
init 0 0.001699344 vfs_dup_ret(1)
init 0 0.001699520 vfs_dup(0)
init 0 0.001699644 vfs_dup_ret(2)
>init 0 0.006818073 vfs_mkdir("/rofs" 0755)
>init 0 0.006821609 vfs_mkdir_ret()
Booted up in 13.77 ms
Cmdline: /hello
>init 0 0.008424749 vfs_open("/usr/lib/fs" 0x0 00)
Hello from C code
>init 0 0.008429978 vfs_open_err(2)
>init 0 0.008435063 vfs_open("/etc/fstab" 0x0 00)
>init 0 0.008438018 vfs_open_ret(3)
>init 0 0.008531832 vfs_open("/dev" 0x0 00)
>init 0 0.008534193 vfs_open_ret(4)
>init 0 0.008534456 vfs_close(4)
>init 0 0.008537971 vfs_close_ret()
>init 0 0.008554489 vfs_open("/proc" 0x0 00)
>init 0 0.008556158 vfs_open_ret(4)
>init 0 0.008556252 vfs_close(4)
>init 0 0.008556763 vfs_close_ret()
>init 0 0.008583356 vfs_open("/sys" 0x0 00)
>init 0 0.008585037 vfs_open_ret(4)
>init 0 0.008585171 vfs_close(4)
>init 0 0.008585676 vfs_close_ret()
>init 0 0.008605026 vfs_open("/tmp" 0x0 00)
>init 0 0.008606443 vfs_open_ret(4)
>init 0 0.008606579 vfs_close(4)
>init 0 0.008607093 vfs_close_ret()
>init 0 0.008619345 vfs_close(3)
>init 0 0.008620198 vfs_close_ret()
>init 0 0.008634389 vfs_ioctl(1 0x5401)
>init 0 0.008634974 vfs_ioctl_ret()
>init 0 0.008635317 vfs_pwritev(1 0x200000501690 0x2 0xffffffff)
>init 0 0.008953023 vfs_pwritev_ret(0x16)
>init 0 0.008954858 vfs_pwritev(1 0x200000501660 0x2 0xffffffff)
>init 0 0.009209918 vfs_pwritev_ret(0x11)
>init 0 0.009235297 vfs_open("/init" 0x0 00)
>init 0 0.009237413 vfs_open_err(2)
>init 0 0.009259309 vfs_lstat(pathname=/hello, stat=0x2000004fe5e0)
>init 0 0.009265126 vfs_lstat_ret()
>init 0 0.009265804 vfs_open("/hello" 0x0 00)
>init 0 0.009267528 vfs_open_ret(3)
>init 0 0.009267772 vfs_close(3)
>init 0 0.009268057 vfs_close_ret()
/hello 0 0.009949201 vfs_pwritev(1 0x200000601e90 0x2 0xffffffff)
/hello 0 0.011528630 vfs_pwritev_ret(0x12)
Fixes #1263
---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1045,6 +1045,7 @@ objects += core/mmio.o
objects += core/kprintf.o
objects += core/trace.o
objects += core/trace-count.o
+objects += core/strace.o
objects += core/callstack.o
objects += core/poll.o
objects += core/select.o
diff --git a/core/strace.cc b/core/strace.cc
--- a/core/strace.cc
+++ b/core/strace.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include <osv/strace.hh>
+#include <osv/sched.hh>
+#include "drivers/console.hh"
+
+trace_log* _trace_log = nullptr;
+
+static void print_trace(trace_record* tr) {
+ char msg[512];
+ auto tp = tr->tp;
+ float time = tr->time;
+ std::string thread_name = tr->thread_name.data();
+
+ auto len = snprintf(msg, 512, "%-15s %3d %12.9f %s(", thread_name.c_str(), tr->cpu, time / 1000000000, tp->name);
+ auto left = 512 - len;
+ auto m = msg + len;
+
+ auto fmt = tp->format;
+ //Copy all up to 1st '%'
+ while (*fmt && *fmt != '%' && left > 0) {
+ *m++ = *fmt++;
+ left--;
+ len++;
+ }
+
+ auto buf = tr->buffer;
+ auto sig = tr->tp->sig;
+ int written = 0;
+
+ if (tr->backtrace) {
+ buf += tracepoint_base::backtrace_len * sizeof(void*);
+ }
+
+ while (*sig != 0 && left > 2) {
+ //Copy fragment of tp->format up to next '%'
+ char _fmt[128];
+ int i = 0;
+ do {
+ _fmt[i++] = *fmt++;
+ } while (*fmt && *fmt != '%');
+ _fmt[i] = 0;
+
+ //Detect type of data, deserialize and print to the msg
+ switch (*sig++) {
+ case 'c':
+ buf = align_up(buf, object_serializer<char>().alignment());
+ written = snprintf(m, left, _fmt, *reinterpret_cast<char*>(buf++));
+ break;
+ case 'b':
+ case 'B':
+ buf = align_up(buf, object_serializer<u8>().alignment());
+ written = snprintf(m, left, _fmt, *buf++);
+ break;
+ case 'h':
+ case 'H':
+ buf = align_up(buf, object_serializer<u16>().alignment());
+ written = snprintf(m, left, _fmt, *reinterpret_cast<u16*>(buf));
+ buf += sizeof(u16);
+ break;
+ case 'i':
+ case 'I':
+ case 'f':
+ buf = align_up(buf, object_serializer<u32>().alignment());
+ written = snprintf(m, left, _fmt, *reinterpret_cast<u32*>(buf));
+ buf += sizeof(u32);
+ break;
+ case 'q':
+ case 'Q':
+ case 'd':
+ case 'P':
+ buf = align_up(buf, object_serializer<u64>().alignment());
+ written = snprintf(m, left, _fmt, *reinterpret_cast<u64*>(buf));
+ buf += sizeof(u64);
+ break;
+ case '?':
+ written = snprintf(m, left, _fmt, *reinterpret_cast<bool*>(buf++));
+ break;
+ case 'p': {
+ //string
+ char str[128];
+ auto slen = *buf++;
+ int i = 0;
+ while (slen-- && i < 127) {
+ str[i++] = *reinterpret_cast<char*>(buf++);
+ }
+ str[i] = 0;
+ written = snprintf(m, left, _fmt, str);
+ buf += (object_serializer<const char*>::max_len - i - 1);
+ break;
+ }
+ case '*': {
+ //binary data
+ buf = align_up(buf, sizeof(u16));
+ char str[256];
+ auto slen = *reinterpret_cast<u16*>(buf);
+ int i = 0;
+ buf += 2;
+ str[i++] = '{';
+ while (slen-- && (i + 4) < 255) {
+ auto byte = *reinterpret_cast<u8*>(buf++);
+ auto high = byte / 16;
+ str[i++] = high < 10 ? '0' + high : 'a' + (high - 10);
+ auto low = byte % 16;
+ str[i++] = low < 10 ? '0' + low : 'a' + (low - 10);
+ str[i++] = ' ';
+ }
+ str[i++] = '}';
+ str[i] = 0;
+ written = snprintf(m, left, _fmt, str);
+ break;
+ }
+ default:
+ assert(0 && "should not reach");
+ }
+
+ left -= written;
+ len += written;
+ m += written;
+ }
+ *m++ = ')';
+ *m++ = '\n';
+ console::write(msg, len + 2);
+}
+
+static sched::thread *strace = nullptr;
+
+void start_strace() {
+ _trace_log = new trace_log();
+ strace = sched::thread::make([] {
+ while (true) {
+ while (auto tr = _trace_log->read()) {
+ print_trace(tr);
+ }
+ sched::thread::sleep(std::chrono::microseconds(100));
+ }
+ }, sched::thread::attr().name("strace"));
+
+ strace->start();
+}
diff --git a/core/trace.cc b/core/trace.cc
--- a/core/trace.cc
+++ b/core/trace.cc
@@ -27,6 +27,7 @@
#include <osv/semaphore.hh>
#include <osv/elf.hh>
#include <cxxabi.h>
+#include "drivers/console.hh"
using namespace std;
@@ -186,6 +187,14 @@ void enable_backtraces(bool backtrace) {
}
}
+void list_all_tracepoints() {
+ char buf[128];
+ for (auto& tp : tracepoint_base::tp_list) {
+ auto len = snprintf(buf, 128, "Tracepoint %s @ %p\n",
tp.name, &tp);
+ console::write(buf, len);
+ }
+}
+
namespace std {
template<>
diff --git a/include/osv/strace.hh b/include/osv/strace.hh
--- a/include/osv/strace.hh
+++ b/include/osv/strace.hh
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#ifndef STRACE_HH_
+#define STRACE_HH_
+
+#include <osv/trace.hh>
+
+void start_strace();
+
+#endif
diff --git a/include/osv/trace.hh b/include/osv/trace.hh
--- a/include/osv/trace.hh
+++ b/include/osv/trace.hh
@@ -23,10 +23,13 @@
#include <arch.hh>
#include <osv/rcu.hh>
#include <safe-ptr.hh>
+#include <stdint.h>
+#include <atomic>
void enable_trace();
void enable_tracepoint(std::string wildcard);
void enable_backtraces(bool = true);
+void list_all_tracepoints();
class tracepoint_base;
@@ -48,6 +51,40 @@ struct trace_record {
};
};
+//Simple lock-less multiple-producer single-consumer structure
+//designed to act as a data gateway between threads generating trace
+//records and the strace thread printing them to the console
+//
+//In essence it is an array of pointers to the trace records
+//indexed by write_offset which stores next entry offset to write and
+//is atomic to guarantee no two producers step on each other and
+//read_offset that stores offset of next entry to read
+//
+//The trace_log is designed as a circular buffer where
+//both read and write offsets would wrap around from 0xffff to 0
+//so it is possible that with huge number of trace record written
+//and slow consumer some data may get overwritten
+constexpr const size_t trace_log_size = 0x10000;
+struct trace_log {
+ trace_record *traces[trace_log_size];
+ std::atomic<uint16_t> write_offset = {0};
+ uint16_t read_offset = {0};
+
+ void write(trace_record* tr) {
+ traces[write_offset.fetch_add(1)] = tr;
+ }
+
+ trace_record* read() {
+ if (read_offset == write_offset.load()) {
+ return nullptr;
+ } else {
+ return traces[read_offset++];
+ }
+ }
+};
+
+extern trace_log* _trace_log;
+
template <size_t idx, size_t N, typename... args>
struct tuple_formatter
{
@@ -369,6 +406,9 @@ public:
serialize(buffer, as);
barrier();
tr->tp = this; // do this last to indicate the record is complete
+ if (_trace_log) {
+ _trace_log->write(tr);
+ }
}
void serialize(void* buffer, std::tuple<s_args...> as) {
serializer<0, sizeof...(s_args), s_args...>::write(buffer, 0, as);
diff --git a/loader.cc b/loader.cc
--- a/loader.cc
+++ b/loader.cc
@@ -31,6 +31,7 @@
#include "arch.hh"
#include "arch-setup.hh"
#include "osv/trace.hh"
+#include "osv/strace.hh"
#include <osv/power.hh>
#include <osv/rcu.hh>
#include <osv/mempool.hh>
@@ -150,6 +151,8 @@ static bool opt_leak = false;
static bool opt_noshutdown = false;
bool opt_power_off_on_abort = false;
static bool opt_log_backtrace = false;
+static bool opt_list_tracepoints = false;
+static bool opt_strace = false;
static bool opt_mount = true;
static bool opt_pivot = true;
static std::string opt_rootfs;
@@ -179,6 +182,8 @@ static void usage()
std::cout << " --sampler=arg start stack sampling profiler\n";
std::cout << " --trace=arg tracepoints to enable\n";
std::cout << " --trace-backtrace log backtraces in the tracepoint log\n";
+ std::cout << " --trace-list list available tracepoints\n";
+ std::cout << " --strace start a thread to print tracepoints to the console on the fly\n";
std::cout << " --leak start leak detector after boot\n";
std::cout << " --nomount don't mount the root file system\n";
std::cout << " --nopivot do not pivot the root from bootfs to the root fs\n";
@@ -260,6 +265,10 @@ static void parse_options(int loader_argc, char** loader_argv)
opt_log_backtrace = true;
}
+ if (extract_option_flag(options_values, "trace-list")) {
+ opt_list_tracepoints = true;
+ }
+
if (extract_option_flag(options_values, "verbose")) {
opt_verbose = true;
enable_verbose();
@@ -283,6 +292,9 @@ static void parse_options(int loader_argc, char** loader_argv)
enable_tracepoint(t);
}
}
+ if (extract_option_flag(options_values, "strace")) {
+ opt_strace = true;
+ }
}
opt_mount = !extract_option_flag(options_values, "nomount");
@@ -723,12 +735,19 @@ void main_cont(int loader_argc, char** loader_argv)
poweroff();
}
+ if (opt_list_tracepoints) {
+ list_all_tracepoints();
+ }
+
enable_trace();
if (opt_log_backtrace) {
// can only do this after smp_launch, otherwise the IDT is not initialized,
// and backtrace_safe() fails as soon as we get an exception
enable_backtraces();
}
+ if (opt_strace) {
+ start_strace();
+ }
sched::init_detached_threads_reaper();
elf::setup_missing_symbols_detector();