[COMMIT seastar master] memory: Use probabilistic sampler

34 views

Skip to first unread message

Commit Bot

<bot@cloudius-systems.com>

unread,

Jan 2, 2024, 9:23:23 AM1/2/24

to seastar-dev@googlegroups.com, Stephan Dollberg

From: Stephan Dollberg <ste...@redpanda.com>
Committer: Travis Downs <travis...@redpanda.com>
Branch: master

memory: Use probabilistic sampler

Use the adapted memory sampler from heapprofd.

We no longer record every allocation: an allocation is recorded only when
the sampler reports that the next sample point has been reached. This is
checked on each allocation.

API methods are adapted to indicate that it's now a sampled memory
profile.

---
diff --git a/include/seastar/core/memory.hh b/include/seastar/core/memory.hh
--- a/include/seastar/core/memory.hh
+++ b/include/seastar/core/memory.hh
@@ -25,6 +25,7 @@
#include <seastar/core/bitops.hh>
#include <seastar/util/backtrace.hh>
#include <seastar/util/modules.hh>
+#include <seastar/util/sampler.hh>
#ifndef SEASTAR_MODULE
#include <new>
#include <cstdint>
@@ -91,8 +92,9 @@ namespace seastar {
/// ### Heap profiling
///
/// Heap profiling allows finding out how memory is used by your application, by
-/// recording the stacktrace of all allocations. See:
-/// * \ref set_heap_profiling_enabled()
+/// recording the stacktrace of a sampled subset of (or all) allocations. See:
+/// * \ref set_heap_profiling_sampling_rate()
+/// * \ref sampled_memory_profile()
/// * \ref scoped_heap_profiling
///
/// ### Abort on allocation failure
@@ -177,10 +179,9 @@ public:
// Can be nested, in which case the profiling is re-enabled when all
// the objects go out of scope.
class disable_backtrace_temporarily {
- bool _old;
+ sampler::disable_sampling_temporarily _disable_sampling;
public:
disable_backtrace_temporarily();
- ~disable_backtrace_temporarily();
};

enum class reclaiming_result {
@@ -410,7 +411,10 @@ public:
///
/// The location is identified by its backtrace. One allocation_site can
/// represent many allocations at the same location. `count` and `size`
-/// represent the cumulative sum of all allocations at the location.
+/// represent the cumulative sum of all allocations at the location. Note that
+/// the size is an extrapolated size and not the sampled one, i.e. when
+/// looking at the total size of all allocation sites it will approximate the
+/// total memory usage.
struct allocation_site {
mutable size_t count = 0; /// number of live objects allocated at backtrace.
mutable size_t size = 0; /// amount of bytes in live objects allocated at backtrace.
@@ -431,9 +435,13 @@ struct allocation_site {
}
};

-/// @brief If memory profiling is on returns the current memory live set
+/// @brief If memory sampling is on returns the current sampled memory live set
+///
+/// If there are tracked allocations (because heap profiling was on earlier),
+/// these will still be returned even if heap profiling is now off.
+///
/// @return a vector of \ref allocation_site
-std::vector<allocation_site> memory_profile();
+std::vector<allocation_site> sampled_memory_profile();

/// @brief Copies the current sampled set of allocation_sites into the
/// array pointed to by the output parameter
@@ -448,31 +456,43 @@ std::vector<allocation_site> memory_profile();
/// @param output array to copy the allocation sites to
/// @param size the size of the array pointed to by \p output
/// @return number of \ref allocation_site copied to the vector
-size_t memory_profile(allocation_site* output, size_t size);
+size_t sampled_memory_profile(allocation_site* output, size_t size);

-/// Enable heap profiling
+/// @brief Enable sampled heap profiling by setting a sample rate
+///
+/// @param sample_rate the sample rate to use. Disable heap profiling by setting
+/// the sample rate to 0
///
/// In order to use heap profiling you have to define
/// `SEASTAR_HEAPPROF`.
///
+/// Use \ref sampled_memory_profile for API access to profiling data
+///
+/// Note: Changing the sampling rate while previously sampled allocations are
+/// still alive can lead to inconsistent results of their reported size (i.e.:
+/// their size will be over or under reported). Undefined behavior or memory
+/// corruption will not occur.
+///
/// For an example script that makes use of the heap profiling data
/// see [scylla-gdb.py] (https://github.com/scylladb/scylla/blob/e1b22b6a4c56b4f1d0adf65d1a11db4bcb51fe7d/scylla-gdb.py#L1439)
/// This script can generate either textual representation of the data,
/// or a zoomable flame graph ([flame graph generation instructions](https://github.com/scylladb/scylla/wiki/Seastar-heap-profiler),
/// [example flame graph](https://user-images.githubusercontent.com/1389273/72920437-f0cf8a80-3d51-11ea-92f0-f3dbeb698871.png)).
-void set_heap_profiling_enabled(bool);
+void set_heap_profiling_sampling_rate(size_t sample_rate);

-/// Checks whether heap profiling is currently enabled
-/// @return true if heap profiling is enabled, false otherwise
-bool get_heap_profiling_enabled();
+/// @brief Returns the current heap profiling sampling rate (0 means off)
+/// @return the current heap profiling sampling rate
+size_t get_heap_profiling_sample_rate();

/// @brief Enable heap profiling for the duration of the scope.
///
+/// Note: Nesting different sample rates is currently not supported.
+///
/// For more information about heap profiling see
-/// \ref set_heap_profiling_enabled().
+/// \ref set_heap_profiling_sampling_rate().
class scoped_heap_profiling {
public:
- scoped_heap_profiling() noexcept;
+ scoped_heap_profiling(size_t) noexcept;
~scoped_heap_profiling();
};

diff --git a/include/seastar/core/reactor_config.hh b/include/seastar/core/reactor_config.hh
--- a/include/seastar/core/reactor_config.hh
+++ b/include/seastar/core/reactor_config.hh
@@ -159,8 +159,12 @@ struct reactor_options : public program_options::option_group {
program_options::value<unsigned> max_networking_io_control_blocks;
/// \brief Enable seastar heap profiling.
///
+ /// Allocations will be sampled every N bytes on average. Zero means off.
+ ///
+ /// Default: 0
+ ///
/// \note Unused when seastar was compiled without heap profiling support.
- program_options::value<> heapprof;
+ program_options::value<unsigned> heapprof;
/// Ignore SIGINT (for gdb).
program_options::value<> no_handle_interrupt;

diff --git a/src/core/memory.cc b/src/core/memory.cc
--- a/src/core/memory.cc
+++ b/src/core/memory.cc
@@ -97,6 +97,7 @@ module seastar;
#include <seastar/util/alloc_failure_injector.hh>
#include <seastar/util/memory_diagnostics.hh>
#include <seastar/util/std-compat.hh>
+#include <seastar/util/sampler.hh>
#include <seastar/util/log.hh>
#include <seastar/core/aligned_buffer.hh>
#ifndef SEASTAR_DEFAULT_ALLOCATOR
@@ -543,7 +544,8 @@ struct cpu_pages {
alloc_sites_type alloc_sites;
} asu;
allocation_site_ptr alloc_site_list_head = nullptr; // For easy traversal of asu.alloc_sites from scylla-gdb.py
- bool collect_backtrace = false;
+ sampler heap_prof_sampler;
+
char* mem() { return memory; }

void link(page_list& list, page* span);
@@ -553,9 +555,9 @@ struct cpu_pages {
unsigned nr_pages;
};
void maybe_reclaim();
- void* allocate_large_and_trim(unsigned nr_pages);
- void* allocate_large(unsigned nr_pages);
- void* allocate_large_aligned(unsigned align_pages, unsigned nr_pages);
+ void* allocate_large_and_trim(unsigned nr_pages, bool should_sample);
+ void* allocate_large(unsigned nr_pages, bool should_sample);
+ void* allocate_large_aligned(unsigned align_pages, unsigned nr_pages, bool should_sample);
page* find_and_unlink_span(unsigned nr_pages);
page* find_and_unlink_span_reclaiming(unsigned n_pages);
void free_large(void* ptr);
@@ -587,6 +589,7 @@ struct cpu_pages {
void warn_large_allocation(size_t size);
allocation_site_ptr add_alloc_site(size_t allocated_size);
void remove_alloc_site(allocation_site_ptr alloc_site, size_t deallocated_size);
+ bool should_sample(size_t size);
memory::memory_layout memory_layout();
~cpu_pages();
};
@@ -599,51 +602,54 @@ static cpu_pages& get_cpu_mem();

#ifdef SEASTAR_HEAPPROF

-void set_heap_profiling_enabled(bool enable) {
- bool is_enabled = get_cpu_mem().collect_backtrace;
- if (enable) {
- if (!is_enabled) {
- seastar_logger.info("Enabling heap profiler");
+void set_heap_profiling_sampling_rate(size_t sample_rate) {
+ bool current_sample_rate = get_cpu_mem().heap_prof_sampler.sampling_interval();
+ if (sample_rate) {
+ if (!current_sample_rate) {
+ seastar_logger.info("Enabling heap profiler - using {} bytes sampling rate", sample_rate);
+ } else {
+ seastar_logger.warn("Ignoring change to heap profiler sample rate as heap profiling is already turned on");
+ return;
}
} else {
- if (is_enabled) {
+ if (current_sample_rate) {
seastar_logger.info("Disabling heap profiler");
}
}
- get_cpu_mem().collect_backtrace = enable;
+ get_cpu_mem().heap_prof_sampler.set_sampling_interval(sample_rate);
}

-bool get_heap_profiling_enabled() {
- return get_cpu_mem().collect_backtrace;
+size_t get_heap_profiling_sample_rate() {
+ return get_cpu_mem().heap_prof_sampler.sampling_interval();
}

static thread_local int64_t scoped_heap_profiling_embed_count = 0;

-scoped_heap_profiling::scoped_heap_profiling() noexcept {
+scoped_heap_profiling::scoped_heap_profiling(size_t sample_rate) noexcept {
++scoped_heap_profiling_embed_count;
- set_heap_profiling_enabled(true);
+ set_heap_profiling_sampling_rate(sample_rate);
}

scoped_heap_profiling::~scoped_heap_profiling() {
if (!--scoped_heap_profiling_embed_count) {
- set_heap_profiling_enabled(false);
+ set_heap_profiling_sampling_rate(0);
}
}

#else

-void set_heap_profiling_enabled(bool enable) {
+void set_heap_profiling_sampling_rate(size_t enable) {
seastar_logger.warn("Seastar compiled without heap profiling support, heap profiler not supported;"
" compile with the Seastar_HEAP_PROFILING=ON CMake option to add heap profiling support");
}

-bool get_heap_profiling_enabled() {
+size_t get_heap_profiling_sample_rate() {
// don't log here, called on all paths
- return false;
+ return 0;
}

-scoped_heap_profiling::scoped_heap_profiling() noexcept {
- set_heap_profiling_enabled(true); // let it print the warning
+scoped_heap_profiling::scoped_heap_profiling(size_t sample_rate) noexcept {
+ set_heap_profiling_sampling_rate(sample_rate); // let it print the warning
}

scoped_heap_profiling::~scoped_heap_profiling() {
@@ -766,7 +772,7 @@ void cpu_pages::maybe_reclaim() {
}

void*
-cpu_pages::allocate_large_and_trim(unsigned n_pages) {
+cpu_pages::allocate_large_and_trim(unsigned n_pages, bool should_sample) {
// Avoid exercising the reclaimers for requests we'll not be able to satisfy
// nr_pages might be zero during startup, so check for that too
if (nr_pages && n_pages >= nr_pages) {
@@ -789,7 +795,7 @@ cpu_pages::allocate_large_and_trim(unsigned n_pages) {
span->span_size = span_end->span_size = span_size;
span->pool = nullptr;
#ifdef SEASTAR_HEAPPROF
- if (get_heap_profiling_enabled()) {
+ if (should_sample) {
auto alloc_site = add_alloc_site(span->span_size * page_size);
span->alloc_site = alloc_site;
}
@@ -813,7 +819,7 @@ cpu_pages::add_alloc_site(size_t allocated_size) {
allocation_site_ptr alloc_site = get_allocation_site();
if (alloc_site) {
++alloc_site->count;
- alloc_site->size += allocated_size;
+ alloc_site->size += heap_prof_sampler.sample_size(allocated_size);
}

return alloc_site;
@@ -823,7 +829,9 @@ void
cpu_pages::remove_alloc_site(allocation_site_ptr alloc_site, size_t deallocated_size) {
if (alloc_site) {
--alloc_site->count;
- alloc_site->size -= deallocated_size;
+ auto sample_size = heap_prof_sampler.sample_size(deallocated_size);
+ // prevent underflow in case sample rate changed
+ alloc_site->size -= alloc_site->size < sample_size ? alloc_site->size : sample_size;
if (alloc_site->count == 0) {
if (alloc_site->prev) {
alloc_site->prev->next = alloc_site->next;
@@ -840,6 +848,11 @@ cpu_pages::remove_alloc_site(allocation_site_ptr alloc_site, size_t deallocated_
}
}

+bool
+cpu_pages::should_sample(size_t size) {
+ return heap_prof_sampler.should_sample(size);
+}
+
void
inline
cpu_pages::check_large_allocation(size_t size) {
@@ -849,25 +862,20 @@ cpu_pages::check_large_allocation(size_t size) {
}

void*
-cpu_pages::allocate_large(unsigned n_pages) {
+cpu_pages::allocate_large(unsigned n_pages, bool should_sample) {
check_large_allocation(n_pages * page_size);
- return allocate_large_and_trim(n_pages);
+ return allocate_large_and_trim(n_pages, should_sample);
}

void*
-cpu_pages::allocate_large_aligned(unsigned align_pages, unsigned n_pages) {
+cpu_pages::allocate_large_aligned(unsigned align_pages, unsigned n_pages, bool should_sample) {
check_large_allocation(n_pages * page_size);
// buddy allocation is always aligned
- return allocate_large_and_trim(n_pages);
+ return allocate_large_and_trim(n_pages, should_sample);
}

-disable_backtrace_temporarily::disable_backtrace_temporarily() {
- _old = get_cpu_mem().collect_backtrace;
- get_cpu_mem().collect_backtrace = false;
-}
-
-disable_backtrace_temporarily::~disable_backtrace_temporarily() {
- get_cpu_mem().collect_backtrace = _old;
+disable_backtrace_temporarily::disable_backtrace_temporarily()
+ : _disable_sampling(cpu_mem.heap_prof_sampler.pause_sampling()) {
}

static
@@ -878,7 +886,7 @@ simple_backtrace get_backtrace() noexcept {

static
allocation_site_ptr get_allocation_site() {
- if (!cpu_mem.is_initialized() || !cpu_mem.collect_backtrace) {
+ if (!cpu_mem.is_initialized() || !cpu_mem.heap_prof_sampler.sampling_interval()) {
return nullptr;
}
// TODO: limit size of alloc_sites
@@ -1164,7 +1172,7 @@ void cpu_pages::do_resize(size_t new_size, allocate_system_memory_fn alloc_sys_m
// one past last page structure is a sentinel
auto new_page_array_pages = align_up(sizeof(page) * (new_pages + 1), page_size) / page_size;
auto new_page_array
- = reinterpret_cast<page*>(allocate_large(new_page_array_pages));
+ = reinterpret_cast<page*>(allocate_large(new_page_array_pages, false));
if (!new_page_array) {
throw std::bad_alloc();
}
@@ -1342,10 +1350,10 @@ small_pool::add_more_objects() {
}
while (_free_count < goal) {
auto span_size = _span_sizes.preferred;
- auto data = reinterpret_cast<char*>(get_cpu_mem().allocate_large(span_size));
+ auto data = reinterpret_cast<char*>(get_cpu_mem().allocate_large(span_size, false));
if (!data) {
span_size = _span_sizes.fallback;
- data = reinterpret_cast<char*>(get_cpu_mem().allocate_large(span_size));
+ data = reinterpret_cast<char*>(get_cpu_mem().allocate_large(span_size, false));
if (!data) {
return;
}
@@ -1400,21 +1408,21 @@ abort_on_underflow(size_t size) {
}
}

-void* allocate_large(size_t size) {
+void* allocate_large(size_t size, bool should_sample) {
abort_on_underflow(size);
unsigned size_in_pages = (size + page_size - 1) >> page_bits;
if ((size_t(size_in_pages) << page_bits) < size) {
return nullptr; // (size + page_size - 1) caused an overflow
}
- return get_cpu_mem().allocate_large(size_in_pages);
+ return get_cpu_mem().allocate_large(size_in_pages, should_sample);

}

-void* allocate_large_aligned(size_t align, size_t size) {
+void* allocate_large_aligned(size_t align, size_t size, bool should_sample) {
abort_on_underflow(size);
unsigned size_in_pages = (size + page_size - 1) >> page_bits;
unsigned align_in_pages = std::max(align, page_size) >> page_bits;
- return get_cpu_mem().allocate_large_aligned(align_in_pages, size_in_pages);
+ return get_cpu_mem().allocate_large_aligned(align_in_pages, size_in_pages, should_sample);
}

void free_large(void* ptr) {
@@ -1494,18 +1502,24 @@ void* allocate(size_t size) {
if (size <= sizeof(free_object)) {
size = sizeof(free_object);
}
+
+#ifdef SEASTAR_HEAPPROF
+ bool should_sample = get_cpu_mem().should_sample(size); // ask the sampler whether this allocation is recorded
+#else
+ bool should_sample = false; // heap profiling compiled out: never sample, same as allocate_aligned()
+#endif
void* ptr;
if (size <= max_small_allocation) {
#ifdef SEASTAR_HEAPPROF
- if (get_heap_profiling_enabled()) {
+ if (should_sample) {
ptr = allocate_from_sampled_small_pool<alignment_t::unaligned>(size);
} else
#endif
{
ptr = allocate_from_small_pool<alignment_t::unaligned>(size);
}
} else {
- ptr = allocate_large(size);
+ ptr = allocate_large(size, should_sample);
}
if (!ptr) {
on_allocation_failure(size);
@@ -1531,20 +1545,25 @@ void* allocate_aligned(size_t align, size_t size) {
if (size <= sizeof(free_object)) {
size = std::max(sizeof(free_object), align);
}
+#ifdef SEASTAR_HEAPPROF
+ bool should_sample = get_cpu_mem().should_sample(size);
+#else
+ bool should_sample = false;
+#endif
void* ptr;
if (size <= max_small_allocation && align <= page_size) {
// Our small allocator only guarantees alignment for power-of-two
// allocations which are not larger than a page.
#ifdef SEASTAR_HEAPPROF
- if (get_heap_profiling_enabled()) {
+ if (should_sample) {
ptr = allocate_from_sampled_small_pool<alignment_t::aligned>(size);
} else
#endif
{
ptr = allocate_from_small_pool<alignment_t::aligned>(size);
}
} else {
- ptr = allocate_large_aligned(align, size);
+ ptr = allocate_large_aligned(align, size, should_sample);
}
if (!ptr) {
on_allocation_failure(size);
@@ -1983,13 +2002,13 @@ static bool try_trigger_error_injector() {
}
}

-std::vector<allocation_site> memory_profile() {
+std::vector<allocation_site> sampled_memory_profile() {
disable_backtrace_temporarily dbt;
std::vector<allocation_site> ret(get_cpu_mem().asu.alloc_sites.begin(), get_cpu_mem().asu.alloc_sites.end());
return ret;
}

-size_t memory_profile(allocation_site* output, size_t size) {
+size_t sampled_memory_profile(allocation_site* output, size_t size) {
auto to_copy = std::min(size, get_cpu_mem().asu.alloc_sites.size());
std::copy_n(get_cpu_mem().asu.alloc_sites.begin(), to_copy, output);
return to_copy;
@@ -2419,30 +2438,27 @@ namespace seastar {
namespace memory {

disable_backtrace_temporarily::disable_backtrace_temporarily() {
- (void)_old;
-}
-
-disable_backtrace_temporarily::~disable_backtrace_temporarily() {
+ (void)_disable_sampling;
}

-void set_heap_profiling_enabled(bool enabled) {
+void set_heap_profiling_sampling_rate(size_t) {
seastar_logger.warn("Seastar compiled with default allocator, heap profiler not supported");
}

-bool get_heap_profiling_enabled() {
- return false;
+size_t get_heap_profiling_sample_rate() {
+ return 0;
}

-std::vector<allocation_site> memory_profile() {
+std::vector<allocation_site> sampled_memory_profile() {
return {};
}

-size_t memory_profile(allocation_site* output, size_t size) {
+size_t sampled_memory_profile(allocation_site* output, size_t size) {
return 0;
}

-scoped_heap_profiling::scoped_heap_profiling() noexcept {
- set_heap_profiling_enabled(true); // let it print the warning
+scoped_heap_profiling::scoped_heap_profiling(size_t sample_rate) noexcept {
+ set_heap_profiling_sampling_rate(sample_rate); // let it print the warning
}

scoped_heap_profiling::~scoped_heap_profiling() {
diff --git a/src/core/reactor.cc b/src/core/reactor.cc
--- a/src/core/reactor.cc
+++ b/src/core/reactor.cc
@@ -3859,7 +3859,7 @@ reactor_options::reactor_options(program_options::option_group* parent_group)
"Maximum number of I/O control blocks (IOCBs) to allocate per shard. This translates to the number of sockets supported per shard."
" Requires tuning /proc/sys/fs/aio-max-nr. Only valid for the linux-aio reactor backend (see --reactor-backend).")
#ifdef SEASTAR_HEAPPROF
- , heapprof(*this, "heapprof", "enable seastar heap profiling")
+ , heapprof(*this, "heapprof", 0, "Enable seastar heap profiling. Sample every ARG bytes. 0 means off")
#else
, heapprof(*this, "heapprof", program_options::unused{})
#endif
@@ -4394,12 +4394,12 @@ void smp::configure(const smp_options& smp_opts, const reactor_options& reactor_
std::mutex mtx;

#ifdef SEASTAR_HEAPPROF
- bool heapprof_enabled = reactor_opts.heapprof;
- if (heapprof_enabled) {
- memory::set_heap_profiling_enabled(heapprof_enabled);
+ size_t heapprof_sampling_rate = reactor_opts.heapprof.get_value();
+ if (heapprof_sampling_rate) {
+ memory::set_heap_profiling_sampling_rate(heapprof_sampling_rate);
}
#else
- bool heapprof_enabled = false;
+ size_t heapprof_sampling_rate = 0;
#endif

#ifdef SEASTAR_HAVE_DPDK
@@ -4477,7 +4477,7 @@ void smp::configure(const smp_options& smp_opts, const reactor_options& reactor_
auto smp_tmain = smp::_tmain;
for (i = 1; i < smp::count; i++) {
auto allocation = allocations[i];
- create_thread([this, smp_tmain, inited, &reactors_registered, &smp_queues_constructed, &smp_opts, &reactor_opts, &reactors, hugepages_path, i, allocation, assign_io_queues, alloc_io_queues, thread_affinity, heapprof_enabled, mbind, backend_selector, reactor_cfg, &mtx, &layout, use_transparent_hugepages] {
+ create_thread([this, smp_tmain, inited, &reactors_registered, &smp_queues_constructed, &smp_opts, &reactor_opts, &reactors, hugepages_path, i, allocation, assign_io_queues, alloc_io_queues, thread_affinity, heapprof_sampling_rate, mbind, backend_selector, reactor_cfg, &mtx, &layout, use_transparent_hugepages] {
try {
// initialize thread_locals that are equal across all reacto threads of this smp instance
smp::_tmain = smp_tmain;
@@ -4491,8 +4491,8 @@ void smp::configure(const smp_options& smp_opts, const reactor_options& reactor_
auto guard = std::lock_guard(mtx);
*layout = memory::internal::merge(std::move(*layout), std::move(another_layout));
}
- if (heapprof_enabled) {
- memory::set_heap_profiling_enabled(heapprof_enabled);
+ if (heapprof_sampling_rate) {
+ memory::set_heap_profiling_sampling_rate(heapprof_sampling_rate);
}
sigset_t mask;
sigfillset(&mask);
diff --git a/tests/unit/alloc_test.cc b/tests/unit/alloc_test.cc
--- a/tests/unit/alloc_test.cc
+++ b/tests/unit/alloc_test.cc
@@ -331,35 +331,225 @@ SEASTAR_TEST_CASE(test_diagnostics_allocation) {

#ifdef SEASTAR_HEAPPROF

-SEASTAR_TEST_CASE(test_sampled_profile_collection)
+// small wrapper to disincentivize gcc from unrolling the loop
+[[gnu::noinline]]
+char* malloc_wrapper(size_t size) {
+ auto ret = static_cast<char*>(malloc(size));
+ *ret = 'c'; // to prevent compiler from considering this a dead allocation and optimizing it out
+ return ret;
+}
+
+namespace seastar::memory {
+std::ostream& operator<<(std::ostream& os, const allocation_site& site) {
+ os << "allocation_site[count: " << site.count << ", size: " << site.size << "]";
+ return os;
+}
+}
+
+SEASTAR_TEST_CASE(test_sampled_profile_collection_small)
{
- BOOST_REQUIRE(!seastar::memory::get_heap_profiling_enabled());
- seastar::memory::set_heap_profiling_enabled(true);
- BOOST_REQUIRE(seastar::memory::get_heap_profiling_enabled());
+ {
+ auto stats = seastar::memory::sampled_memory_profile();
+ BOOST_REQUIRE_EQUAL(stats.size(), 0);
+ }
+
+ std::size_t count = 100;
+ std::vector<volatile char*> ptrs(count);
+
+ seastar::memory::set_heap_profiling_sampling_rate(100);
+
+#ifdef __clang__
+ #pragma nounroll
+#endif
+ for (std::size_t i = 0; i < count / 2; ++i) {
+ ptrs[i] = malloc_wrapper(10);
+ }
+
+#ifdef __clang__
+ #pragma nounroll
+#endif
+ for (std::size_t i = count / 2; i < count; ++i) {
+ ptrs[i] = malloc_wrapper(10);
+ }
+
+ auto get_samples = []() {
+ auto stats0 = seastar::memory::sampled_memory_profile();
+ auto stats1 = seastar::memory::sampled_memory_profile();
+
+ // two back-to-back copies of the sample should have the same value
+ BOOST_CHECK_EQUAL(stats0, stats1);
+
+ // check that we get the same value from the raw array interface
+ std::vector<seastar::memory::allocation_site> stats2(stats0.size());
+ auto sz2 = seastar::memory::sampled_memory_profile(stats2.data(), stats2.size());
+ BOOST_CHECK_EQUAL(stats0.size(), sz2);
+ BOOST_CHECK_EQUAL(stats0, stats2);
+
+ // check with +1 size, we expect to still only get size elements
+ std::vector<seastar::memory::allocation_site> stats3(stats0.size() + 1);
+ auto sz3 = seastar::memory::sampled_memory_profile(stats3.data(), stats3.size());
+ BOOST_CHECK_EQUAL(stats0.size(), sz3);
+ stats3.resize(sz3);
+ BOOST_CHECK_EQUAL(stats0, stats3);
+
+ return stats0;
+ };
+
+ // NB: the test framework allocates
+ seastar::memory::set_heap_profiling_sampling_rate(0);

{
- auto stats = seastar::memory::memory_profile();
+ auto stats = get_samples();
+ BOOST_REQUIRE_EQUAL(stats.size(), 2);
+ BOOST_REQUIRE_EQUAL(stats[0].size, stats[0].count * 100);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(100);
+
+ for (auto ptr : ptrs) {
+ free((void*)ptr);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(0);
+
+ {
+ auto stats = get_samples();
BOOST_REQUIRE_EQUAL(stats.size(), 0);
}

- volatile char* ptr = static_cast<char*>(malloc(10));
- *ptr = 'c'; // to prevent compiler from considering this a dead allocation and optimizing it out
+ return seastar::make_ready_future();
+}

+SEASTAR_TEST_CASE(test_sampled_profile_collection_large)
+{
{
- auto stats = seastar::memory::memory_profile();
- BOOST_REQUIRE_EQUAL(stats.size(), 1);
- BOOST_REQUIRE_EQUAL(stats[0].size, 32); // 10 + 8 falls into 32 byte pool
+ auto stats = seastar::memory::sampled_memory_profile();
+ BOOST_REQUIRE_EQUAL(stats.size(), 0);
}

- free((void*)ptr);
+ std::size_t count = 100;
+ std::vector<volatile char*> ptrs(count);
+
+ seastar::memory::set_heap_profiling_sampling_rate(1000000);
+
+#ifdef __clang__
+ #pragma nounroll
+#endif
+ for (std::size_t i = 0; i < count / 2; ++i) {
+ ptrs[i] = malloc_wrapper(100000);
+ }
+
+#ifdef __clang__
+ #pragma nounroll
+#endif
+ for (std::size_t i = count / 2; i < count; ++i) {
+ ptrs[i] = malloc_wrapper(100000);
+ }
+
+ // NB: the test framework allocates
+ seastar::memory::set_heap_profiling_sampling_rate(0);

{
- auto stats = seastar::memory::memory_profile();
+ auto stats = seastar::memory::sampled_memory_profile();
+ BOOST_REQUIRE_EQUAL(stats.size(), 2);
+ BOOST_REQUIRE_EQUAL(stats[0].size, stats[0].count * 1000000);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(1000000);
+
+ for (auto ptr : ptrs) {
+ free((void*)ptr);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(0);
+
+ {
+ auto stats = seastar::memory::sampled_memory_profile();
+ // NOTE this is because right now the tracking structure doesn't delete call sites ever
+ BOOST_REQUIRE_EQUAL(stats.size(), 0);
+ }
+
+ return seastar::make_ready_future();
+}
+
+SEASTAR_TEST_CASE(test_change_sample_rate)
+{
+ {
+ auto stats = seastar::memory::sampled_memory_profile();
BOOST_REQUIRE_EQUAL(stats.size(), 0);
}

- // Needed for now because we can't differentiate between sampled allocations and non-sampled ones
- seastar::memory::set_heap_profiling_enabled(false);
+ std::size_t sample_rate = 100;
+ std::size_t count = 10000;
+ std::vector<volatile char*> ptrs(count);
+
+ seastar::memory::set_heap_profiling_sampling_rate(sample_rate);
+
+#ifdef __clang__
+ #pragma nounroll
+#endif
+ for (std::size_t i = 0; i < count; ++i) {
+ ptrs[i] = malloc_wrapper(10);
+ }
+
+ // NB: the test framework allocates
+ seastar::memory::set_heap_profiling_sampling_rate(0);
+
+ size_t last_alloc_size = 0;
+ {
+ auto stats = seastar::memory::sampled_memory_profile();
+ BOOST_REQUIRE_EQUAL(stats.size(), 1);
+ last_alloc_size = stats[0].size;
+ BOOST_REQUIRE_EQUAL(stats[0].size, stats[0].count * sample_rate);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(sample_rate);
+
+ size_t free_iter = 0;
+ // free some of the allocations to check size changes
+ for (size_t i = 0; i < count / 4; ++i, ++free_iter) {
+ free((void*)ptrs[free_iter]);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(0);
+
+ {
+ auto stats = seastar::memory::sampled_memory_profile();
+ BOOST_REQUIRE_EQUAL(stats.size(), 1);
+ BOOST_REQUIRE_EQUAL(stats[0].size, stats[0].count * sample_rate);
+ BOOST_REQUIRE_NE(stats[0].size, last_alloc_size);
+ BOOST_REQUIRE_GT(stats[0].size, 0);
+ last_alloc_size = stats[0].size;
+ }
+
+ // now increase the sampling rate with outstanding allocations from the old rate
+ seastar::memory::set_heap_profiling_sampling_rate(sample_rate * 100);
+
+ for (size_t i = 0; i < count / 4; ++i, ++free_iter) {
+ free((void*)ptrs[free_iter]);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(0);
+
+ {
+ auto stats = seastar::memory::sampled_memory_profile();
+ BOOST_REQUIRE_EQUAL(stats.size(), 1);
+ BOOST_REQUIRE_LT(stats[0].size, last_alloc_size); // should not have underflowed
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(sample_rate);
+
+ // free the rest
+ for (size_t i = 0; i < count / 2; ++i, ++free_iter) {
+ free((void*)ptrs[free_iter]);
+ }
+
+ seastar::memory::set_heap_profiling_sampling_rate(0);
+
+ {
+ auto stats = seastar::memory::sampled_memory_profile();
+ BOOST_REQUIRE_EQUAL(stats.size(), 0);
+ }

return seastar::make_ready_future();
}

Reply all

Reply to author

Forward

0 new messages