[PATCH seastar v1] memory: reduce small_pool vulnerability to fragmentation further

2 views
Skip to first unread message

Avi Kivity

<avi@scylladb.com>
unread,
Aug 26, 2017, 4:44:57 PM8/26/17
to seastar-dev@googlegroups.com
small_pool tries to allocate large-ish spans to reduce waste due to bad
packing. However, if large spans aren't available, we fail allocation.

Improve this by giving small_pool a second chance: if allocation fails,
try again with a smaller span, just large enough to hold at least one
object.
---


Note: untested

core/memory.cc | 51 ++++++++++++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/core/memory.cc b/core/memory.cc
index ed5a0db1a..cdb9d74ad 100644
--- a/core/memory.cc
+++ b/core/memory.cc
@@ -272,20 +272,18 @@ class page_list {
friend void on_allocation_failure(size_t);
};

class small_pool {
unsigned _object_size;
- unsigned _span_size;
+ uint8_t _span_sizes[2]; // idx 0 = preferred, idx 1 = fallback
free_object* _free = nullptr;
size_t _free_count = 0;
unsigned _min_free;
unsigned _max_free;
- unsigned _spans_in_use = 0;
+ unsigned _pages_in_use = 0;
page_list _span_list;
static constexpr unsigned idx_frac_bits = 2;
-private:
- size_t span_bytes() const { return _span_size * page_size; }
public:
explicit small_pool(unsigned object_size) noexcept;
~small_pool();
void* allocate();
void deallocate(void* object);
@@ -295,11 +293,10 @@ class small_pool {
static constexpr unsigned idx_to_size(unsigned idx);
allocation_site_ptr& alloc_site_holder(void* ptr);
private:
void add_more_objects();
void trim_free_list();
- float waste();
friend void on_allocation_failure(size_t);
};

// index 0b0001'1100 -> size (1 << 4) + 0b11 << (4 - 2)

@@ -1071,16 +1068,25 @@ void cpu_pages::set_min_free_pages(size_t pages) {
min_free_pages = pages;
maybe_reclaim();
}

small_pool::small_pool(unsigned object_size) noexcept
- : _object_size(object_size), _span_size(1) {
+ : _object_size(object_size) {
+ unsigned span_size = 1;
+ auto span_bytes = [&] { return span_size * page_size; };
+ auto waste = [&] { return (span_bytes() % _object_size) / (1.0 * span_bytes()); };
while (_object_size > span_bytes()
- || (_span_size < 32 && waste() > 0.05)
+ || (span_size < 32 && waste() > 0.05)
|| (span_bytes() / object_size < 4)) {
- ++_span_size;
+ ++span_size;
+ }
+ _span_sizes[0] = span_size;
+ span_size = 1;
+ while (object_size > span_bytes()) {
+ ++span_size;
}
+ _span_sizes[1] = span_size;
_max_free = std::max<unsigned>(100, span_bytes() * 2 / _object_size);
_min_free = _max_free / 2;
}

small_pool::~small_pool() {
@@ -1131,23 +1137,30 @@ small_pool::add_more_objects() {
++span.nr_small_alloc;
}
}
while (_free_count < goal) {
disable_backtrace_temporarily dbt;
- auto data = reinterpret_cast<char*>(cpu_mem.allocate_large(_span_size));
+ auto which = 0;
+ auto span_size = _span_sizes[0];
+ auto data = reinterpret_cast<char*>(cpu_mem.allocate_large(span_size));
if (!data) {
- return;
+ which = 1;
+ span_size = _span_sizes[1];
+ auto data = reinterpret_cast<char*>(cpu_mem.allocate_large(span_size));
+ if (!data) {
+ return;
+ }
}
- ++_spans_in_use;
+ _pages_in_use += span_size;
auto span = cpu_mem.to_page(data);
- for (unsigned i = 0; i < _span_size; ++i) {
+ for (unsigned i = 0; i < span_size; ++i) {
span[i].offset_in_span = i;
span[i].pool = this;
}
span->nr_small_alloc = 0;
span->freelist = nullptr;
- for (unsigned offset = 0; offset <= span_bytes() - _object_size; offset += _object_size) {
+ for (unsigned offset = 0; offset <= span_size * page_size - _object_size; offset += _object_size) {
auto h = reinterpret_cast<free_object*>(data + offset);
h->next = _free;
_free = h;
++_free_count;
++span->nr_small_alloc;
@@ -1169,21 +1182,17 @@ small_pool::trim_free_list() {
_span_list.push_front(cpu_mem.pages, *span);
}
obj->next = span->freelist;
span->freelist = obj;
if (--span->nr_small_alloc == 0) {
+ _pages_in_use -= span->span_size;
_span_list.erase(cpu_mem.pages, *span);
cpu_mem.free_span(span - cpu_mem.pages, span->span_size);
- --_spans_in_use;
}
}
}

-float small_pool::waste() {
- return (span_bytes() % _object_size) / (1.0 * span_bytes());
-}
-
void
abort_on_underflow(size_t size) {
if (std::make_signed_t<size_t>(size) < 0) {
// probably a logic error, stop hard
abort();
@@ -1409,14 +1418,14 @@ void on_allocation_failure(size_t size) {
seastar_memory_logger.debug("Used memory: {} Free memory: {} Total memory: {}", total_mem - free_mem, free_mem, total_mem);
seastar_memory_logger.debug("Small pools:");
seastar_memory_logger.debug("objsz spansz usedobj memory wst%");
for (unsigned i = 0; i < cpu_mem.small_pools.nr_small_pools; i++) {
auto& sp = cpu_mem.small_pools[i];
- auto memory = sp._spans_in_use * sp.span_bytes();
- auto use_count = sp._spans_in_use * sp.span_bytes() / sp.object_size() - sp._free_count;
+ auto use_count = sp._pages_in_use * page_size / sp.object_size() - sp._free_count;
+ auto memory = sp._pages_in_use * page_size;
auto wasted_percent = memory ? sp._free_count * sp.object_size() * 100.0 / memory : 0;
- seastar_memory_logger.debug("{} {} {} {} {}", sp.object_size(), sp.span_bytes(), use_count, memory, wasted_percent);
+ seastar_memory_logger.debug("{} {} {} {} {}", sp.object_size(), sp._span_sizes[0] * page_size, use_count, memory, wasted_percent);
}
seastar_memory_logger.debug("Page spans:");
seastar_memory_logger.debug("index size [B] free [B]");
for (unsigned i = 0; i< cpu_mem.nr_span_lists; i++) {
auto& span_list = cpu_mem.fsu.free_spans[i];
--
2.13.5

Botond Dénes

<bdenes@scylladb.com>
unread,
Aug 28, 2017, 1:49:07 AM8/28/17
to Avi Kivity, seastar-dev@googlegroups.com
On Sat, Aug 26, 2017 at 11:44 PM, Avi Kivity <a...@scylladb.com> wrote:
> small_pool tries to allocate large-ish spans to reduce waste due to bad
> packing. However, if large spans aren't available, we fail allocation.
>
> Improve this by giving small_pool a second chance: if allocation fails,
> try again with a smaller span, just large enough to hold at least one
> object.
> ---
>
>
> Note: untested
>
> core/memory.cc | 51 ++++++++++++++++++++++++++++++---------------------
> 1 file changed, 30 insertions(+), 21 deletions(-)
>
> diff --git a/core/memory.cc b/core/memory.cc
> index ed5a0db1a..cdb9d74ad 100644
> --- a/core/memory.cc
> +++ b/core/memory.cc
> @@ -272,20 +272,18 @@ class page_list {
> friend void on_allocation_failure(size_t);
> };
>
> class small_pool {
> unsigned _object_size;
> - unsigned _span_size;
> + uint8_t _span_sizes[2]; // idx 0 = preferred, idx 1 = fallback
Why not use a
struct span_sizes {
uint8_t preferred;
uint8_t fallback;
};
so that the code documents itself? I had to scroll back here at least
once to remind myself which of 0 and 1 is which.
> --
> You received this message because you are subscribed to the Google Groups "seastar-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to seastar-dev...@googlegroups.com.
> To post to this group, send email to seast...@googlegroups.com.
> Visit this group at https://groups.google.com/group/seastar-dev.
> To view this discussion on the web visit https://groups.google.com/d/msgid/seastar-dev/20170826204451.7896-1-avi%40scylladb.com.
> For more options, visit https://groups.google.com/d/optout.

Avi Kivity

<avi@scylladb.com>
unread,
Aug 28, 2017, 2:06:30 AM8/28/17
to Botond Dénes, seastar-dev@googlegroups.com
When the patch started out, I saved the index into the array somewhere
in struct page. It turned out not to be needed, so a span_sizes struct
would be better.

Tomasz Grabiec

<tgrabiec@scylladb.com>
unread,
Aug 28, 2017, 4:06:45 AM8/28/17
to Avi Kivity, seastar-dev
Looks good to me. +1 for Botond's suggestion.

--
2.13.5

--
You received this message because you are subscribed to the Google Groups "seastar-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to seastar-dev+unsubscribe@googlegroups.com.

Avi Kivity

<avi@scylladb.com>
unread,
Aug 28, 2017, 4:43:24 AM8/28/17
to seastar-dev@googlegroups.com
small_pool tries to allocate large-ish spans to reduce waste due to bad
packing. However, if large spans aren't available, we fail allocation.

Improve this by giving small_pool a second chance: if allocation fails,
try again with a smaller span, just large enough to hold at least one
object.
---

v2:
use struct instead of array for preferred, fallback sizes
calculate preferred size last so that _min_free, _max_free are correct

core/memory.cc | 53 ++++++++++++++++++++++++++++++++---------------------
1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/core/memory.cc b/core/memory.cc
index ed5a0db1a..d47881e4f 100644
--- a/core/memory.cc
+++ b/core/memory.cc
@@ -271,21 +271,23 @@ class page_list {
}
friend void on_allocation_failure(size_t);
};

class small_pool {
+ struct span_sizes {
+ uint8_t preferred;
+ uint8_t fallback;
+ };
unsigned _object_size;
- unsigned _span_size;
+ span_sizes _span_sizes;
free_object* _free = nullptr;
size_t _free_count = 0;
unsigned _min_free;
unsigned _max_free;
- unsigned _spans_in_use = 0;
+ unsigned _pages_in_use = 0;
page_list _span_list;
static constexpr unsigned idx_frac_bits = 2;
-private:
- size_t span_bytes() const { return _span_size * page_size; }
public:
explicit small_pool(unsigned object_size) noexcept;
~small_pool();
void* allocate();
void deallocate(void* object);
@@ -295,11 +297,10 @@ class small_pool {
static constexpr unsigned idx_to_size(unsigned idx);
allocation_site_ptr& alloc_site_holder(void* ptr);
private:
void add_more_objects();
void trim_free_list();
- float waste();
friend void on_allocation_failure(size_t);
};

// index 0b0001'1100 -> size (1 << 4) + 0b11 << (4 - 2)

@@ -1071,16 +1072,25 @@ void cpu_pages::set_min_free_pages(size_t pages) {
min_free_pages = pages;
maybe_reclaim();
}

small_pool::small_pool(unsigned object_size) noexcept
- : _object_size(object_size), _span_size(1) {
+ : _object_size(object_size) {
+ unsigned span_size = 1;
+ auto span_bytes = [&] { return span_size * page_size; };
+ auto waste = [&] { return (span_bytes() % _object_size) / (1.0 * span_bytes()); };
+ while (object_size > span_bytes()) {
+ ++span_size;
+ }
+ _span_sizes.fallback = span_size;
+ span_size = 1;
while (_object_size > span_bytes()
- || (_span_size < 32 && waste() > 0.05)
+ || (span_size < 32 && waste() > 0.05)
|| (span_bytes() / object_size < 4)) {
- ++_span_size;
+ ++span_size;
}
+ _span_sizes.preferred = span_size;
_max_free = std::max<unsigned>(100, span_bytes() * 2 / _object_size);
_min_free = _max_free / 2;
}

small_pool::~small_pool() {
@@ -1131,23 +1141,28 @@ small_pool::add_more_objects() {
++span.nr_small_alloc;
}
}
while (_free_count < goal) {
disable_backtrace_temporarily dbt;
- auto data = reinterpret_cast<char*>(cpu_mem.allocate_large(_span_size));
+ auto span_size = _span_sizes.preferred;
+ auto data = reinterpret_cast<char*>(cpu_mem.allocate_large(span_size));
if (!data) {
- return;
+ span_size = _span_sizes.fallback;
+ auto data = reinterpret_cast<char*>(cpu_mem.allocate_large(span_size));
+ if (!data) {
+ return;
+ }
}
- ++_spans_in_use;
+ _pages_in_use += span_size;
auto span = cpu_mem.to_page(data);
- for (unsigned i = 0; i < _span_size; ++i) {
+ for (unsigned i = 0; i < span_size; ++i) {
span[i].offset_in_span = i;
span[i].pool = this;
}
span->nr_small_alloc = 0;
span->freelist = nullptr;
- for (unsigned offset = 0; offset <= span_bytes() - _object_size; offset += _object_size) {
+ for (unsigned offset = 0; offset <= span_size * page_size - _object_size; offset += _object_size) {
auto h = reinterpret_cast<free_object*>(data + offset);
h->next = _free;
_free = h;
++_free_count;
++span->nr_small_alloc;
@@ -1169,21 +1184,17 @@ small_pool::trim_free_list() {
_span_list.push_front(cpu_mem.pages, *span);
}
obj->next = span->freelist;
span->freelist = obj;
if (--span->nr_small_alloc == 0) {
+ _pages_in_use -= span->span_size;
_span_list.erase(cpu_mem.pages, *span);
cpu_mem.free_span(span - cpu_mem.pages, span->span_size);
- --_spans_in_use;
}
}
}

-float small_pool::waste() {
- return (span_bytes() % _object_size) / (1.0 * span_bytes());
-}
-
void
abort_on_underflow(size_t size) {
if (std::make_signed_t<size_t>(size) < 0) {
// probably a logic error, stop hard
abort();
@@ -1409,14 +1420,14 @@ void on_allocation_failure(size_t size) {
seastar_memory_logger.debug("Used memory: {} Free memory: {} Total memory: {}", total_mem - free_mem, free_mem, total_mem);
seastar_memory_logger.debug("Small pools:");
seastar_memory_logger.debug("objsz spansz usedobj memory wst%");
for (unsigned i = 0; i < cpu_mem.small_pools.nr_small_pools; i++) {
auto& sp = cpu_mem.small_pools[i];
- auto memory = sp._spans_in_use * sp.span_bytes();
- auto use_count = sp._spans_in_use * sp.span_bytes() / sp.object_size() - sp._free_count;
+ auto use_count = sp._pages_in_use * page_size / sp.object_size() - sp._free_count;
+ auto memory = sp._pages_in_use * page_size;
auto wasted_percent = memory ? sp._free_count * sp.object_size() * 100.0 / memory : 0;
- seastar_memory_logger.debug("{} {} {} {} {}", sp.object_size(), sp.span_bytes(), use_count, memory, wasted_percent);
+ seastar_memory_logger.debug("{} {} {} {} {}", sp.object_size(), sp._span_sizes.preferred * page_size, use_count, memory, wasted_percent);

Commit Bot

<bot@cloudius-systems.com>
unread,
Aug 28, 2017, 6:43:44 AM8/28/17
to seastar-dev@googlegroups.com, Avi Kivity
From: Avi Kivity <a...@scylladb.com>
Committer: Tomasz Grabiec <tgra...@scylladb.com>
Branch: master

memory: reduce small_pool vulnerability to fragmentation further

small_pool tries to allocate large-ish spans to reduce waste due to bad
packing. However, if large spans aren't available, we fail allocation.

Improve this by giving small_pool a second chance: if allocation fails,
try again with a smaller span, just large enough to hold at least one
object.
Message-Id: <2017082808431...@scylladb.com>

---
diff --git a/core/memory.cc b/core/memory.cc
--- a/core/memory.cc
+++ b/core/memory.cc
@@ -273,17 +273,19 @@ class page_list {
};

class small_pool {
+ struct span_sizes {
+ uint8_t preferred;
+ uint8_t fallback;
+ };
unsigned _object_size;
- unsigned _span_size;
+ span_sizes _span_sizes;
free_object* _free = nullptr;
size_t _free_count = 0;
unsigned _min_free;
unsigned _max_free;
- unsigned _spans_in_use = 0;
+ unsigned _pages_in_use = 0;
page_list _span_list;
static constexpr unsigned idx_frac_bits = 2;
-private:
- size_t span_bytes() const { return _span_size * page_size; }
public:
explicit small_pool(unsigned object_size) noexcept;
~small_pool();
@@ -297,7 +299,6 @@ class small_pool {
private:
void add_more_objects();
void trim_free_list();
- float waste();
friend void on_allocation_failure(size_t);
};

@@ -1073,12 +1074,21 @@ void cpu_pages::set_min_free_pages(size_t pages) {
}

small_pool::small_pool(unsigned object_size) noexcept
- : _object_size(object_size), _span_size(1) {
+ : _object_size(object_size) {
+ unsigned span_size = 1;
+ auto span_bytes = [&] { return span_size * page_size; };
+ auto waste = [&] { return (span_bytes() % _object_size) / (1.0 * span_bytes()); };
+ while (object_size > span_bytes()) {
+ ++span_size;
+ }
+ _span_sizes.fallback = span_size;
+ span_size = 1;
while (_object_size > span_bytes()
- || (_span_size < 32 && waste() > 0.05)
+ || (span_size < 32 && waste() > 0.05)
|| (span_bytes() / object_size < 4)) {
- ++_span_size;
+ ++span_size;
}
+ _span_sizes.preferred = span_size;
_max_free = std::max<unsigned>(100, span_bytes() * 2 / _object_size);
_min_free = _max_free / 2;
}
@@ -1133,19 +1143,24 @@ small_pool::add_more_objects() {
@@ -1171,17 +1186,13 @@ small_pool::trim_free_list() {
obj->next = span->freelist;
span->freelist = obj;
if (--span->nr_small_alloc == 0) {
+ _pages_in_use -= span->span_size;
_span_list.erase(cpu_mem.pages, *span);
cpu_mem.free_span(span - cpu_mem.pages, span->span_size);
- --_spans_in_use;
}
}
}

-float small_pool::waste() {
- return (span_bytes() % _object_size) / (1.0 * span_bytes());
-}
-
void
abort_on_underflow(size_t size) {
if (std::make_signed_t<size_t>(size) < 0) {
@@ -1411,10 +1422,10 @@ void on_allocation_failure(size_t size) {
Reply all
Reply to author
Forward
0 new messages