[QUEUED scylla next] Merge "Choose the user max-result-size for service levels" from Botond

35 views
Skip to first unread message

Commit Bot

<bot@cloudius-systems.com>
unread,
May 5, 2021, 11:11:51 AM5/5/21
to scylladb-dev@googlegroups.com, Avi Kivity
From: Avi Kivity <a...@scylladb.com>
Committer: Avi Kivity <a...@scylladb.com>
Branch: next

Merge "Choose the user max-result-size for service levels" from Botond

"
Choosing the max-result-size for unlimited queries is broken for unknown
scheduling groups. In this case the system limit (unlimited) will be
chosen. A prime example of this breakdown is when service levels are
used.

This series fixes this in the same spirit as the similar semaphore
selection issue (#8508) was fixed: use the user limit as the fallback
in case of unknown scheduling groups.
To ensure that future fixes automatically apply to both query-classification-related
configurations, selecting the max result size for unlimited
queries is now delegated to the database, sharing the query
classification logic with the semaphore selection.

Fixes: #8591

Tests: unit(dev)
"

* 'query-max-size-service-level-fix/v2' of https://github.com/denesb/scylla:
service/storage_proxy: get_max_result_size() defer to db for unlimited queries
database: add get_unlimited_query_max_result_size()
query_class_config: add operator== for max_result_size
database: get_reader_concurrency_semaphore(): extract query classification logic

---
diff --git a/database.cc b/database.cc
--- a/database.cc
+++ b/database.cc
@@ -1493,12 +1493,20 @@ void database::register_connection_drop_notifier(netw::messaging_service& ms) {
});
}

-reader_concurrency_semaphore& database::get_reader_concurrency_semaphore() {
+namespace {
+
+enum class query_class {
+ user,
+ system,
+ maintenance,
+};
+
+query_class classify_query(const database_config& _dbcfg) {
const auto current_group = current_scheduling_group();

// Everything running in the statement group is considered a user query
if (current_group == _dbcfg.statement_scheduling_group) {
- return _read_concurrency_sem;
+ return query_class::user;
// System queries run in the default (main) scheduling group
// All queries executed on behalf of internal work also uses the system semaphore
} else if (current_group == default_scheduling_group()
@@ -1507,13 +1515,33 @@ reader_concurrency_semaphore& database::get_reader_concurrency_semaphore() {
|| current_group == _dbcfg.memory_compaction_scheduling_group
|| current_group == _dbcfg.memtable_scheduling_group
|| current_group == _dbcfg.memtable_to_cache_scheduling_group) {
- return _system_read_concurrency_sem;
+ return query_class::system;
// Reads done on behalf of view update generation run in the streaming group
} else if (current_scheduling_group() == _dbcfg.streaming_scheduling_group) {
- return _streaming_concurrency_sem;
+ return query_class::maintenance;
// Everything else is considered a user query
} else {
- return _read_concurrency_sem;
+ return query_class::user;
+ }
+}
+
+} // anonymous namespace
+
+query::max_result_size database::get_unlimited_query_max_result_size() const {
+ switch (classify_query(_dbcfg)) {
+ case query_class::user:
+ return query::max_result_size(_cfg.max_memory_for_unlimited_query_soft_limit(), _cfg.max_memory_for_unlimited_query_hard_limit());
+ case query_class::system: [[fallthrough]];
+ case query_class::maintenance:
+ return query::max_result_size(query::result_memory_limiter::unlimited_result_size);
+ }
+}
+
+reader_concurrency_semaphore& database::get_reader_concurrency_semaphore() {
+ switch (classify_query(_dbcfg)) {
+ case query_class::user: return _read_concurrency_sem;
+ case query_class::system: return _system_read_concurrency_sem;
+ case query_class::maintenance: return _streaming_concurrency_sem;
}
}

diff --git a/database.hh b/database.hh
--- a/database.hh
+++ b/database.hh
@@ -1598,6 +1598,10 @@ public:
return _supports_infinite_bound_range_deletions;
}

+ // Get the maximum result size for an unlimited query, appropriate for the
+ // query class, which is deduced from the current scheduling group.
+ query::max_result_size get_unlimited_query_max_result_size() const;
+
// Get the reader concurrency semaphore, appropriate for the query class,
// which is deduced from the current scheduling group.
reader_concurrency_semaphore& get_reader_concurrency_semaphore();
diff --git a/query_class_config.hh b/query_class_config.hh
--- a/query_class_config.hh
+++ b/query_class_config.hh
@@ -36,6 +36,10 @@ struct max_result_size {
explicit max_result_size(uint64_t soft_limit, uint64_t hard_limit) : soft_limit(soft_limit), hard_limit(hard_limit) { }
};

+inline bool operator==(const max_result_size& a, const max_result_size& b) {
+ return a.soft_limit == b.soft_limit && a.hard_limit == b.hard_limit;
+}
+
struct query_class_config {
reader_concurrency_semaphore& semaphore;
max_result_size max_memory_for_unlimited_query;
diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -1320,13 +1320,7 @@ endpoints_to_replica_ids(const locator::token_metadata& tm, const std::vector<gm
query::max_result_size storage_proxy::get_max_result_size(const query::partition_slice& slice) const {
// Unpaged and reverse queries.
if (!slice.options.contains<query::partition_slice::option::allow_short_read>() || slice.options.contains<query::partition_slice::option::reversed>()) {
- auto& db = _db.local();
- // We only limit user queries.
- if (current_scheduling_group() == db.get_statement_scheduling_group()) {
- return query::max_result_size(db.get_config().max_memory_for_unlimited_query_soft_limit(), db.get_config().max_memory_for_unlimited_query_hard_limit());
- } else {
- return query::max_result_size(query::result_memory_limiter::unlimited_result_size);
- }
+ return _db.local().get_unlimited_query_max_result_size();
} else {
return query::max_result_size(query::result_memory_limiter::maximum_result_size);
}
diff --git a/test/boost/database_test.cc b/test/boost/database_test.cc
--- a/test/boost/database_test.cc
+++ b/test/boost/database_test.cc
@@ -689,3 +689,66 @@ SEASTAR_THREAD_TEST_CASE(reader_concurrency_semaphore_selection_test) {
}
}, std::move(cfg)).get();
}
+
+SEASTAR_THREAD_TEST_CASE(max_result_size_for_unlimited_query_selection_test) {
+ cql_test_config cfg;
+
+ cfg.db_config->max_memory_for_unlimited_query_soft_limit(1 * 1024 * 1024, utils::config_file::config_source::CommandLine);
+ cfg.db_config->max_memory_for_unlimited_query_hard_limit(2 * 1024 * 1024, utils::config_file::config_source::CommandLine);
+
+ cfg.dbcfg.emplace();
+ cfg.dbcfg->available_memory = memory::stats().total_memory();
+
+ scheduling_group unknown_scheduling_group;
+
+ const auto user_max_result_size = query::max_result_size(cfg.db_config->max_memory_for_unlimited_query_soft_limit(),
+ cfg.db_config->max_memory_for_unlimited_query_hard_limit());
+ const auto system_max_result_size = query::max_result_size(query::result_memory_limiter::unlimited_result_size);
+ const auto maintenance_max_result_size = system_max_result_size;
+
+ std::vector<std::pair<scheduling_group, query::max_result_size>> scheduling_group_and_expected_max_result_size{
+ {default_scheduling_group(), system_max_result_size}
+ };
+
+ auto clean_up_sched_groups = defer([&scheduling_group_and_expected_max_result_size] {
+ for (const auto& [sched_group, _] : scheduling_group_and_expected_max_result_size) {
+ if (!sched_group.is_main()) {
+ destroy_scheduling_group(sched_group).get();
+ }
+ }
+ });
+
+ auto create_sched_group = [&scheduling_group_and_expected_max_result_size] (const char* name, unsigned shares, scheduling_group& target,
+ query::max_result_size max_size) mutable {
+ target = create_scheduling_group(name, shares).get();
+ scheduling_group_and_expected_max_result_size.emplace_back(target, max_size);
+ };
+
+ create_sched_group("unknown", 800, unknown_scheduling_group, user_max_result_size);
+
+ create_sched_group("compaction", 1000, cfg.dbcfg->compaction_scheduling_group, system_max_result_size);
+ create_sched_group("mem_compaction", 1000, cfg.dbcfg->memory_compaction_scheduling_group, system_max_result_size);
+ create_sched_group("streaming", 200, cfg.dbcfg->streaming_scheduling_group, maintenance_max_result_size);
+ create_sched_group("statement", 1000, cfg.dbcfg->statement_scheduling_group, user_max_result_size);
+ create_sched_group("memtable", 1000, cfg.dbcfg->memtable_scheduling_group, system_max_result_size);
+ create_sched_group("memtable_to_cache", 200, cfg.dbcfg->memtable_to_cache_scheduling_group, system_max_result_size);
+ create_sched_group("gossip", 1000, cfg.dbcfg->gossip_scheduling_group, system_max_result_size);
+
+ do_with_cql_env_thread([&scheduling_group_and_expected_max_result_size] (cql_test_env& e) {
+ auto& db = e.local_db();
+ database_test tdb(db);
+ for (const auto& [sched_group, expected_max_size] : scheduling_group_and_expected_max_result_size) {
+ with_scheduling_group(sched_group, [&db, sched_group = sched_group, expected_max_size = expected_max_size] {
+ const auto max_size = db.get_unlimited_query_max_result_size();
+ if (max_size != expected_max_size) {
+ BOOST_FAIL(fmt::format("Unexpected max_size for scheduling group {}, expected {{{}, {}}}, got {{{}, {}}}",
+ sched_group.name(),
+ expected_max_size.soft_limit,
+ expected_max_size.hard_limit,
+ max_size.soft_limit,
+ max_size.hard_limit));
+ }
+ }).get();
+ }
+ }, std::move(cfg)).get();
+}

Commit Bot

<bot@cloudius-systems.com>
unread,
May 5, 2021, 3:59:00 PM5/5/21
to scylladb-dev@googlegroups.com, Avi Kivity
From: Avi Kivity <a...@scylladb.com>
Committer: Avi Kivity <a...@scylladb.com>
Branch: master
Reply all
Reply to author
Forward
0 new messages