We use a dedicated thread (similar to the nemesis thread)
to periodically perform reconfigurations.

This commit also improves the liveness check at the end of the test. The
previous check would find a leader once and assume that it does not
change, and that the first attempt at sending a request to this leader
succeeds. In reality the leader may change at the end of the test (e.g.
it may be in the middle of stepping down when we find it), and in general
it may take some time for the cluster to stabilize; introducing
reconfigurations exposed this problem. The new check repeatedly tries
to find a leader and perform a request, until a time limit is reached.
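
In outline, the new check boils down to a retry loop along the lines of
the sketch below (a simplified, standalone illustration only; `find_leader`
and `send_append` are hypothetical stand-ins for `wait_for_leader` and
`raft_server::call`, and the real test drives time with the logical timer
and coroutines rather than a wall clock):

    #include <chrono>
    #include <optional>

    struct ret { int x; };

    // Hypothetical stand-ins for wait_for_leader and raft_server::call.
    std::optional<int> find_leader() { return 0; }
    std::optional<ret> send_append(int) { return ret{-1}; }

    // Retry finding a leader and sending it a request until the deadline passes.
    std::optional<ret> obtain_last_result(std::chrono::steady_clock::time_point deadline) {
        while (std::chrono::steady_clock::now() < deadline) {
            auto leader = find_leader();
            if (!leader) {
                continue; // no stable leader yet; try again
            }
            if (auto r = send_append(*leader)) {
                return r; // the first successful response ends the liveness check
            }
            // The leader may have stepped down in the meantime; retry.
        }
        return std::nullopt; // no successful response within the time limit
    }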
---
test/raft/randomized_nemesis_test.cc | 162 +++++++++++++++++++++------
1 file changed, 130 insertions(+), 32 deletions(-)
diff --git a/test/raft/randomized_nemesis_test.cc b/test/raft/randomized_nemesis_test.cc
index a89a2de56..477c82c6b 100644
--- a/test/raft/randomized_nemesis_test.cc
+++ b/test/raft/randomized_nemesis_test.cc
@@ -1067,6 +1067,10 @@ class raft_server {
return _id;
}
+ raft::configuration get_configuration() const {
+ return _server->get_configuration();
+ }
+
void deliver(raft::server_id src, const typename rpc<typename M::state_t>::message_t& m) {
assert(_started);
_queue->push(src, m);
@@ -1355,7 +1359,9 @@ struct wait_for_leader {
}(env.weak_from_this(), std::move(nodes)));
assert(l != raft::server_id{});
- assert(env.get_server(l).accepts_requests());
+
+ // Note: `l` may no longer accept requests at this point if there was a yield at the `co_await` above
+ // and `l` decided to step down, was restarted, or just got removed from the configuration.
co_return l;
}
@@ -2038,10 +2044,17 @@ std::ostream& operator<<(std::ostream& os, const AppendReg::ret& r) {
return os << format("ret{{{}, {}}}", r.x, r.prev);
}
+namespace raft {
+std::ostream& operator<<(std::ostream& os, const raft::server_address& a) {
+ return os << a.id;
+}
+}
+
SEASTAR_TEST_CASE(basic_generator_test) {
using op_type = operation::invocable<operation::either_of<
raft_call<AppendReg>,
- network_majority_grudge<AppendReg>
+ network_majority_grudge<AppendReg>,
+ reconfiguration<AppendReg>
>>;
using history_t = utils::chunked_vector<std::variant<op_type, operation::completion<op_type>>>;
@@ -2068,39 +2081,77 @@ SEASTAR_TEST_CASE(basic_generator_test) {
// Wait for the server to elect itself as a leader.
assert(co_await wait_for_leader<AppendReg>{}(env, {leader_id}, timer, timer.now() + 1000_t) == leader_id);
+ size_t no_all_servers = 10;
+ std::vector<raft::server_id> all_servers{leader_id};
+ for (size_t i = 1; i < no_all_servers; ++i) {
+ all_servers.push_back(co_await env.new_server(false));
+ }
- size_t no_servers = 5;
- std::unordered_set<raft::server_id> servers{leader_id};
- for (size_t i = 1; i < no_servers; ++i) {
- servers.insert(co_await env.new_server(false));
+ size_t no_init_servers = 5;
+
+ // `known_config` represents the set of servers that may potentially be in the cluster configuration.
+ //
+ // It is not possible to determine in general what the 'true' current configuration is (if such a notion even
+ // makes sense at all). Given a sequence of reconfiguration requests, assuming that all requests except possibly
+ // the last one have finished, then:
+ // - if the last request has finished successfully, then the current configuration must be equal
+ // to the one chosen in the last request;
+ // - but if it hasn't finished yet, or it finished with a failure, the current configuration may contain servers
+ // from the one chosen in the last request or from the previously known set of servers.
+ //
+ // The situation is even worse considering that requests may never 'finish', i.e. we may never get a response
+ // to a reconfiguration request (in which case we eventually timeout). These requests may in theory execute
+ // to a reconfiguration request (in which case we eventually time out). These requests may in theory execute
+ // that our timeouts for reconfiguration requests are large enough so that if a reconfiguration request
+ // has timed out, it has either already finished or it never will.
+ // TODO: this may not be true and we may end up with `known_config` that does not contain the current leader
+ // (not observed in practice yet though... I think). Come up with a better approach.
+ std::unordered_set<raft::server_id> known_config;
+
+ for (size_t i = 0; i < no_init_servers; ++i) {
+ known_config.insert(all_servers[i]);
}
assert(std::holds_alternative<std::monostate>(
co_await env.get_server(leader_id).reconfigure(
- std::vector<raft::server_id>{servers.begin(), servers.end()}, timer.now() + 100_t, timer)));
+ std::vector<raft::server_id>{known_config.begin(), known_config.end()}, timer.now() + 100_t, timer)));
- auto threads = operation::make_thread_set(servers.size() + 1);
+ auto threads = operation::make_thread_set(all_servers.size() + 2);
auto nemesis_thread = some(threads);
- auto seed = tests::random::get_int<int32_t>();
+ auto threads_without_nemesis = threads;
+ threads_without_nemesis.erase(nemesis_thread);
+
+ auto reconfig_thread = some(threads_without_nemesis);
- // TODO: make it dynamic based on the current configuration
- std::unordered_set<raft::server_id>& known = servers;
+ auto seed = tests::random::get_int<int32_t>();
raft_call<AppendReg>::state_type db_call_state {
.env = env,
- .known = known,
+ .known = known_config,
.timer = timer
};
network_majority_grudge<AppendReg>::state_type network_majority_grudge_state {
.env = env,
- .known = known,
+ .known = known_config,
.timer = timer,
.rnd = std::mt19937{seed}
};
- auto init_state = op_type::state_type{std::move(db_call_state), std::move(network_majority_grudge_state)};
+ reconfiguration<AppendReg>::state_type reconfiguration_state {
+ .all_servers = all_servers,
+ .env = env,
+ .known = known_config,
+ .timer = timer,
+ .rnd = std::mt19937{seed}
+ };
+
+ auto init_state = op_type::state_type{
+ std::move(db_call_state),
+ std::move(network_majority_grudge_state),
+ std::move(reconfiguration_state)
+ };
using namespace generator;
@@ -2123,11 +2174,16 @@ SEASTAR_TEST_CASE(basic_generator_test) {
return op_type{network_majority_grudge<AppendReg>{raft::logical_clock::duration{dist(engine)}}};
})
),
- stagger(seed, timer.now(), 0_t, 50_t,
- sequence(1, [] (int32_t i) {
- assert(i > 0);
- return op_type{raft_call<AppendReg>{AppendReg::append{i}, 200_t}};
- })
+ pin(reconfig_thread,
+ stagger(seed, timer.now() + 1000_t, 500_t, 500_t,
+ constant([] () { return op_type{reconfiguration<AppendReg>{500_t}}; })
+ ),
+ stagger(seed, timer.now(), 0_t, 50_t,
+ sequence(1, [] (int32_t i) {
+ assert(i > 0);
+ return op_type{raft_call<AppendReg>{AppendReg::append{i}, 200_t}};
+ })
+ )
)
)
);
@@ -2170,6 +2226,10 @@ SEASTAR_TEST_CASE(basic_generator_test) {
} else {
tlogger.debug("completion {}", c);
}
+
+ // TODO: check consistency of reconfiguration completions
+ // (there's not much to check, but for example: we should not get back `conf_change_in_progress`
+ // if our last reconfiguration was successful?).
}
};
@@ -2179,20 +2239,58 @@ SEASTAR_TEST_CASE(basic_generator_test) {
consistency_checker{}};
co_await interp.run();
- // All network partitions are healed, this should succeed:
- auto last_leader = co_await wait_for_leader<AppendReg>{}(env, std::vector<raft::server_id>{servers.begin(), servers.end()}, timer, timer.now() + 10000_t)
- .handle_exception_type([] (logical_timer::timed_out<raft::server_id>) -> raft::server_id {
- tlogger.error("Failed to find a leader after 10000 ticks at the end of test (network partitions are healed).");
- assert(false);
- });
+ tlogger.debug("Finished generator run, time: {}", timer.now());
+
+ // Liveness check: we must be able to obtain a final response after all the nemeses have stopped.
+ // Due to possible multiple leaders at this point and the cluster stabilizing after reconfigurations,
+ // we may need to try sending requests multiple times to different servers to obtain the last result.
+
+ auto limit = timer.now() + 10000_t;
+ size_t cnt = 0;
+ for (; timer.now() < limit; ++cnt) {
+ tlogger.debug("Trying to obtain last result: attempt number {}", cnt + 1);
+
+ auto now = timer.now();
+ auto leader = co_await wait_for_leader<AppendReg>{}(env,
+ std::vector<raft::server_id>{all_servers.begin(), all_servers.end()}, timer, limit)
+ .handle_exception_type([&timer, now] (logical_timer::timed_out<raft::server_id>) -> raft::server_id {
+ tlogger.error("Failed to find a leader after {} ticks at the end of test.", timer.now() - now);
+ assert(false);
+ });
+
+ if (env.get_server(leader).accepts_requests()) {
+ tlogger.debug("Leader {} found after {} ticks", leader, timer.now() - now);
+ } else {
+ tlogger.warn("Leader {} found after {} ticks, but suddenly lost leadership", leader, timer.now() - now);
+ continue;
+ }
+
+ auto config = env.get_server(leader).get_configuration();
+ tlogger.debug("Leader {} configuration: current {} previous {}", leader, config.current, config.previous);
- // Should also succeed
- auto last_res = co_await env.get_server(last_leader).call(AppendReg::append{-1}, timer.now() + 10000_t, timer);
- if (!std::holds_alternative<typename AppendReg::ret>(last_res)) {
- tlogger.error(
- "Expected success on the last call in the test (after electing a leader; network partitions are healed)."
- " Got: {}", last_res);
- assert(false);
+ for (auto& s: all_servers) {
+ auto& srv = env.get_server(s);
+ if (srv.is_leader() && s != leader) {
+ auto conf = srv.get_configuration();
+ tlogger.debug("There is another leader: {}, configuration: current {} previous {}", s, conf.current, conf.previous);
+ }
+ }
+
+ tlogger.debug("From the clients' point of view, the possible cluster members are: {}", known_config);
+
+ auto [res, last_attempted_server] = co_await bouncing{[&timer, &env] (raft::server_id id) {
+ return env.get_server(id).call(AppendReg::append{-1}, timer.now() + 200_t, timer);
+ }}(timer, known_config, leader, known_config.size() + 1, 10_t, 10_t);
+
+ if (std::holds_alternative<typename AppendReg::ret>(res)) {
+ tlogger.debug("Last result: {}", res);
+ co_return;
+ }
+
+ tlogger.warn("Failed to obtain last result at end of test: {} returned by {}", res, last_attempted_server);
}
+
+ tlogger.error("Failed to obtain a final successful response at the end of the test. Number of attempts: {}", cnt);
+ assert(false);
});
}
--
2.31.1