<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:54 AMFeb 24

to scylladb-dev@googlegroups.com

---
gms/gossiper.hh | 1 -
alternator/server.cc | 2 +-
api/failure_detector.cc | 6 ++---
db/virtual_tables.cc | 5 +++--
gms/gossiper.cc | 40 +++++++++++++--------------------
service/migration_manager.cc | 2 +-
service/storage_service.cc | 17 +++++---------
service/topology_coordinator.cc | 2 +-
8 files changed, 29 insertions(+), 46 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 38c2d5796a2..45132c03f70 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -514,7 +514,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
template<typename ID>
future<> wait_alive_helper(noncopyable_function<std::vector<ID>()> get_nodes, std::chrono::milliseconds timeout);
public:
- bool is_alive(inet_address ep) const;
bool is_alive(locator::host_id id) const;

bool is_dead_state(const endpoint_state& eps) const;
diff --git a/alternator/server.cc b/alternator/server.cc
index c1a29cead6f..a9eb1f72494 100644
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -238,7 +238,7 @@ class local_nodelist_handler : public gated_handler {
// Note that it's not enough for the node to be is_alive() - a
// node joining the cluster is also "alive" but not responsive to
// requests. We alive *and* normal. See #19694, #21538.
- if (_gossiper.is_alive(ip) && _gossiper.is_normal(ip)) {
+ if (_gossiper.is_alive(id) && _gossiper.is_normal(ip)) {
// Use the gossiped broadcast_rpc_address if available instead
// of the internal IP address "ip". See discussion in #18711.
rjson::push_back(results, rjson::from_string(_gossiper.get_rpc_address(ip)));
diff --git a/api/failure_detector.cc b/api/failure_detector.cc
index cb712aebc23..00b4d4030a5 100644
--- a/api/failure_detector.cc
+++ b/api/failure_detector.cc
@@ -25,7 +25,7 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
g.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& eps) {
fd::endpoint_state val;
val.addrs = fmt::to_string(addr);
- val.is_alive = g.is_alive(addr);
+ val.is_alive = g.is_alive(eps.get_host_id());
val.generation = eps.get_heart_beat_state().get_generation().value();
val.version = eps.get_heart_beat_state().get_heart_beat_version().value();
val.update_time = eps.get_update_timestamp().time_since_epoch().count();
@@ -65,8 +65,8 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
return g.container().invoke_on(0, [] (gms::gossiper& g) {
std::map<sstring, sstring> nodes_status;
- g.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state&) {
- nodes_status.emplace(fmt::to_string(node), g.is_alive(node) ? "UP" : "DOWN");
+ g.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state& es) {
+ nodes_status.emplace(fmt::to_string(node), g.is_alive(es.get_host_id()) ? "UP" : "DOWN");
});
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
});
diff --git a/db/virtual_tables.cc b/db/virtual_tables.cc
index 947b96010fd..d5c8a0224c7 100644
--- a/db/virtual_tables.cc
+++ b/db/virtual_tables.cc
@@ -80,13 +80,14 @@ class cluster_status_table : public memtable_filling_virtual_table {
mutation m(s, partition_key::from_single_value(*s, data_value(endpoint).serialize_nonnull()));
row& cr = m.partition().clustered_row(*schema(), clustering_key::make_empty()).cells();

- set_cell(cr, "up", gossiper.is_alive(endpoint));
+ auto hostid = eps.get_host_id();
+
+ set_cell(cr, "up", gossiper.is_alive(hostid));
if (!ss.raft_topology_change_enabled() || gossiper.is_shutdown(endpoint)) {
set_cell(cr, "status", gossiper.get_gossip_status(endpoint));
}
set_cell(cr, "load", gossiper.get_application_state_value(endpoint, gms::application_state::LOAD));

- auto hostid = eps.get_host_id();
if (ss.raft_topology_change_enabled() && !gossiper.is_shutdown(endpoint)) {
set_cell(cr, "status", boost::to_upper_copy<std::string>(fmt::format("{}", ss.get_node_state(hostid))));
}
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 56f91f27dcd..d73550dfa29 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -453,7 +453,7 @@ future<> gossiper::handle_echo_msg(gms::inet_address from, const locator::host_i

co_await container().invoke_on(0, [from, from_hid, timeout, &normal] (gossiper& g) -> future<> {

try {
// Wait to see the node as normal. It may node be the case if the node bootstraps

- while (rpc::rpc_clock_type::now() < *timeout && !(normal(g, *from_hid) && g.is_alive(from))) {
+ while (rpc::rpc_clock_type::now() < *timeout && !(normal(g, *from_hid) && g.is_alive(*from_hid))) {

co_await sleep_abortable(std::chrono::milliseconds(100), g._abort_source);
}
} catch(...) {

@@ -619,7 +619,7 @@ future<> gossiper::do_apply_state_locally(gms::inet_address node, endpoint_state
} else {
logger.debug("Ignoring remote version {} <= {} for {}", remote_max_version, local_max_version, node);
}
- if (!is_alive(node) && !is_dead_state(get_endpoint_state(node)) && !shadow_round) { // unless of course, it was dead
+ if (!is_alive(es->get_host_id()) && !is_dead_state(get_endpoint_state(node)) && !shadow_round) { // unless of course, it was dead
mark_alive(node);
}
} else {
@@ -771,7 +771,7 @@ future<> gossiper::do_status_check() {
continue;
}
auto& ep_state = *eps;
- bool is_alive = this->is_alive(endpoint);
+ bool is_alive = this->is_alive(ep_state.get_host_id());
auto update_timestamp = ep_state.get_update_timestamp();

// check if this is a fat client. fat clients are removed automatically from

@@ -1235,7 +1235,7 @@ int64_t gossiper::get_endpoint_downtime(locator::host_id ep) const noexcept {
future<> gossiper::convict(inet_address endpoint) {
auto permit = co_await lock_endpoint(endpoint, null_permit_id);
auto state = get_endpoint_state_ptr(endpoint);
- if (!state || !is_alive(endpoint)) {
+ if (!state || !is_alive(state->get_host_id())) {
co_return;
}
if (is_shutdown(endpoint)) {
@@ -2371,19 +2371,6 @@ clk::time_point gossiper::compute_expire_time() {
return now() + A_VERY_LONG_TIME;
}

-bool gossiper::is_alive(inet_address ep) const {
- if (ep == get_broadcast_address()) {
- return true;
- }
-
- auto sptr = get_endpoint_state_ptr(ep);
- if (!sptr) {
- return false;
- }
-
- return _live_endpoints.contains(sptr->get_host_id());
-}
-
bool gossiper::is_alive(locator::host_id id) const {
if (id == my_host_id()) {
return true;
@@ -2409,12 +2396,15 @@ future<> gossiper::wait_alive_helper(noncopyable_function<std::vector<ID>()> get
auto nodes = get_nodes();
std::vector<ID> live_nodes;
for (const auto& node: nodes) {
- size_t nr_alive = co_await container().map_reduce0([node] (gossiper& g) -> size_t {
- return g.is_alive(node) ? 1 : 0;
- }, 0, std::plus<size_t>());
- logger.debug("Marked node={} as alive on {} out of {} shards", node, nr_alive, smp::count);
- if (nr_alive == smp::count) {
- live_nodes.push_back(node);
+ auto es = get_endpoint_state_ptr(node);
+ if (es) {
+ size_t nr_alive = co_await container().map_reduce0([node = es->get_host_id()] (gossiper& g) -> size_t {
+ return g.is_alive(node) ? 1 : 0;
+ }, 0, std::plus<size_t>());
+ logger.debug("Marked node={} as alive on {} out of {} shards", node, nr_alive, smp::count);
+ if (nr_alive == smp::count) {
+ live_nodes.push_back(node);
+ }
}
}
logger.debug("Waited for marking node as up, replace_nodes={}, live_nodes={}", nodes, live_nodes);
@@ -2728,8 +2718,8 @@ int gossiper::get_down_endpoint_count() const noexcept {
}

int gossiper::get_up_endpoint_count() const noexcept {
- return std::ranges::count_if(_endpoint_state_map | std::views::keys, [this] (const inet_address& ep) {
- return is_alive(ep);
+ return std::ranges::count_if(_endpoint_state_map | std::views::values, [this] (const endpoint_state_ptr& es) {
+ return is_alive(es->get_host_id());
});
}

diff --git a/service/migration_manager.cc b/service/migration_manager.cc
index a90c7200a51..67a0c4d5a44 100644
--- a/service/migration_manager.cc
+++ b/service/migration_manager.cc
@@ -238,7 +238,7 @@ bool migration_manager::have_schema_agreement() {
bool match = false;
static thread_local logger::rate_limit rate_limit{std::chrono::seconds{5}};
_gossiper.for_each_endpoint_state_until([&, my_address = _messaging.broadcast_address()] (const gms::inet_address& endpoint, const gms::endpoint_state& eps) {
- if (endpoint == my_address || !_gossiper.is_alive(endpoint)) {
+ if (endpoint == my_address || !_gossiper.is_alive(eps.get_host_id())) {
return stop_iteration::no;
}
mlogger.debug("Checking schema state for {}.", endpoint);
diff --git a/service/storage_service.cc b/service/storage_service.cc
index 14a1e79ba2c..42ee1fe2834 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -2741,7 +2741,7 @@ future<> storage_service::on_dead(gms::inet_address endpoint, gms::endpoint_stat
future<> storage_service::on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id pid) {
slogger.debug("endpoint={} on_restart: permit_id={}", endpoint, pid);
// If we have restarted before the node was even marked down, we need to reset the connection pool
- if (endpoint != get_broadcast_address() && _gossiper.is_alive(endpoint)) {
+ if (endpoint != get_broadcast_address() && _gossiper.is_alive(state->get_host_id())) {
return on_dead(endpoint, state, pid);
}
return make_ready_future();
@@ -4036,18 +4036,11 @@ future<> storage_service::raft_removenode(locator::host_id host_id, locator::hos
id));
}

- const auto& am = _address_map;
- auto ip = am.find(host_id);
- if (!ip) {
- // What to do if there is no mapping? Wait and retry?
- on_fatal_internal_error(rtlogger, ::format("Remove node cannot find a mapping from node id {} to its ip", id));
- }
-
- if (_gossiper.is_alive(*ip)) {
+ if (_gossiper.is_alive(host_id)) {
const std::string message = ::format(
- "removenode: Rejected removenode operation for node {} ip {} "
+ "removenode: Rejected removenode operation for node {}"
"the node being removed is alive, maybe you should use decommission instead?",
- id, *ip);
+ id);
rtlogger.warn("{}", message);
throw std::runtime_error(message);
}
@@ -7406,7 +7399,7 @@ future<> endpoint_lifecycle_notifier::notify_up(gms::inet_address endpoint) {
}

future<> storage_service::notify_up(inet_address endpoint) {
- if (!_gossiper.is_cql_ready(endpoint) || !_gossiper.is_alive(endpoint)) {
+ if (!_gossiper.is_cql_ready(endpoint) || !_gossiper.is_alive(_gossiper.get_host_id(endpoint))) {
co_return;
}
co_await container().invoke_on_all([endpoint] (auto&& ss) {
diff --git a/service/topology_coordinator.cc b/service/topology_coordinator.cc
index e6c95054f5a..877b7fad810 100644
--- a/service/topology_coordinator.cc
+++ b/service/topology_coordinator.cc
@@ -779,7 +779,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
std::chrono::duration_cast<std::chrono::seconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count();
auto generation = eps.get_heart_beat_state().get_generation().value();
auto host_id = eps.get_host_id();
- if (current_timestamp - generation > timeout && !_topo_sm._topology.contains(raft::server_id{host_id.id}) && !_gossiper.is_alive(addr)) {
+ if (current_timestamp - generation > timeout && !_topo_sm._topology.contains(raft::server_id{host_id.id}) && !_gossiper.is_alive(host_id)) {
topology_mutation_builder builder(guard.write_timestamp());
// This topology mutation moves a node to left state and bans it. Hence, the value of below fields are not useful.
// The dummy_value used for few fields indicates the trivialness of this row entry, and is used to detect this special case.
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:54 AMFeb 24

to scylladb-dev@googlegroups.com

Send digest ack and ack2 by host ids as well now since the id->ip
mapping is available after receiving digest syn. It allows to convert
more code to host id here.
---
gms/gossiper.hh | 14 ++++++-------
idl/gossip.idl.hh | 4 ++--
gms/gossiper.cc | 46 +++++++++++++++++++++---------------------
test/manual/message.cc | 13 ++++++------
4 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 45132c03f70..dd6b33f42bb 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -101,13 +101,13 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar

void init_messaging_service_handler();
future<> uninit_messaging_service_handler();
- future<> handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg);
- future<> handle_ack_msg(msg_addr from, gossip_digest_ack ack_msg);
- future<> handle_ack2_msg(msg_addr from, gossip_digest_ack2 msg);
+ future<> handle_syn_msg(locator::host_id from, gossip_digest_syn syn_msg);
+ future<> handle_ack_msg(locator::host_id from, gossip_digest_ack ack_msg);
+ future<> handle_ack2_msg(locator::host_id from, gossip_digest_ack2 msg);

future<> handle_echo_msg(inet_address from, const locator::host_id* id, seastar::rpc::opt_time_point, std::optional<int64_t> generation_number_opt, bool notify_up);

future<> handle_shutdown_msg(inet_address from, std::optional<int64_t> generation_number_opt);

- future<> do_send_ack_msg(msg_addr from, gossip_digest_syn syn_msg);
- future<> do_send_ack2_msg(msg_addr from, utils::chunked_vector<gossip_digest> ack_msg_digest);
+ future<> do_send_ack_msg(locator::host_id from, gossip_digest_syn syn_msg);
+ future<> do_send_ack2_msg(locator::host_id from, utils::chunked_vector<gossip_digest> ack_msg_digest);
future<gossip_get_endpoint_states_response> handle_get_endpoint_states_msg(gossip_get_endpoint_states_request request);
static constexpr uint32_t _default_cpuid = 0;
msg_addr get_msg_addr(inet_address to) const noexcept;
@@ -117,8 +117,8 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
semaphore _callback_running{1};
semaphore _apply_state_locally_semaphore{100};
seastar::gate _background_msg;
- std::unordered_map<gms::inet_address, syn_msg_pending> _syn_handlers;
- std::unordered_map<gms::inet_address, ack_msg_pending> _ack_handlers;
+ std::unordered_map<locator::host_id, syn_msg_pending> _syn_handlers;
+ std::unordered_map<locator::host_id, ack_msg_pending> _ack_handlers;
// Map ip address and generation number
generation_for_nodes _advertise_to_nodes;
future<> _failure_detector_loop_done{make_ready_future<>()} ;
diff --git a/idl/gossip.idl.hh b/idl/gossip.idl.hh
index f1d9a402a0b..737d5edc1e9 100644
--- a/idl/gossip.idl.hh
+++ b/idl/gossip.idl.hh
@@ -12,7 +12,7 @@ namespace gms {

verb [[with_client_info, with_timeout]] gossip_echo (int64_t generation_number [[version 4.6.0]], bool notify_up [[version 6.1.0]])
verb [[one_way]] gossip_shutdown (gms::inet_address from, int64_t generation_number [[version 4.6.0]])

verb [[with_client_info, one_way, ip]] gossip_digest_syn (gms::gossip_digest_syn syn)

-verb [[with_client_info, one_way, ip]] gossip_digest_ack (gms::gossip_digest_ack ask)
-verb [[with_client_info, one_way, ip]] gossip_digest_ack2 (gms::gossip_digest_ack2 ask)
+verb [[with_client_info, one_way]] gossip_digest_ack (gms::gossip_digest_ack ask)
+verb [[with_client_info, one_way]] gossip_digest_ack2 (gms::gossip_digest_ack2 ask)

verb [[with_client_info, with_timeout, ip]] gossip_get_endpoint_states (gms::gossip_get_endpoint_states_request req) -> gms::gossip_get_endpoint_states_response
}

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index d73550dfa29..4bda2b50364 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -171,7 +171,7 @@ void gossiper::do_sort(utils::chunked_vector<gossip_digest>& g_digest_list) cons

// Depends on
// - no external dependency
-future<> gossiper::handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg) {
+future<> gossiper::handle_syn_msg(locator::host_id from, gossip_digest_syn syn_msg) {
logger.trace("handle_syn_msg():from={},cluster_name:peer={},local={},group0_id:peer={},local={},partitioner_name:peer={},local={}",
from, syn_msg.cluster_id(), get_cluster_name(), syn_msg.group0_id(), get_group0_id(), syn_msg.partioner(), get_partitioner_name());
if (!is_enabled()) {
@@ -180,22 +180,22 @@ future<> gossiper::handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg) {

/* If the message is from a different cluster throw it away. */
if (syn_msg.cluster_id() != get_cluster_name()) {
- logger.warn("ClusterName mismatch from {} {}!={}", from.addr, syn_msg.cluster_id(), get_cluster_name());
+ logger.warn("ClusterName mismatch from {} {}!={}", from, syn_msg.cluster_id(), get_cluster_name());
co_return;
}

/* If the message is from a node with a different group0 id throw it away. */
if (syn_msg.group0_id() && get_group0_id() && syn_msg.group0_id() != get_group0_id()) {
- logger.warn("Group0Id mismatch from {} {} != {}", from.addr, syn_msg.group0_id(), get_group0_id());
+ logger.warn("Group0Id mismatch from {} {} != {}", from, syn_msg.group0_id(), get_group0_id());
co_return;
}

if (syn_msg.partioner() != "" && syn_msg.partioner() != get_partitioner_name()) {
- logger.warn("Partitioner mismatch from {} {}!={}", from.addr, syn_msg.partioner(), get_partitioner_name());
+ logger.warn("Partitioner mismatch from {} {}!={}", from, syn_msg.partioner(), get_partitioner_name());
co_return;
}

- syn_msg_pending& p = _syn_handlers[from.addr];
+ syn_msg_pending& p = _syn_handlers[from];
if (p.pending) {
// The latest syn message from peer has the latest information, so
// it is safe to drop the previous syn message and keep the latest
@@ -211,10 +211,10 @@ future<> gossiper::handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg) {
for (;;) {
try {
co_await do_send_ack_msg(from, std::move(syn_msg));
- if (!_syn_handlers.contains(from.addr)) {
+ if (!_syn_handlers.contains(from)) {
co_return;
}
- syn_msg_pending& p = _syn_handlers[from.addr];
+ syn_msg_pending& p = _syn_handlers[from];
if (p.syn_msg) {
// Process pending gossip syn msg and send ack msg back
logger.debug("Handle queued gossip syn msg from node {}, syn_msg={}, pending={}",
@@ -231,8 +231,8 @@ future<> gossiper::handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg) {
}
} catch (...) {
auto ep = std::current_exception();
- if (_syn_handlers.contains(from.addr)) {
- syn_msg_pending& p = _syn_handlers[from.addr];
+ if (_syn_handlers.contains(from)) {
+ syn_msg_pending& p = _syn_handlers[from];
p.pending = false;
p.syn_msg = {};
}
@@ -242,7 +242,7 @@ future<> gossiper::handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg) {
}
}

-future<> gossiper::do_send_ack_msg(msg_addr from, gossip_digest_syn syn_msg) {
+future<> gossiper::do_send_ack_msg(locator::host_id from, gossip_digest_syn syn_msg) {
auto g_digest_list = syn_msg.get_gossip_digests();
do_sort(g_digest_list);
utils::chunked_vector<gossip_digest> delta_gossip_digest_list;
@@ -289,7 +289,7 @@ static bool should_count_as_msg_processing(const std::map<inet_address, endpoint
// - on_restart callbacks
// - on_join callbacks
// - on_alive
-future<> gossiper::handle_ack_msg(msg_addr id, gossip_digest_ack ack_msg) {
+future<> gossiper::handle_ack_msg(locator::host_id id, gossip_digest_ack ack_msg) {
logger.trace("handle_ack_msg():from={},msg={}", id, ack_msg);

if (!is_enabled()) {
@@ -316,7 +316,7 @@ future<> gossiper::handle_ack_msg(msg_addr id, gossip_digest_ack ack_msg) {

auto from = id;
auto ack_msg_digest = std::move(g_digest_list);
- ack_msg_pending& p = _ack_handlers[from.addr];
+ ack_msg_pending& p = _ack_handlers[from];
if (p.pending) {
// The latest ack message digests from peer has the latest information, so
// it is safe to drop the previous ack message digests and keep the latest
@@ -332,10 +332,10 @@ future<> gossiper::handle_ack_msg(msg_addr id, gossip_digest_ack ack_msg) {
for (;;) {
try {
co_await do_send_ack2_msg(from, std::move(ack_msg_digest));
- if (!_ack_handlers.contains(from.addr)) {
+ if (!_ack_handlers.contains(from)) {
co_return;
}
- ack_msg_pending& p = _ack_handlers[from.addr];
+ ack_msg_pending& p = _ack_handlers[from];
if (p.ack_msg_digest) {
// Process pending gossip ack msg digests and send ack2 msg back
logger.debug("Handle queued gossip ack msg digests from node {}, ack_msg_digest={}, pending={}",
@@ -352,8 +352,8 @@ future<> gossiper::handle_ack_msg(msg_addr id, gossip_digest_ack ack_msg) {
}
} catch (...) {
auto ep = std::current_exception();
- if (_ack_handlers.contains(from.addr)) {
- ack_msg_pending& p = _ack_handlers[from.addr];
+ if (_ack_handlers.contains(from)) {
+ ack_msg_pending& p = _ack_handlers[from];
p.pending = false;
p.ack_msg_digest = {};
}
@@ -363,7 +363,7 @@ future<> gossiper::handle_ack_msg(msg_addr id, gossip_digest_ack ack_msg) {
}
}

-future<> gossiper::do_send_ack2_msg(msg_addr from, utils::chunked_vector<gossip_digest> ack_msg_digest) {
+future<> gossiper::do_send_ack2_msg(locator::host_id from, utils::chunked_vector<gossip_digest> ack_msg_digest) {
/* Get the state required to send to this gossipee - construct GossipDigestAck2Message */
std::map<inet_address, endpoint_state> delta_ep_state_map;
for (auto g_digest : ack_msg_digest) {
@@ -396,7 +396,7 @@ future<> gossiper::do_send_ack2_msg(msg_addr from, utils::chunked_vector<gossip_
// - on_restart callbacks
// - on_join callbacks
// - on_alive callbacks
-future<> gossiper::handle_ack2_msg(msg_addr from, gossip_digest_ack2 msg) {
+future<> gossiper::handle_ack2_msg(locator::host_id from, gossip_digest_ack2 msg) {
logger.trace("handle_ack2_msg():msg={}", msg);
if (!is_enabled()) {
co_return;
@@ -520,19 +520,19 @@ future<rpc::no_wait_type> gossiper::background_msg(sstring type, noncopyable_fun

void gossiper::init_messaging_service_handler() {
ser::gossip_rpc_verbs::register_gossip_digest_syn(&_messaging, [this] (const rpc::client_info& cinfo, gossip_digest_syn syn_msg) {
- auto from = netw::messaging_service::get_source(cinfo);
+ auto from = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
return background_msg("GOSSIP_DIGEST_SYN", [from, syn_msg = std::move(syn_msg)] (gms::gossiper& gossiper) mutable {
return gossiper.handle_syn_msg(from, std::move(syn_msg));
});
});
ser::gossip_rpc_verbs::register_gossip_digest_ack(&_messaging, [this] (const rpc::client_info& cinfo, gossip_digest_ack msg) {
- auto from = netw::messaging_service::get_source(cinfo);
+ auto from = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
return background_msg("GOSSIP_DIGEST_ACK", [from, msg = std::move(msg)] (gms::gossiper& gossiper) mutable {
return gossiper.handle_ack_msg(from, std::move(msg));
});
});
ser::gossip_rpc_verbs::register_gossip_digest_ack2(&_messaging, [this] (const rpc::client_info& cinfo, gossip_digest_ack2 msg) {
- auto from = netw::messaging_service::get_source(cinfo);
+ auto from = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
return background_msg("GOSSIP_DIGEST_ACK2", [from, msg = std::move(msg)] (gms::gossiper& gossiper) mutable {
return gossiper.handle_ack2_msg(from, std::move(msg));
});
@@ -737,9 +737,9 @@ future<> gossiper::remove_endpoint(inet_address endpoint, permit_id pid) {
was_alive = data.live.erase(host_id);
data.unreachable.erase(host_id);
});
+ _syn_handlers.erase(host_id);
+ _ack_handlers.erase(host_id);
}
- _syn_handlers.erase(endpoint);
- _ack_handlers.erase(endpoint);
quarantine_endpoint(endpoint);
logger.info("Removed endpoint {}", endpoint);

diff --git a/test/manual/message.cc b/test/manual/message.cc
index d32a9891a8d..36ef8453677 100644
--- a/test/manual/message.cc
+++ b/test/manual/message.cc
@@ -39,6 +39,7 @@ class tester {
messaging_service& ms;
gms::inet_address _server;
uint32_t _cpuid;
+ locator::host_id _server_id = locator::host_id{utils::UUID("00000000-0000-1000-0000-000000000001")};
public:
tester(netw::messaging_service& ms_) : ms(ms_) {}
using msg_addr = netw::messaging_service::msg_addr;
@@ -64,7 +65,7 @@ class tester {
ser::gossip_rpc_verbs::register_gossip_digest_syn(&ms, [this] (const rpc::client_info& cinfo, gms::gossip_digest_syn msg) {
test_logger.info("Server got syn msg = {}", msg);

- auto from = netw::messaging_service::get_source(cinfo);
+ auto from = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
auto ep1 = inet_address("1.1.1.1");
auto ep2 = inet_address("2.2.2.2");
gms::generation_type gen(800);
@@ -86,7 +87,7 @@ class tester {

ser::gossip_rpc_verbs::register_gossip_digest_ack(&ms, [this] (const rpc::client_info& cinfo, gms::gossip_digest_ack msg) {
test_logger.info("Server got ack msg = {}", msg);
- auto from = netw::messaging_service::get_source(cinfo);
+ auto from = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
// Prepare gossip_digest_ack2 message
auto ep1 = inet_address("3.3.3.3");
std::map<inet_address, endpoint_state> eps{
@@ -144,10 +145,9 @@ class tester {

future<> test_gossip_shutdown() {
test_logger.info("=== {} ===", __func__);
- auto id = get_msg_addr();
inet_address from("127.0.0.1");
int64_t gen = 0x1;
- return ser::gossip_rpc_verbs::send_gossip_shutdown(&ms, id, from, gen).then([] () {
+ return ser::gossip_rpc_verbs::send_gossip_shutdown(&ms, _server_id, from, gen).then([] () {
test_logger.info("Client sent gossip_shutdown got reply = void");
return make_ready_future<>();
});
@@ -155,9 +155,8 @@ class tester {

future<> test_echo() {
test_logger.info("=== {} ===", __func__);
- auto id = get_msg_addr();
int64_t gen = 0x1;
- return ser::gossip_rpc_verbs::send_gossip_echo(&ms, id, netw::messaging_service::clock_type::now() + std::chrono::seconds(10), gen, false).then_wrapped([] (auto&& f) {
+ return ser::gossip_rpc_verbs::send_gossip_echo(&ms, _server_id, netw::messaging_service::clock_type::now() + std::chrono::seconds(10), gen, false).then_wrapped([] (auto&& f) {
try {
f.get();
return make_ready_future<>();
@@ -212,7 +211,7 @@ int main(int ac, char ** av) {
gossip_address_map.start().get();
seastar::sharded<netw::messaging_service> messaging;

messaging.start(locator::host_id{}, listen, 7000, std::ref(feature_service),
- std::ref(gossip_address_map), std::ref(compressor_tracker),
+ std::ref(gossip_address_map), gms::generation_type{}, std::ref(compressor_tracker),
std::ref(sl_controller)).get();
auto stop_messaging = deferred_stop(messaging);

seastar::sharded<tester> testers;
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:55 AMFeb 24

to scylladb-dev@googlegroups.com

Provide default implementation for them instead. Will be easier to rework them later.
---
service/endpoint_lifecycle_subscriber.hh | 8 ++++----
service/qos/service_level_controller.hh | 3 ---
service/storage_proxy.hh | 2 --
service/qos/service_level_controller.cc | 6 ------
service/storage_proxy.cc | 4 ----
service/topology_coordinator.cc | 2 --
6 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/service/endpoint_lifecycle_subscriber.hh b/service/endpoint_lifecycle_subscriber.hh
index 84a00db69bc..59697d5d0ac 100644
--- a/service/endpoint_lifecycle_subscriber.hh
+++ b/service/endpoint_lifecycle_subscriber.hh
@@ -35,7 +35,7 @@ class endpoint_lifecycle_subscriber {
*
* @param endpoint the newly added endpoint.
*/
- virtual void on_join_cluster(const gms::inet_address& endpoint) = 0;
+ virtual void on_join_cluster(const gms::inet_address& endpoint) {}

/**
* Called when a new node leave the cluster (decommission or removeToken).
@@ -43,21 +43,21 @@ class endpoint_lifecycle_subscriber {
* @param endpoint the IP of the endpoint that is leaving.
* @param host_id the host ID of the endpoint that is leaving.
*/
- virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& host_id) = 0;
+ virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& host_id) {}

/**
* Called when a node is marked UP.
*
* @param endpoint the endpoint marked UP.
*/
- virtual void on_up(const gms::inet_address& endpoint) = 0;
+ virtual void on_up(const gms::inet_address& endpoint) {}

/**
* Called when a node is marked DOWN.
*
* @param endpoint the endpoint marked DOWN.
*/
- virtual void on_down(const gms::inet_address& endpoint) = 0;
+ virtual void on_down(const gms::inet_address& endpoint) {}
};

class endpoint_lifecycle_notifier {
diff --git a/service/qos/service_level_controller.hh b/service/qos/service_level_controller.hh
index 55674e1c76a..e1ebcbf116e 100644
--- a/service/qos/service_level_controller.hh
+++ b/service/qos/service_level_controller.hh
@@ -439,10 +439,7 @@ class service_level_controller : public peering_sharded_service<service_level_co

static sstring default_service_level_name;

- virtual void on_join_cluster(const gms::inet_address& endpoint) override;
virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) override;
- virtual void on_up(const gms::inet_address& endpoint) override;
- virtual void on_down(const gms::inet_address& endpoint) override;
};

future<shared_ptr<service_level_controller::service_level_distributed_data_accessor>>
diff --git a/service/storage_proxy.hh b/service/storage_proxy.hh
index a307a4ffb04..3396e1c41d3 100644
--- a/service/storage_proxy.hh
+++ b/service/storage_proxy.hh
@@ -741,9 +741,7 @@ class storage_proxy : public seastar::async_sharded_service<storage_proxy>, publ
return _pending_writes_phaser.advance_and_await();
}

- virtual void on_join_cluster(const gms::inet_address& endpoint) override;
virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) override;
- virtual void on_up(const gms::inet_address& endpoint) override;
virtual void on_down(const gms::inet_address& endpoint) override;

friend class abstract_read_executor;
diff --git a/service/qos/service_level_controller.cc b/service/qos/service_level_controller.cc
index c76421f3c71..b965d98e62b 100644
--- a/service/qos/service_level_controller.cc
+++ b/service/qos/service_level_controller.cc
@@ -891,8 +891,6 @@ future<> service_level_controller::do_remove_service_level(sstring name, bool re
return make_ready_future();
}

-void service_level_controller::on_join_cluster(const gms::inet_address& endpoint) { }
-
void service_level_controller::on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) {
if (this_shard_id() == global_controller && _token_metadata.get()->get_topology().is_me(hid)) {
_global_controller_db->dist_data_update_aborter.request_abort();
@@ -900,10 +898,6 @@ void service_level_controller::on_leave_cluster(const gms::inet_address& endpoin
}
}

-void service_level_controller::on_up(const gms::inet_address& endpoint) { }
-
-void service_level_controller::on_down(const gms::inet_address& endpoint) { }
-
void service_level_controller::register_subscriber(qos_configuration_change_subscriber* subscriber) {
_subscribers.add(subscriber);
}
diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc
index fc219b3dcff..48f85c1f261 100644
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -6898,16 +6898,12 @@ future<> storage_proxy::wait_for_hint_sync_point(const db::hints::sync_point spo
co_return;
}

-void storage_proxy::on_join_cluster(const gms::inet_address& endpoint) {};
-
void storage_proxy::on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) {
// Discarding these futures is safe. They're awaited by db::hints::manager::stop().
(void) _hints_manager.drain_for(hid, endpoint);
(void) _hints_for_views_manager.drain_for(hid, endpoint);
}

-void storage_proxy::on_up(const gms::inet_address& endpoint) {};
-
void storage_proxy::cancel_write_handlers(noncopyable_function<bool(const abstract_write_response_handler&)> filter_fun) {
SCYLLA_ASSERT(thread::running_in_thread());
auto it = _cancellable_write_handlers_list->begin();
diff --git a/service/topology_coordinator.cc b/service/topology_coordinator.cc
index 877b7fad810..9dd8d6251e3 100644
--- a/service/topology_coordinator.cc
+++ b/service/topology_coordinator.cc
@@ -2870,8 +2870,6 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
future<> run();
future<> stop();

- virtual void on_join_cluster(const gms::inet_address& endpoint) {}
- virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) {};
virtual void on_up(const gms::inet_address& endpoint) { _topo_sm.event.broadcast(); };
virtual void on_down(const gms::inet_address& endpoint) { _topo_sm.event.broadcast(); };
};
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:55 AMFeb 24

to scylladb-dev@googlegroups.com

Index live and dead endpoints by host id. It also allows to simplify
some code that does a translation.
---
gms/gossiper.hh | 24 ++---
api/gossiper.cc | 2 +-
db/hints/manager.cc | 50 +----------
dht/boot_strapper.cc | 2 +-
gms/gossiper.cc | 177 ++++++++++++++++++-------------------
service/storage_service.cc | 6 +-
6 files changed, 107 insertions(+), 154 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 7d4f2657cb8..38c2d5796a2 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -222,17 +222,17 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
*/
atomic_vector<shared_ptr<i_endpoint_state_change_subscriber>> _subscribers;

- std::list<std::vector<inet_address>> _endpoints_to_talk_with;
+ std::list<std::vector<locator::host_id>> _endpoints_to_talk_with;

/* live member set */
- std::unordered_set<inet_address> _live_endpoints;
+ std::unordered_set<locator::host_id> _live_endpoints;
uint64_t _live_endpoints_version = 0;

/* nodes are being marked as alive */
std::unordered_set<inet_address> _pending_mark_alive_endpoints;

/* unreachable member set */
- std::unordered_map<inet_address, clk::time_point> _unreachable_endpoints;
+ std::unordered_map<locator::host_id, clk::time_point> _unreachable_endpoints;

semaphore _endpoint_update_semaphore = semaphore(1);

@@ -255,8 +255,8 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
future<semaphore_units<>> lock_endpoint_update_semaphore();

struct live_and_unreachable_endpoints {
- std::unordered_set<inet_address> live;
- std::unordered_map<inet_address, clk::time_point> unreachable;
+ std::unordered_set<locator::host_id> live;
+ std::unordered_map<locator::host_id, clk::time_point> unreachable;
};

// Must be called on shard 0.
@@ -302,15 +302,14 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
/**
* @return a list of unreachable gossip participants, including fat clients
*/
- std::set<inet_address> get_unreachable_members() const;
- std::set<locator::host_id> get_unreachable_host_ids() const;
+ std::set<locator::host_id> get_unreachable_members() const;

/**
* @return a list of unreachable nodes
*/
std::set<locator::host_id> get_unreachable_nodes() const;

- int64_t get_endpoint_downtime(inet_address ep) const noexcept;
+ int64_t get_endpoint_downtime(locator::host_id ep) const noexcept;

/**
* Return either: the greatest heartbeat or application state
@@ -410,10 +409,12 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
* @param epSet a set of endpoint from which a random endpoint is chosen.
* @return true if the chosen endpoint is also a seed.
*/
- future<> send_gossip(gossip_digest_syn message, std::set<inet_address> epset);
+ template<typename T>
+ future<> send_gossip(gossip_digest_syn message, std::set<T> epset);

/* Sends a Gossip message to a live member */
- future<> do_gossip_to_live_member(gossip_digest_syn message, inet_address ep);
+ template<typename T>
+ future<> do_gossip_to_live_member(gossip_digest_syn message, T ep);

/* Sends a Gossip message to an unreachable member */
future<> do_gossip_to_unreachable_member(gossip_digest_syn message);
@@ -521,7 +522,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
future<> wait_alive(std::vector<gms::inet_address> nodes, std::chrono::milliseconds timeout);
future<> wait_alive(std::vector<locator::host_id> nodes, std::chrono::milliseconds timeout);
future<> wait_alive(noncopyable_function<std::vector<locator::host_id>()> get_nodes, std::chrono::milliseconds timeout);
- std::set<inet_address> get_live_members_helper() const;

// Wait for `n` live nodes to show up in gossip (including ourself).
future<> wait_for_live_nodes_to_show_up(size_t n);
@@ -703,7 +703,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
}
private:
future<> failure_detector_loop();
- future<> failure_detector_loop_for_node(gms::inet_address node, generation_type gossip_generation, uint64_t live_endpoints_version);
+ future<> failure_detector_loop_for_node(locator::host_id node, generation_type gossip_generation, uint64_t live_endpoints_version);
};

diff --git a/api/gossiper.cc b/api/gossiper.cc
index 4826c2f3eb3..e5fcd2e1076 100644
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -35,7 +35,7 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {

gms::inet_address ep(req->get_path_param("addr"));

// synchronize unreachable_members on all shards
co_await g.get_unreachable_members_synchronized();
- co_return g.get_endpoint_downtime(ep);
+ co_return g.get_endpoint_downtime(g.get_host_id(ep));
});

httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
diff --git a/db/hints/manager.cc b/db/hints/manager.cc
index 77ca698b2ae..be6eb99a0f7 100644
--- a/db/hints/manager.cc
+++ b/db/hints/manager.cc
@@ -461,59 +461,13 @@ bool manager::store_hint(endpoint_id host_id, schema_ptr s, lw_shared_ptr<const
}
}

-/// Checks if there is a node corresponding to a given host ID that hasn't been down for longer
-/// than a given amount of time. The function relies on information obtained from the passed `gms::gossiper`.
-static bool endpoint_downtime_not_bigger_than(const gms::gossiper& gossiper, const locator::host_id& host_id,
- uint64_t max_downtime_us)
-{
- // We want to enforce small buffer optimization in the call
- // to `gms::gossiper::for_each_endpoint_state_until()` below
- // to avoid an unnecessary allocation.
- // Since we need all these four pieces of information in the lambda,
- // the function object passed to the function might be too big.
- // That's why we create it locally on the stack and only pass a reference to it.
- struct sbo_info {
- locator::host_id host_id;
- const gms::gossiper& gossiper;
- int64_t max_hint_window_us;
- bool small_node_downtime;
- };
-
- sbo_info info {
- .host_id = host_id,
- .gossiper = gossiper,
- .max_hint_window_us = max_downtime_us,
- .small_node_downtime = false
- };
-
- gossiper.for_each_endpoint_state_until(
- [&info] (const gms::inet_address& ip, const gms::endpoint_state& state) {
- const auto* app_state = state.get_application_state_ptr(gms::application_state::HOST_ID);
- if (!app_state) {
- manager_logger.error("Host ID application state for {} has not been found. Endpoint state: {}", ip, state);
- return stop_iteration::no;
- }
- const auto host_id = locator::host_id{utils::UUID{app_state->value()}};
- if (host_id != info.host_id) {
- return stop_iteration::no;
- }
- if (info.gossiper.get_endpoint_downtime(ip) <= info.max_hint_window_us) {
- info.small_node_downtime = true;
- return stop_iteration::yes;
- }
- return stop_iteration::no;
- });
-
- return info.small_node_downtime;
-}
-
bool manager::too_many_in_flight_hints_for(endpoint_id ep) const noexcept {
// There is no need to check the DC here because if there is an in-flight hint for this
// endpoint, then this means that its DC has already been checked and found to be ok.
return _stats.size_of_hints_in_progress > max_size_of_hints_in_progress()
&& !_proxy.local_db().get_token_metadata().get_topology().is_me(ep)
&& hints_in_progress_for(ep) > 0
- && endpoint_downtime_not_bigger_than(local_gossiper(), ep, _max_hint_window_us);
+ && local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
}

bool manager::can_hint_for(endpoint_id ep) const noexcept {
@@ -548,7 +502,7 @@ bool manager::can_hint_for(endpoint_id ep) const noexcept {
return false;
}

- const bool node_is_alive = endpoint_downtime_not_bigger_than(local_gossiper(), ep, _max_hint_window_us);
+ const bool node_is_alive = local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
if (!node_is_alive) {
manager_logger.trace("{} has been down for too long, not hinting", ep);
return false;
diff --git a/dht/boot_strapper.cc b/dht/boot_strapper.cc
index 5656634a86f..05836917c1a 100644
--- a/dht/boot_strapper.cc
+++ b/dht/boot_strapper.cc
@@ -42,7 +42,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason, gms::gossiper
}
try {
auto streamer = make_lw_shared<range_streamer>(_db, _stream_manager, _token_metadata_ptr, _abort_source, _tokens, _address, _dr, description, reason, topo_guard);
- auto nodes_to_filter = gossiper.get_unreachable_host_ids();
+ auto nodes_to_filter = gossiper.get_unreachable_members();
if (reason == streaming::stream_reason::replace) {
nodes_to_filter.insert(std::move(replace_address));
}
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index af36f599ee5..56f91f27dcd 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -24,6 +24,7 @@
#include "db/system_keyspace.hh"
#include <fmt/chrono.h>
#include <fmt/ranges.h>
+#include <ranges>
#include <seastar/core/sleep.hh>
#include <seastar/core/thread.hh>
#include <seastar/core/metrics.hh>
@@ -558,8 +559,9 @@ future<> gossiper::uninit_messaging_service_handler() {
return ser::gossip_rpc_verbs::unregister(&ms);
}

-future<> gossiper::send_gossip(gossip_digest_syn message, std::set<inet_address> epset) {
- utils::chunked_vector<inet_address> __live_endpoints(epset.begin(), epset.end());
+template<typename T>
+future<> gossiper::send_gossip(gossip_digest_syn message, std::set<T> epset) {
+ utils::chunked_vector<T> __live_endpoints(epset.begin(), epset.end());
size_t size = __live_endpoints.size();
if (size < 1) {
return make_ready_future<>();
@@ -567,8 +569,7 @@ future<> gossiper::send_gossip(gossip_digest_syn message, std::set<inet_address>
/* Generate a random number from 0 -> size */
std::uniform_int_distribution<int> dist(0, size - 1);
int index = dist(_random_engine);
- inet_address to = __live_endpoints[index];
- auto id = get_msg_addr(to);
+ std::conditional_t<std::is_same_v<T, gms::inet_address>, netw::msg_addr, T> id{__live_endpoints[index]};
logger.trace("Sending a GossipDigestSyn to {} ...", id);
return ser::gossip_rpc_verbs::send_gossip_digest_syn(&_messaging, id, std::move(message)).handle_exception([id] (auto ep) {
// It is normal to reach here because it is normal that a node
@@ -720,17 +721,29 @@ future<> gossiper::remove_endpoint(inet_address endpoint, permit_id pid) {

auto state = get_endpoint_state_ptr(endpoint);

+ if (!state) {
+ logger.warn("There is no state for the removed IP {}", endpoint);
+ co_return;
+ }
+
+ auto host_id = state->get_host_id();
+
bool was_alive = false;
- co_await mutate_live_and_unreachable_endpoints([endpoint, &was_alive] (live_and_unreachable_endpoints& data) {
- was_alive = data.live.erase(endpoint);
- data.unreachable.erase(endpoint);
- });
+
+ if (_address_map.find(host_id) == endpoint) {
+ // During IP address change we may have a situation where we work on old address
+ // but there is a new address for the same host id, so no need to mark host id as down
+ co_await mutate_live_and_unreachable_endpoints([host_id, &was_alive] (live_and_unreachable_endpoints& data) {
+ was_alive = data.live.erase(host_id);
+ data.unreachable.erase(host_id);
+ });
+ }
_syn_handlers.erase(endpoint);

_ack_handlers.erase(endpoint);
quarantine_endpoint(endpoint);
logger.info("Removed endpoint {}", endpoint);

- if (was_alive && state) {
+ if (was_alive) {
try {
logger.info("InetAddress {}/{} is now DOWN, status = {}", state->get_host_id(), endpoint, get_gossip_status(*state));
co_await do_on_dead_notifications(endpoint, std::move(state), pid);
@@ -930,7 +943,7 @@ future<std::set<inet_address>> gossiper::get_live_members_synchronized() {
return container().invoke_on(0, [] (gms::gossiper& g) -> future<std::set<inet_address>> {
// Make sure the value we return is synchronized on all shards
auto lock = co_await g.lock_endpoint_update_semaphore();
- co_return g.get_live_members_helper();
+ co_return g.get_live_members() | std::views::transform([&g] (auto id) { return g._address_map.get(id); }) | std::ranges::to<std::set>();
});
}

@@ -938,17 +951,16 @@ future<std::set<inet_address>> gossiper::get_unreachable_members_synchronized()
return container().invoke_on(0, [] (gms::gossiper& g) -> future<std::set<inet_address>> {
// Make sure the value we return is synchronized on all shards
auto lock = co_await g.lock_endpoint_update_semaphore();
- co_return g.get_unreachable_members();
+ co_return g.get_unreachable_members() | std::views::transform([&g] (auto id) { return g._address_map.get(id); }) | std::ranges::to<std::set>();
});
}

-future<> gossiper::failure_detector_loop_for_node(gms::inet_address node, generation_type gossip_generation, uint64_t live_endpoints_version) {
+future<> gossiper::failure_detector_loop_for_node(locator::host_id host_id, generation_type gossip_generation, uint64_t live_endpoints_version) {
auto last = gossiper::clk::now();
auto diff = gossiper::clk::duration(0);
auto echo_interval = std::chrono::seconds(2);
auto max_duration = echo_interval + std::chrono::milliseconds(_gcfg.failure_detector_timeout_ms());
- auto host_id = get_host_id(node);
-
+ auto node = _address_map.get(host_id);
while (is_enabled()) {
bool failed = false;
try {
@@ -1000,7 +1012,7 @@ future<> gossiper::failure_detector_loop() {
co_await sleep_abortable(std::chrono::seconds(1), _abort_source);
continue;
}
- auto nodes = _live_endpoints | std::ranges::to<std::vector<inet_address>>();
+ auto nodes = _live_endpoints | std::ranges::to<std::vector>();
auto live_endpoints_version = _live_endpoints_version;
auto generation_number = my_endpoint_state().get_heart_beat_state().get_generation();
co_await coroutine::parallel_for_each(std::views::iota(0u, nodes.size()), [this, generation_number, live_endpoints_version, &nodes] (size_t idx) {
@@ -1091,7 +1103,7 @@ void gossiper::run() {

gossip_digest_syn message(get_cluster_name(), get_partitioner_name(), g_digests, get_group0_id());

if (_endpoints_to_talk_with.empty() && !_live_endpoints.empty()) {
- auto live_endpoints = _live_endpoints | std::ranges::to<std::vector<inet_address>>();
+ auto live_endpoints = _live_endpoints | std::ranges::to<std::vector>();

std::shuffle(live_endpoints.begin(), live_endpoints.end(), _random_engine);
// This guarantees the local node will talk with all nodes
// in live_endpoints at least once within nr_rounds gossip rounds.

@@ -1099,17 +1111,11 @@ void gossiper::run() {
// https://www.cs.cornell.edu/projects/Quicksilver/public_pdfs/SWIM.pdf

constexpr size_t nr_rounds = 10;
size_t nodes_per_round = (live_endpoints.size() + nr_rounds - 1) / nr_rounds;

- _endpoints_to_talk_with = live_endpoints | std::views::chunk(nodes_per_round) | std::ranges::to<std::list<std::vector<inet_address>>>();
+ _endpoints_to_talk_with = live_endpoints | std::views::chunk(nodes_per_round) | std::ranges::to<std::list<std::vector<locator::host_id>>>();

logger.debug("Set live nodes to talk: endpoint_state_map={}, all_live_nodes={}, endpoints_to_talk_with={}",
_endpoint_state_map.size(), live_endpoints, _endpoints_to_talk_with);
}

- if (_endpoints_to_talk_with.empty()) {
- auto nodes = std::vector<inet_address>(_seeds.begin(), _seeds.end());
- logger.debug("No live nodes yet: try initial contact point nodes={}", nodes);
- if (!nodes.empty()) {
- _endpoints_to_talk_with.push_back(std::move(nodes));
- }
- }
+
if (!_endpoints_to_talk_with.empty()) {
auto live_nodes = std::move(_endpoints_to_talk_with.front());
_endpoints_to_talk_with.pop_front();
@@ -1121,6 +1127,15 @@ void gossiper::run() {
});
});
}
+ } else if (!_seeds.empty()) {
+ logger.debug("No live nodes yet: try initial contact point nodes={}", _seeds);
+ for (auto& ep: _seeds) {
+ (void)with_gate(_background_msg, [this, message, ep] () mutable {
+ return do_gossip_to_live_member(message, ep).handle_exception([] (auto ep) {
+ logger.trace("Failed to send gossip to live members: {}", ep);
+ });
+ });
+ }
} else {
logger.debug("No one to talk with");
}
@@ -1170,21 +1185,17 @@ future<> gossiper::unregister_(shared_ptr<i_endpoint_state_change_subscriber> su
return _subscribers.remove(subscriber);
}

-std::set<inet_address> gossiper::get_live_members_helper() const {
- std::set<inet_address> live_members(_live_endpoints.begin(), _live_endpoints.end());
+std::set<locator::host_id> gossiper::get_live_members() const {
+ std::set<locator::host_id> live_members(_live_endpoints.begin(), _live_endpoints.end());
auto myip = get_broadcast_address();
logger.debug("live_members before={}", live_members);
if (!is_shutdown(myip)) {
- live_members.insert(myip);
+ live_members.insert(my_host_id());
}
logger.debug("live_members after={}", live_members);
return live_members;
}

-std::set<locator::host_id> gossiper::get_live_members() const {
- return get_live_members_helper() | std::views::transform([this] (inet_address ip) { return get_host_id(ip); }) | std::ranges::to<std::set>();
-}
-
std::set<locator::host_id> gossiper::get_live_token_owners() const {
std::set<locator::host_id> token_owners;
auto normal_token_owners = get_token_metadata_ptr()->get_normal_token_owners();
@@ -1208,7 +1219,7 @@ std::set<locator::host_id> gossiper::get_unreachable_nodes() const {
}

// Return downtime in microseconds
-int64_t gossiper::get_endpoint_downtime(inet_address ep) const noexcept {
+int64_t gossiper::get_endpoint_downtime(locator::host_id ep) const noexcept {
auto it = _unreachable_endpoints.find(ep);
if (it != _unreachable_endpoints.end()) {
auto& downtime = it->second;
@@ -1234,19 +1245,8 @@ future<> gossiper::convict(inet_address endpoint) {
}
}

-std::set<inet_address> gossiper::get_unreachable_members() const {
- std::set<inet_address> ret;
- for (auto&& x : _unreachable_endpoints) {
- ret.insert(x.first);
- }
- return ret;
-}
-
-std::set<locator::host_id> gossiper::get_unreachable_host_ids() const {
- return get_unreachable_members() |
- std::views::transform([this] (gms::inet_address ip) { return get_host_id(ip); }) |
- std::ranges::to<std::set>();
-
+std::set<locator::host_id> gossiper::get_unreachable_members() const {
+ return _unreachable_endpoints | std::views::keys | std::ranges::to<std::set>();
}

version_type gossiper::get_max_endpoint_state_version(const endpoint_state& state) const noexcept {
@@ -1260,14 +1260,18 @@ version_type gossiper::get_max_endpoint_state_version(const endpoint_state& stat

future<> gossiper::evict_from_membership(inet_address endpoint, permit_id pid) {
verify_permit(endpoint, pid);
- co_await mutate_live_and_unreachable_endpoints([endpoint] (live_and_unreachable_endpoints& data) {
- data.unreachable.erase(endpoint);
- data.live.erase(endpoint);
- });
+ auto hid = get_host_id(endpoint);
+ if (_address_map.find(hid) == endpoint) {
+ // During IP address change we may have a situation where we work on old address
+ // but there is a new address for the same host id, so no need to mark host id as down
+ co_await mutate_live_and_unreachable_endpoints([hid] (live_and_unreachable_endpoints& data) {
+ data.unreachable.erase(hid);
+ data.live.erase(hid);
+ });
+ }

- co_await container().invoke_on_all([endpoint] (auto& g) {
+ co_await container().invoke_on_all([endpoint, hid] (auto& g) {
if (this_shard_id() == 0) {
- auto hid = g.get_endpoint_state_ptr(endpoint)->get_host_id();
if (g._address_map.find(hid) == endpoint) {
// During IP address change we may have a situation where we remove old gossiper state
// but there is a new address for the same host id, so no need to make it expiring
@@ -1435,8 +1439,9 @@ future<version_type> gossiper::get_current_heart_beat_version(inet_address endpo
});
}

-future<> gossiper::do_gossip_to_live_member(gossip_digest_syn message, gms::inet_address ep) {
- return send_gossip(message, {ep});
+template<typename T>
+future<> gossiper::do_gossip_to_live_member(gossip_digest_syn message, T ep) {
+ return send_gossip<T>(message, {ep});
}

future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
@@ -1448,10 +1453,10 @@ future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
std::uniform_real_distribution<double> dist(0, 1);
double rand_dbl = dist(_random_engine);
if (rand_dbl < prob) {
- std::set<inet_address> addrs;
+ std::set<locator::host_id> addrs;
for (auto&& x : _unreachable_endpoints) {
// Ignore the node which is decommissioned
- if (get_gossip_status(x.first) != sstring(versioned_value::STATUS_LEFT)) {
+ if (get_gossip_status(_address_map.get(x.first)) != sstring(versioned_value::STATUS_LEFT)) {
addrs.insert(x.first);
}
}
@@ -1751,7 +1756,7 @@ future<> gossiper::real_mark_alive(inet_address addr) {

logger.debug("removing expire time for endpoint : {}", addr);
bool was_live = false;
- co_await mutate_live_and_unreachable_endpoints([addr, &was_live] (live_and_unreachable_endpoints& data) {
+ co_await mutate_live_and_unreachable_endpoints([addr = es->get_host_id(), &was_live] (live_and_unreachable_endpoints& data) {
data.unreachable.erase(addr);
auto [it_, inserted] = data.live.insert(addr);
was_live = !inserted;
@@ -1762,9 +1767,9 @@ future<> gossiper::real_mark_alive(inet_address addr) {
}

if (_endpoints_to_talk_with.empty()) {
- _endpoints_to_talk_with.push_back({addr});
+ _endpoints_to_talk_with.push_back({es->get_host_id()});
} else {
- _endpoints_to_talk_with.front().push_back(addr);
+ _endpoints_to_talk_with.front().push_back(es->get_host_id());
}

logger.info("InetAddress {}/{} is now UP, status = {}", es->get_host_id(), addr, status);
@@ -1778,10 +1783,15 @@ future<> gossiper::real_mark_alive(inet_address addr) {
future<> gossiper::mark_dead(inet_address addr, endpoint_state_ptr state, permit_id pid) {
logger.trace("marking as down {}", addr);
verify_permit(addr, pid);
- co_await mutate_live_and_unreachable_endpoints([addr] (live_and_unreachable_endpoints& data) {
- data.live.erase(addr);
- data.unreachable[addr] = now();
- });
+
+ if (_address_map.find(state->get_host_id()) == addr) {
+ // During IP address change we may have a situation where we work on old address
+ // but there is a new address for the same host id, so no need to mark host id as down
+ co_await mutate_live_and_unreachable_endpoints([addr = state->get_host_id()] (live_and_unreachable_endpoints& data) {
+ data.live.erase(addr);
+ data.unreachable[addr] = now();
+ });
+ }
logger.info("InetAddress {}/{} is now DOWN, status = {}", state->get_host_id(), addr, get_gossip_status(*state));
co_await do_on_dead_notifications(addr, std::move(state), pid);
}
@@ -2207,7 +2217,7 @@ future<> gossiper::add_saved_endpoint(locator::host_id host_id, gms::loaded_endp
}
auto generation = ep_state.get_heart_beat_state().get_generation();
co_await replicate(ep, std::move(ep_state), permit.id());
- _unreachable_endpoints[ep] = now();
+ _unreachable_endpoints[host_id] = now();
logger.trace("Adding saved endpoint {} {}", ep, generation);
}

@@ -2290,8 +2300,7 @@ future<> gossiper::do_stop_gossiping() {
logger.info("Announcing shutdown");

co_await add_local_application_state(application_state::STATUS, versioned_value::shutdown(true));
auto live_endpoints = _live_endpoints;

- for (inet_address addr : live_endpoints) {
- auto id = get_host_id(addr);
+ for (locator::host_id id : live_endpoints) {

logger.info("Sending a GossipShutdown to {} with generation {}", id, local_generation);
try {
co_await ser::gossip_rpc_verbs::send_gossip_shutdown(&_messaging, id, get_broadcast_address(), local_generation.value());

@@ -2366,41 +2375,31 @@ bool gossiper::is_alive(inet_address ep) const {
if (ep == get_broadcast_address()) {
return true;
}
- bool is_alive = _live_endpoints.contains(ep);

-#ifndef SCYLLA_BUILD_MODE_RELEASE
- // Live endpoints must always have a valid endpoint_state.
- // Verify that in testing mode to reduce the overhead in production.
- if (is_alive && !get_endpoint_state_ptr(ep)) {
- on_internal_error(logger, fmt::format("Node {} is alive but has no endpoint state", ep));
+ auto sptr = get_endpoint_state_ptr(ep);
+ if (!sptr) {
+ return false;
}
-#endif

- return is_alive;
+ return _live_endpoints.contains(sptr->get_host_id());

}

bool gossiper::is_alive(locator::host_id id) const {

- auto ip_opt = _address_map.find(id);
-
- if (!ip_opt) {
- // if host ID is not in the gossiper state (and hence not in the address map) it is dead
- return false;
+ if (id == my_host_id()) {
+ return true;
}

- auto ep = get_endpoint_state_ptr(*ip_opt);
- if (!ep) {
- // _address_map may have stale entry since we rely on gc to remove entries there
- // FIXME: add function to address_map to remove immediately
- return false;
- }
+ bool is_alive = _live_endpoints.contains(id);

- if (id != ep->get_host_id()) {
- // If IDs do not match it means that the node with provided ID was replaced
- // with a new node with same IP address and hence it is dead
- return false;
+#ifndef SCYLLA_BUILD_MODE_RELEASE
+ // Live endpoints must always have a valid endpoint_state.
+ // Verify that in testing mode to reduce the overhead in production.
+ if (is_alive && !get_endpoint_state_ptr(id)) {
+ on_internal_error(logger, fmt::format("Node {} is alive but has no endpoint state", id));
}
+#endif

- return is_alive(*ip_opt);
+ return is_alive;
}

template<typename ID>
diff --git a/service/storage_service.cc b/service/storage_service.cc
index 47a43037b65..14a1e79ba2c 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -4865,7 +4865,7 @@ future<> storage_service::rebuild(utils::optional_param source_dc) {
} else {
auto streamer = make_lw_shared<dht::range_streamer>(ss._db, ss._stream_manager, tmptr, ss._abort_source,
tmptr->get_my_id(), ss._snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, null_topology_guard);
- streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(ss._gossiper.get_unreachable_host_ids()));
+ streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(ss._gossiper.get_unreachable_members()));
if (source_dc) {
streamer->add_source_filter(std::make_unique<dht::range_streamer::single_datacenter_filter>(*source_dc));
}
@@ -5764,7 +5764,7 @@ future<raft_topology_cmd_result> storage_service::raft_topology_cmd_handler(raft
} else {
auto streamer = make_lw_shared<dht::range_streamer>(_db, _stream_manager, tmptr, _abort_source,
tmptr->get_my_id(), _snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, _topology_state_machine._topology.session);
- streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(_gossiper.get_unreachable_host_ids()));
+ streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(_gossiper.get_unreachable_members()));
if (source_dc != "") {
streamer->add_source_filter(std::make_unique<dht::range_streamer::single_datacenter_filter>(source_dc));
}
@@ -6130,7 +6130,7 @@ future<> storage_service::stream_tablet(locator::global_tablet_id tablet) {
std::move(tables));
tm = nullptr;
streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(
- _gossiper.get_unreachable_host_ids()));
+ _gossiper.get_unreachable_members()));

std::unordered_map<locator::host_id, dht::token_range_vector> ranges_per_endpoint;
for (auto r: streaming_info.read_from) {
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:56 AMFeb 24

to scylladb-dev@googlegroups.com

Do not iterate over all client indexed by hos id to search for those
with given IP. Look up by host id directly since now we know it in down
notification. In cases host id is not known look it up by ip.
---
message/messaging_service.hh | 2 +-
api/messaging_service.cc | 2 +-
message/messaging_service.cc | 14 ++++++--------
service/storage_service.cc | 2 +-
4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/message/messaging_service.hh b/message/messaging_service.hh
index 17a382949a4..ae6ea7bab07 100644
--- a/message/messaging_service.hh
+++ b/message/messaging_service.hh
@@ -472,7 +472,7 @@ class messaging_service : public seastar::async_sharded_service<messaging_servic
void remove_error_rpc_client(messaging_verb verb, msg_addr id);
void remove_error_rpc_client(messaging_verb verb, locator::host_id id);
void remove_rpc_client_with_ignored_topology(msg_addr id, locator::host_id hid);
- void remove_rpc_client(msg_addr id);
+ void remove_rpc_client(msg_addr id, std::optional<locator::host_id> hid);
connection_drop_registration_t when_connection_drops(connection_drop_slot_t& slot) {
return _connection_dropped.connect(slot);
}
diff --git a/api/messaging_service.cc b/api/messaging_service.cc
index 3fc38b127c0..62b1d85a5ea 100644
--- a/api/messaging_service.cc
+++ b/api/messaging_service.cc
@@ -148,7 +148,7 @@ void set_messaging_service(http_context& ctx, routes& r, sharded<netw::messaging
hf::inject_disconnect.set(r, [&ms] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto ip = msg_addr(req->get_path_param("ip"));
co_await ms.invoke_on_all([ip] (netw::messaging_service& ms) {
- ms.remove_rpc_client(ip);
+ ms.remove_rpc_client(ip, std::nullopt);
});
co_return json::json_void();

});
diff --git a/message/messaging_service.cc b/message/messaging_service.cc

index fdd57f895c0..5656fdee95e 100644
--- a/message/messaging_service.cc
+++ b/message/messaging_service.cc
@@ -993,7 +993,7 @@ void messaging_service::cache_preferred_ip(gms::inet_address ep, gms::inet_addre
// _preferred_ip_cache so that they reopen with the preferred IPs we've
// just read.
//
- remove_rpc_client(msg_addr(ep));
+ remove_rpc_client(msg_addr(ep), std::nullopt);
}

void messaging_service::init_feature_listeners() {
@@ -1221,17 +1221,15 @@ void messaging_service::remove_error_rpc_client(messaging_verb verb, locator::ho

// Removes client to id.addr in both _client and _clients_with_host_id
// FIXME: make removing from _clients_with_host_id more efficient
-void messaging_service::remove_rpc_client(msg_addr id) {
+void messaging_service::remove_rpc_client(msg_addr id, std::optional<locator::host_id> hid) {
for (auto& c : _clients) {
find_and_remove_client(c, id, [] (const auto&) { return true; });
}
+ if (!hid) {
+ hid = _address_to_host_id_mapper(id.addr);
+ }
for (auto& c : _clients_with_host_id) {
- for (auto it = c.begin(); it != c.end();) {
- auto& [hid, si] = *it++;
- if (id.addr == si.endpoint) {
- find_and_remove_client(c, hid, [] (const auto&) { return true; });
- }
- }
+ find_and_remove_client(c, *hid, [] (const auto&) { return true; });
}
}

diff --git a/service/storage_service.cc b/service/storage_service.cc
index 85b05a49228..bcfebb56061 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -7330,7 +7330,7 @@ future<> endpoint_lifecycle_notifier::notify_down(gms::inet_address endpoint, lo

future<> storage_service::notify_down(inet_address endpoint, locator::host_id hid) {
co_await container().invoke_on_all([endpoint, hid] (auto&& ss) {
- ss._messaging.local().remove_rpc_client(netw::msg_addr{endpoint, 0});
+ ss._messaging.local().remove_rpc_client(netw::msg_addr{endpoint, 0}, hid);
return ss._lifecycle_notifier.notify_down(endpoint, hid);
});
slogger.debug("Notify node {}/{} has been down", endpoint, hid);
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:57 AMFeb 24

to scylladb-dev@googlegroups.com

Index _expire_time_endpoint_map map by host id instead of ip
---
gms/gossiper.hh | 6 +++---
service/storage_service.hh | 2 +-
gms/gossiper.cc | 27 ++++++++++++++-------------
service/storage_service.cc | 6 +++---
4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index a9cd760bcd5..61f12fdd783 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -244,7 +244,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
* gossip gets propagated to all nodes */
std::map<locator::host_id, clk::time_point> _just_removed_endpoints;

- std::map<inet_address, clk::time_point> _expire_time_endpoint_map;
+ std::map<locator::host_id, clk::time_point> _expire_time_endpoint_map;

bool _in_shadow_round = false;

@@ -419,7 +419,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
future<> do_status_check();

public:
- clk::time_point get_expire_time_for_endpoint(inet_address endpoint) const noexcept;
+ clk::time_point get_expire_time_for_endpoint(locator::host_id endpoint) const noexcept;

// Gets a shared pointer to the endpoint_state, if exists.
// Otherwise, returns a null ptr.
@@ -639,7 +639,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
bool is_enabled() const;

public:
- void add_expire_time_for_endpoint(inet_address endpoint, clk::time_point expire_time);
+ void add_expire_time_for_endpoint(locator::host_id endpoint, clk::time_point expire_time);

static clk::time_point compute_expire_time();
public:
diff --git a/service/storage_service.hh b/service/storage_service.hh
index c6d171ec12b..7642e793bc3 100644
--- a/service/storage_service.hh
+++ b/service/storage_service.hh
@@ -583,7 +583,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
/** unlike excise we just need this endpoint gone without going through any notifications **/
future<> remove_endpoint(inet_address endpoint, gms::permit_id pid);

- void add_expire_time_if_found(inet_address endpoint, int64_t expire_time);
+ void add_expire_time_if_found(locator::host_id endpoint, int64_t expire_time);

int64_t extract_expire_time(const std::vector<sstring>& pieces) const {
return std::stoll(pieces[2]);
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index e517c89ce6b..d4a6b498b51 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -782,7 +782,7 @@ future<> gossiper::do_status_check() {
}

// check for dead state removal
- auto expire_time = get_expire_time_for_endpoint(endpoint);
+ auto expire_time = get_expire_time_for_endpoint(host_id);
if (!is_alive && (now > expire_time)) {
const auto host_id = eps->get_host_id();
if (!host_id) {
@@ -1276,7 +1276,7 @@ future<> gossiper::evict_from_membership(inet_address endpoint, permit_id pid) {
}
g._endpoint_state_map.erase(endpoint);
});
- _expire_time_endpoint_map.erase(endpoint);
+ _expire_time_endpoint_map.erase(hid);
quarantine_endpoint(hid);
logger.debug("evicting {} from gossip", endpoint);
}
@@ -1359,7 +1359,7 @@ future<> gossiper::advertise_token_removed(inet_address endpoint, locator::host_
auto expire_time = compute_expire_time();
eps.add_application_state(application_state::STATUS, versioned_value::removed_nonlocal(host_id, expire_time.time_since_epoch().count()));
logger.info("Completing removal of {}", endpoint);
- add_expire_time_for_endpoint(endpoint, expire_time);
+ add_expire_time_for_endpoint(host_id, expire_time);
co_await replicate(endpoint, std::move(eps), pid);
// ensure at least one gossip round occurs before returning
co_await sleep_abortable(INTERVAL * 2, _abort_source);
@@ -1468,9 +1468,9 @@ bool gossiper::is_gossip_only_member(locator::host_id host_id) const {

return !is_dead_state(*es) && (!node || !node->is_member());
}

-clk::time_point gossiper::get_expire_time_for_endpoint(inet_address endpoint) const noexcept {
+clk::time_point gossiper::get_expire_time_for_endpoint(locator::host_id id) const noexcept {
/* default expire_time is A_VERY_LONG_TIME */
- auto it = _expire_time_endpoint_map.find(endpoint);
+ auto it = _expire_time_endpoint_map.find(id);
if (it == _expire_time_endpoint_map.end()) {
return compute_expire_time();
} else {
@@ -1745,28 +1745,29 @@ future<> gossiper::real_mark_alive(inet_address addr) {
// prevents do_status_check from racing us and evicting if it was down > A_VERY_LONG_TIME
update_timestamp(es);

+ auto host_id = es->get_host_id();

logger.debug("removing expire time for endpoint : {}", addr);
bool was_live = false;

- co_await mutate_live_and_unreachable_endpoints([addr = es->get_host_id(), &was_live] (live_and_unreachable_endpoints& data) {
+ co_await mutate_live_and_unreachable_endpoints([addr = host_id, &was_live] (live_and_unreachable_endpoints& data) {

data.unreachable.erase(addr);
auto [it_, inserted] = data.live.insert(addr);
was_live = !inserted;

});
- _expire_time_endpoint_map.erase(addr);
+ _expire_time_endpoint_map.erase(host_id);
if (was_live) {
co_return;
}

if (_endpoints_to_talk_with.empty()) {
- _endpoints_to_talk_with.push_back({es->get_host_id()});
+ _endpoints_to_talk_with.push_back({host_id});
} else {
- _endpoints_to_talk_with.front().push_back(es->get_host_id());
+ _endpoints_to_talk_with.front().push_back(host_id);
}

- logger.info("InetAddress {}/{} is now UP, status = {}", es->get_host_id(), addr, status);
+ logger.info("InetAddress {}/{} is now UP, status = {}", host_id, addr, status);

- co_await _subscribers.for_each([addr, es, pid = permit.id()] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) -> future<> {
- co_await subscriber->on_alive(addr, es->get_host_id(), es, pid);
+ co_await _subscribers.for_each([addr, host_id, es, pid = permit.id()] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) -> future<> {
+ co_await subscriber->on_alive(addr, host_id, es, pid);
logger.trace("Notified {}", fmt::ptr(subscriber.get()));
});
}
@@ -2351,7 +2352,7 @@ bool gossiper::is_enabled() const {
return _enabled && !_abort_source.abort_requested();
}

-void gossiper::add_expire_time_for_endpoint(inet_address endpoint, clk::time_point expire_time) {
+void gossiper::add_expire_time_for_endpoint(locator::host_id endpoint, clk::time_point expire_time) {
auto now_ = now();
auto diff = std::chrono::duration_cast<std::chrono::seconds>(expire_time - now_).count();
logger.info("Node {} will be removed from gossip at [{:%Y-%m-%d %T}]: (expire = {}, now = {}, diff = {} seconds)",
diff --git a/service/storage_service.cc b/service/storage_service.cc
index bcfebb56061..c015c03bb16 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -2562,7 +2562,7 @@ future<> storage_service::handle_state_removed(inet_address endpoint, locator::h
std::unordered_set<token> tmp(remove_tokens.begin(), remove_tokens.end());
co_await excise(std::move(tmp), endpoint, host_id, extract_expire_time(pieces), pid);
} else { // now that the gossiper has told us about this nonexistent member, notify the gossiper to remove it
- add_expire_time_if_found(endpoint, extract_expire_time(pieces));
+ add_expire_time_if_found(host_id, extract_expire_time(pieces));
co_await remove_endpoint(endpoint, pid);
}
}
@@ -5047,7 +5047,7 @@ future<> storage_service::excise(std::unordered_set<token> tokens, inet_address

future<> storage_service::excise(std::unordered_set<token> tokens, inet_address endpoint_ip,
locator::host_id endpoint_hid, int64_t expire_time, gms::permit_id pid) {
- add_expire_time_if_found(endpoint_ip, expire_time);
+ add_expire_time_if_found(endpoint_hid, expire_time);
return excise(tokens, endpoint_ip, endpoint_hid, pid);
}

@@ -5099,7 +5099,7 @@ storage_service::stream_ranges(std::unordered_map<sstring, std::unordered_multim
}
}

-void storage_service::add_expire_time_if_found(inet_address endpoint, int64_t expire_time) {
+void storage_service::add_expire_time_if_found(locator::host_id endpoint, int64_t expire_time) {
if (expire_time != 0L) {
using clk = gms::gossiper::clk;
auto time = clk::time_point(clk::duration(expire_time));
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:57 AMFeb 24

to scylladb-dev@googlegroups.com

---
gms/gossiper.hh | 1 -
gms/gossiper.cc | 4 ----
2 files changed, 5 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 343882af0e2..8f2efd527c8 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -110,7 +110,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar

future<> do_send_ack2_msg(locator::host_id from, utils::chunked_vector<gossip_digest> ack_msg_digest);
future<gossip_get_endpoint_states_response> handle_get_endpoint_states_msg(gossip_get_endpoint_states_request request);
static constexpr uint32_t _default_cpuid = 0;

- msg_addr get_msg_addr(inet_address to) const noexcept;
void do_sort(utils::chunked_vector<gossip_digest>& g_digest_list) const;
timer<lowres_clock> _scheduled_gossip_task;
bool _enabled = false;
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index a11fef48dce..10d2b46f181 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -59,10 +59,6 @@ constexpr std::chrono::milliseconds gossiper::INTERVAL;
constexpr std::chrono::hours gossiper::A_VERY_LONG_TIME;
constexpr generation_type::value_type gossiper::MAX_GENERATION_DIFFERENCE;

-netw::msg_addr gossiper::get_msg_addr(inet_address to) const noexcept {
- return msg_addr{to, _default_cpuid};
-}
-
const sstring& gossiper::get_cluster_name() const noexcept {
return _gcfg.cluster_name;
}
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:08:58 AMFeb 24

to scylladb-dev@googlegroups.com

Index _just_removed_endpoints map by host id instead of ip
---
gms/gossiper.hh | 6 +++---
gms/gossiper.cc | 23 ++++++++++++-----------
2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 8f2efd527c8..a9cd760bcd5 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -242,7 +242,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
* gossip. We will ignore any gossip regarding these endpoints for QUARANTINE_DELAY time
* after removal to prevent nodes from falsely reincarnating during the time when removal

* gossip gets propagated to all nodes */

- std::map<inet_address, clk::time_point> _just_removed_endpoints;
+ std::map<locator::host_id, clk::time_point> _just_removed_endpoints;

std::map<inet_address, clk::time_point> _expire_time_endpoint_map;

@@ -353,7 +353,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
*
* @param endpoint
*/
- void quarantine_endpoint(inet_address endpoint);
+ void quarantine_endpoint(locator::host_id id);

/**
* Quarantines the endpoint until quarantine_start + QUARANTINE_DELAY
@@ -361,7 +361,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
* @param endpoint
* @param quarantine_start
*/
- void quarantine_endpoint(inet_address endpoint, clk::time_point quarantine_start);
+ void quarantine_endpoint(locator::host_id id, clk::time_point quarantine_start);

private:
/**
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 10d2b46f181..e517c89ce6b 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -659,8 +659,8 @@ future<> gossiper::apply_state_locally(std::map<inet_address, endpoint_state> ma
return make_ready_future<>();
}
} else {
- if (_just_removed_endpoints.contains(ep)) {
- logger.trace("Ignoring gossip for {} because it is quarantined", ep);
+ if (_just_removed_endpoints.contains(hid)) {
+ logger.trace("Ignoring gossip for {} because it is quarantined", hid);
return make_ready_future<>();
}
}
@@ -735,7 +735,7 @@ future<> gossiper::remove_endpoint(inet_address endpoint, permit_id pid) {
_syn_handlers.erase(host_id);
_ack_handlers.erase(host_id);
}
- quarantine_endpoint(endpoint);
+ quarantine_endpoint(host_id);

logger.info("Removed endpoint {}", endpoint);

if (was_alive) {
@@ -766,13 +766,14 @@ future<> gossiper::do_status_check() {

continue;
}
auto& ep_state = *eps;

- bool is_alive = this->is_alive(ep_state.get_host_id());
+ auto host_id = ep_state.get_host_id();
+ bool is_alive = this->is_alive(host_id);

auto update_timestamp = ep_state.get_update_timestamp();

// check if this is a fat client. fat clients are removed automatically from

// gossip after FatClientTimeout. Do not remove dead states here.

- if (is_gossip_only_member(ep_state.get_host_id())
- && !_just_removed_endpoints.contains(endpoint)
+ if (is_gossip_only_member(host_id)
+ && !_just_removed_endpoints.contains(host_id)

&& ((now - update_timestamp) > fat_client_timeout)) {
logger.info("FatClient {} has been silent for {}ms, removing from gossip", endpoint, fat_client_timeout.count());

co_await remove_endpoint(endpoint, pid); // will put it in _just_removed_endpoints to respect quarantine delay
@@ -1276,18 +1277,18 @@ future<> gossiper::evict_from_membership(inet_address endpoint, permit_id pid) {
g._endpoint_state_map.erase(endpoint);
});
_expire_time_endpoint_map.erase(endpoint);
- quarantine_endpoint(endpoint);
+ quarantine_endpoint(hid);

logger.debug("evicting {} from gossip", endpoint);
}

-void gossiper::quarantine_endpoint(inet_address endpoint) {
- quarantine_endpoint(endpoint, now());
+void gossiper::quarantine_endpoint(locator::host_id id) {
+ quarantine_endpoint(id, now());
}

-void gossiper::quarantine_endpoint(inet_address endpoint, clk::time_point quarantine_start) {
+void gossiper::quarantine_endpoint(locator::host_id id, clk::time_point quarantine_start) {
if (!_topo_sm) {
// In raft topology mode the coodinator maintains banned nodes list
- _just_removed_endpoints[endpoint] = quarantine_start;
+ _just_removed_endpoints[id] = quarantine_start;
}
}

--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:09:00 AMFeb 24

to scylladb-dev@googlegroups.com

---
service/endpoint_lifecycle_subscriber.hh | 12 ++--
service/storage_proxy.hh | 2 +-
service/storage_service.hh | 10 +--
transport/server.hh | 6 +-
service/storage_proxy.cc | 4 +-
service/storage_service.cc | 79 ++++++++++++------------
service/topology_coordinator.cc | 4 +-
transport/event_notifier.cc | 6 +-
8 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/service/endpoint_lifecycle_subscriber.hh b/service/endpoint_lifecycle_subscriber.hh
index 59697d5d0ac..bd84c94d5ed 100644

--- a/service/endpoint_lifecycle_subscriber.hh
+++ b/service/endpoint_lifecycle_subscriber.hh
@@ -35,7 +35,7 @@ class endpoint_lifecycle_subscriber {
*
* @param endpoint the newly added endpoint.
*/

- virtual void on_join_cluster(const gms::inet_address& endpoint) {}
+ virtual void on_join_cluster(const gms::inet_address& endpoint, locator::host_id host_id) {}

/**

* Called when a new node leave the cluster (decommission or removeToken).

@@ -50,14 +50,14 @@ class endpoint_lifecycle_subscriber {
*

* @param endpoint the endpoint marked UP.
*/

- virtual void on_up(const gms::inet_address& endpoint) {}
+ virtual void on_up(const gms::inet_address& endpoint, locator::host_id host_id) {}

/**

* Called when a node is marked DOWN.
*
* @param endpoint the endpoint marked DOWN.
*/

- virtual void on_down(const gms::inet_address& endpoint) {}
+ virtual void on_down(const gms::inet_address& endpoint, locator::host_id host_id) {}
};

class endpoint_lifecycle_notifier {
@@ -67,10 +67,10 @@ class endpoint_lifecycle_notifier {
void register_subscriber(endpoint_lifecycle_subscriber* subscriber);
future<> unregister_subscriber(endpoint_lifecycle_subscriber* subscriber) noexcept;

- future<> notify_down(gms::inet_address endpoint);
+ future<> notify_down(gms::inet_address endpoint, locator::host_id host_id);
future<> notify_left(gms::inet_address endpoint, locator::host_id host_id);
- future<> notify_up(gms::inet_address endpoint);
- future<> notify_joined(gms::inet_address endpoint);
+ future<> notify_up(gms::inet_address endpoint, locator::host_id host_id);
+ future<> notify_joined(gms::inet_address endpoint, locator::host_id host_id);
};

}
diff --git a/service/storage_proxy.hh b/service/storage_proxy.hh
index 3396e1c41d3..8063a3a6f20 100644
--- a/service/storage_proxy.hh
+++ b/service/storage_proxy.hh
@@ -742,7 +742,7 @@ class storage_proxy : public seastar::async_sharded_service<storage_proxy>, publ

}

virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) override;

- virtual void on_down(const gms::inet_address& endpoint) override;

+ virtual void on_down(const gms::inet_address& endpoint, locator::host_id hid) override;

friend class abstract_read_executor;
friend class abstract_write_response_handler;
diff --git a/service/storage_service.hh b/service/storage_service.hh
index 429926fbcef..c6d171ec12b 100644
--- a/service/storage_service.hh
+++ b/service/storage_service.hh
@@ -776,11 +776,11 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
void do_isolate_on_error(disk_error type);
future<> isolate();

- future<> notify_down(inet_address endpoint);
+ future<> notify_down(inet_address endpoint, locator::host_id hid);
future<> notify_left(inet_address endpoint, locator::host_id hid);
- future<> notify_up(inet_address endpoint);
- future<> notify_joined(inet_address endpoint);
- future<> notify_cql_change(inet_address endpoint, bool ready);
+ future<> notify_up(inet_address endpoint, locator::host_id hid);
+ future<> notify_joined(inet_address endpoint, locator::host_id hid);
+ future<> notify_cql_change(inet_address endpoint, locator::host_id hid,bool ready);
future<> remove_rpc_client_with_ignored_topology(inet_address endpoint, locator::host_id id);
public:
future<bool> is_cleanup_allowed(sstring keyspace);
@@ -955,7 +955,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi

struct nodes_to_notify_after_sync {
std::vector<std::pair<gms::inet_address, locator::host_id>> left;
- std::vector<gms::inet_address> joined;
+ std::vector<std::pair<gms::inet_address, locator::host_id>> joined;
};

using host_id_to_ip_map_t = std::unordered_map<locator::host_id, gms::inet_address>;
diff --git a/transport/server.hh b/transport/server.hh
index 546f03ebd8c..ca6f152245e 100644
--- a/transport/server.hh
+++ b/transport/server.hh
@@ -389,10 +389,10 @@ class cql_server::event_notifier : public service::migration_listener,
virtual future<> on_before_service_level_change(qos::service_level_options slo_before, qos::service_level_options slo_after, qos::service_level_info sl_info) override;
virtual future<> on_effective_service_levels_cache_reloaded() override;

- virtual void on_join_cluster(const gms::inet_address& endpoint) override;

+ virtual void on_join_cluster(const gms::inet_address& endpoint, locator::host_id hid) override;

virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) override;
- virtual void on_up(const gms::inet_address& endpoint) override;
- virtual void on_down(const gms::inet_address& endpoint) override;

+ virtual void on_up(const gms::inet_address& endpoint, locator::host_id hid) override;
+ virtual void on_down(const gms::inet_address& endpoint, locator::host_id hid) override;
};

inline service::endpoint_lifecycle_subscriber* cql_server::get_lifecycle_listener() const noexcept { return _notifier.get(); }
diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc
index 48f85c1f261..30ffdbd4aee 100644
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -6920,9 +6920,9 @@ void storage_proxy::cancel_write_handlers(noncopyable_function<bool(const abstra
}
}

-void storage_proxy::on_down(const gms::inet_address& endpoint) {
+void storage_proxy::on_down(const gms::inet_address& endpoint, locator::host_id id) {
// FIXME: make gossiper notifictaions to pass host ids
- return cancel_write_handlers([id = remote().gossiper().get_host_id(endpoint)] (const abstract_write_response_handler& handler) {
+ return cancel_write_handlers([id] (const abstract_write_response_handler& handler) {
const auto& targets = handler.get_targets();
return std::ranges::find(targets, id) != targets.end();
});
diff --git a/service/storage_service.cc b/service/storage_service.cc
index 07a7375eacb..85b05a49228 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -121,6 +121,7 @@
#include <stdexcept>
#include <unistd.h>
#include <variant>
+#include <utility>

using token = dht::token;
using UUID = utils::UUID;
@@ -459,7 +460,7 @@ future<> storage_service::raft_topology_update_ip(locator::host_id id, gms::inet
}

if (nodes_to_notify) {
- nodes_to_notify->joined.emplace_back(ip);
+ nodes_to_notify->joined.emplace_back(ip, id);
}

if (const auto it = host_id_to_ip_map.find(id); it != host_id_to_ip_map.end() && it->second != ip) {
@@ -651,8 +652,8 @@ future<> storage_service::notify_nodes_after_sync(nodes_to_notify_after_sync&& n
for (auto [ip, host_id] : nodes_to_notify.left) {
co_await notify_left(ip, host_id);
}
- for (auto ip : nodes_to_notify.joined) {
- co_await notify_joined(ip);
+ for (auto [ip, host_id] : nodes_to_notify.joined) {
+ co_await notify_joined(ip, host_id);
}
}

@@ -2505,7 +2506,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, locator::ho

// Send joined notification only when this node was not a member prior to this
if (do_notify_joined) {
- co_await notify_joined(endpoint);
+ co_await notify_joined(endpoint, host_id);
co_await remove_rpc_client_with_ignored_topology(endpoint, host_id);
}

@@ -2576,7 +2577,7 @@ future<> storage_service::on_alive(gms::inet_address endpoint, locator::host_id
slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, host_id, pid);
const auto* node = tm.get_topology().find_node(host_id);
if (node && node->is_member()) {
- co_await notify_up(endpoint);
+ co_await notify_up(endpoint, host_id);
} else if (raft_topology_change_enabled()) {
slogger.debug("ignore on_alive since topology changes are using raft and "
"endpoint {}/{} is not a topology member", endpoint, host_id);
@@ -2649,7 +2650,7 @@ future<> storage_service::on_change(gms::inet_address endpoint, locator::host_id
}
if (states.contains(application_state::RPC_READY)) {
slogger.debug("Got application_state::RPC_READY for node {}, is_cql_ready={}", endpoint, ep_state->is_cql_ready());
- co_await notify_cql_change(endpoint, ep_state->is_cql_ready());
+ co_await notify_cql_change(endpoint, host_id, ep_state->is_cql_ready());
}
if (auto it = states.find(application_state::INTERNAL_IP); it != states.end()) {
co_await maybe_reconnect_to_preferred_ip(endpoint, inet_address(it->second.value()));
@@ -2703,7 +2704,7 @@ future<> storage_service::on_remove(gms::inet_address endpoint, locator::host_id

future<> storage_service::on_dead(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {
slogger.debug("endpoint={}/{} on_dead: permit_id={}", endpoint, id, pid);
- return notify_down(endpoint);
+ return notify_down(endpoint, id);
}

future<> storage_service::on_restart(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {
@@ -7315,24 +7316,24 @@ storage_service::get_natural_endpoints(const sstring& keyspace,
return replicas | std::views::transform([&] (locator::host_id id) { return _address_map.get(id); }) | std::ranges::to<inet_address_vector_replica_set>();
}

-future<> endpoint_lifecycle_notifier::notify_down(gms::inet_address endpoint) {
- return seastar::async([this, endpoint] {
- _subscribers.thread_for_each([endpoint] (endpoint_lifecycle_subscriber* subscriber) {
+future<> endpoint_lifecycle_notifier::notify_down(gms::inet_address endpoint, locator::host_id hid) {
+ return seastar::async([this, endpoint, hid] {
+ _subscribers.thread_for_each([endpoint, hid] (endpoint_lifecycle_subscriber* subscriber) {
try {
- subscriber->on_down(endpoint);
+ subscriber->on_down(endpoint, hid);
} catch (...) {
- slogger.warn("Down notification failed {}: {}", endpoint, std::current_exception());
+ slogger.warn("Down notification failed {}/{}: {}", endpoint, hid, std::current_exception());
}
});
});
}

-future<> storage_service::notify_down(inet_address endpoint) {
- co_await container().invoke_on_all([endpoint] (auto&& ss) {
+future<> storage_service::notify_down(inet_address endpoint, locator::host_id hid) {
+ co_await container().invoke_on_all([endpoint, hid] (auto&& ss) {

ss._messaging.local().remove_rpc_client(netw::msg_addr{endpoint, 0});

- return ss._lifecycle_notifier.notify_down(endpoint);
+ return ss._lifecycle_notifier.notify_down(endpoint, hid);
});
- slogger.debug("Notify node {} has been down", endpoint);
+ slogger.debug("Notify node {}/{} has been down", endpoint, hid);
}

future<> endpoint_lifecycle_notifier::notify_left(gms::inet_address endpoint, locator::host_id hid) {
@@ -7341,7 +7342,7 @@ future<> endpoint_lifecycle_notifier::notify_left(gms::inet_address endpoint, lo
try {
subscriber->on_leave_cluster(endpoint, hid);
} catch (...) {
- slogger.warn("Leave cluster notification failed {}: {}", endpoint, std::current_exception());
+ slogger.warn("Leave cluster notification failed {}/{}: {}", endpoint, hid, std::current_exception());
}
});
});
@@ -7354,48 +7355,48 @@ future<> storage_service::notify_left(inet_address endpoint, locator::host_id hi
slogger.debug("Notify node {} has left the cluster", endpoint);
}

-future<> endpoint_lifecycle_notifier::notify_up(gms::inet_address endpoint) {
- return seastar::async([this, endpoint] {
- _subscribers.thread_for_each([endpoint] (endpoint_lifecycle_subscriber* subscriber) {
+future<> endpoint_lifecycle_notifier::notify_up(gms::inet_address endpoint, locator::host_id hid) {
+ return seastar::async([this, endpoint, hid] {
+ _subscribers.thread_for_each([endpoint, hid] (endpoint_lifecycle_subscriber* subscriber) {
try {
- subscriber->on_up(endpoint);
+ subscriber->on_up(endpoint, hid);
} catch (...) {
- slogger.warn("Up notification failed {}: {}", endpoint, std::current_exception());
+ slogger.warn("Up notification failed {}/{}: {}", endpoint, hid, std::current_exception());
}
});
});
}

-future<> storage_service::notify_up(inet_address endpoint) {
- if (!_gossiper.is_cql_ready(endpoint) || !_gossiper.is_alive(_gossiper.get_host_id(endpoint))) {
+future<> storage_service::notify_up(inet_address endpoint, locator::host_id hid) {
+ if (!_gossiper.is_cql_ready(endpoint) || !_gossiper.is_alive(hid)) {
co_return;
}
- co_await container().invoke_on_all([endpoint] (auto&& ss) {
- return ss._lifecycle_notifier.notify_up(endpoint);
+ co_await container().invoke_on_all([endpoint, hid] (auto&& ss) {
+ return ss._lifecycle_notifier.notify_up(endpoint, hid);
});
- slogger.debug("Notify node {} has been up", endpoint);
+ slogger.debug("Notify node {}/{} has been up", endpoint, hid);
}

-future<> endpoint_lifecycle_notifier::notify_joined(gms::inet_address endpoint) {
- return seastar::async([this, endpoint] {
- _subscribers.thread_for_each([endpoint] (endpoint_lifecycle_subscriber* subscriber) {
+future<> endpoint_lifecycle_notifier::notify_joined(gms::inet_address endpoint, locator::host_id hid) {
+ return seastar::async([this, endpoint, hid] {
+ _subscribers.thread_for_each([endpoint, hid] (endpoint_lifecycle_subscriber* subscriber) {
try {
- subscriber->on_join_cluster(endpoint);
+ subscriber->on_join_cluster(endpoint, hid);
} catch (...) {
- slogger.warn("Join cluster notification failed {}: {}", endpoint, std::current_exception());
+ slogger.warn("Join cluster notification failed {}/{}: {}", endpoint, hid,std::current_exception());
}
});
});
}

-future<> storage_service::notify_joined(inet_address endpoint) {
+future<> storage_service::notify_joined(inet_address endpoint, locator::host_id hid) {
co_await utils::get_local_injector().inject(
"storage_service_notify_joined_sleep", std::chrono::milliseconds{500});

- co_await container().invoke_on_all([endpoint] (auto&& ss) {
- return ss._lifecycle_notifier.notify_joined(endpoint);
+ co_await container().invoke_on_all([endpoint, hid] (auto&& ss) {
+ return ss._lifecycle_notifier.notify_joined(endpoint, hid);
});
- slogger.debug("Notify node {} has joined the cluster", endpoint);
+ slogger.debug("Notify node {}/{} has joined the cluster", endpoint, hid);
}

future<> storage_service::remove_rpc_client_with_ignored_topology(inet_address endpoint, locator::host_id id) {
@@ -7404,11 +7405,11 @@ future<> storage_service::remove_rpc_client_with_ignored_topology(inet_address e
});
}

-future<> storage_service::notify_cql_change(inet_address endpoint, bool ready) {
+future<> storage_service::notify_cql_change(inet_address endpoint, locator::host_id hid, bool ready) {
if (ready) {
- co_await notify_up(endpoint);
+ co_await notify_up(endpoint, hid);
} else {
- co_await notify_down(endpoint);
+ co_await notify_down(endpoint, hid);
}
}

diff --git a/service/topology_coordinator.cc b/service/topology_coordinator.cc
index 9dd8d6251e3..4edccf8afac 100644
--- a/service/topology_coordinator.cc
+++ b/service/topology_coordinator.cc
@@ -2870,8 +2870,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
future<> run();
future<> stop();

- virtual void on_up(const gms::inet_address& endpoint) { _topo_sm.event.broadcast(); };
- virtual void on_down(const gms::inet_address& endpoint) { _topo_sm.event.broadcast(); };
+ virtual void on_up(const gms::inet_address& endpoint, locator::host_id hid) { _topo_sm.event.broadcast(); };
+ virtual void on_down(const gms::inet_address& endpoint, locator::host_id hid) { _topo_sm.event.broadcast(); };
};

future<std::optional<group0_guard>> topology_coordinator::maybe_migrate_system_tables(group0_guard guard) {
diff --git a/transport/event_notifier.cc b/transport/event_notifier.cc
index 66f5d1c713b..3d4001baa60 100644
--- a/transport/event_notifier.cc
+++ b/transport/event_notifier.cc
@@ -232,7 +232,7 @@ future<> cql_server::event_notifier::on_effective_service_levels_cache_reloaded(
return _server.update_connections_service_level_params();
}

-void cql_server::event_notifier::on_join_cluster(const gms::inet_address& endpoint)
+void cql_server::event_notifier::on_join_cluster(const gms::inet_address& endpoint, locator::host_id hid)
{
if (!_server._gossiper.is_cql_ready(endpoint)) {
_endpoints_pending_joined_notification.insert(endpoint);
@@ -262,7 +262,7 @@ void cql_server::event_notifier::on_leave_cluster(const gms::inet_address& endpo
}
}

-void cql_server::event_notifier::on_up(const gms::inet_address& endpoint)
+void cql_server::event_notifier::on_up(const gms::inet_address& endpoint, locator::host_id hid)
{
if (_endpoints_pending_joined_notification.erase(endpoint)) {
send_join_cluster(endpoint);
@@ -280,7 +280,7 @@ void cql_server::event_notifier::on_up(const gms::inet_address& endpoint)
}
}

-void cql_server::event_notifier::on_down(const gms::inet_address& endpoint)
+void cql_server::event_notifier::on_down(const gms::inet_address& endpoint, locator::host_id hid)
{
bool was_down = _last_status_change.contains(endpoint) && _last_status_change.at(endpoint) == event::status_change::status_type::DOWN;
_last_status_change[endpoint] = event::status_change::status_type::DOWN;
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:15:13 AMFeb 24

to scylladb-dev@googlegroups.com

Provide default implementation for them instead. Will be easier to rework them later.
---

cdc/generation_service.hh | 5 -----
gms/i_endpoint_state_change_subscriber.hh | 12 ++++++------
service/load_broadcaster.hh | 6 ------
service/migration_manager.hh | 3 ---
service/view_update_backlog_broker.hh | 5 -----
streaming/stream_manager.hh | 3 ---
gms/feature_service.cc | 4 ----
repair/row_level.cc | 18 ------------------
service/storage_service.cc | 19 -------------------
9 files changed, 6 insertions(+), 69 deletions(-)

diff --git a/cdc/generation_service.hh b/cdc/generation_service.hh
index efad713821f..165a0aab1ec 100644
--- a/cdc/generation_service.hh
+++ b/cdc/generation_service.hh
@@ -110,11 +110,6 @@ class generation_service : public peering_sharded_service<generation_service>
return _cdc_metadata;
}

- virtual future<> on_alive(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_dead(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_remove(gms::inet_address, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_restart(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-
virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override;
virtual future<> on_change(gms::inet_address, const gms::application_state_map&, gms::permit_id) override;

diff --git a/gms/i_endpoint_state_change_subscriber.hh b/gms/i_endpoint_state_change_subscriber.hh
index 3c15c43d425..44e32bb5ce9 100644
--- a/gms/i_endpoint_state_change_subscriber.hh
+++ b/gms/i_endpoint_state_change_subscriber.hh
@@ -46,15 +46,15 @@ class i_endpoint_state_change_subscriber {
* @param endpoint endpoint for which the state change occurred.
* @param epState state that actually changed for the above endpoint.
*/
- virtual future<> on_join(inet_address endpoint, endpoint_state_ptr ep_state, permit_id) = 0;
+ virtual future<> on_join(inet_address endpoint, endpoint_state_ptr ep_state, permit_id) { return make_ready_future<>(); }

- virtual future<> on_change(inet_address endpoint, const application_state_map& states, permit_id) = 0;
+ virtual future<> on_change(inet_address endpoint, const application_state_map& states, permit_id) { return make_ready_future<>(); }

- virtual future<> on_alive(inet_address endpoint, endpoint_state_ptr state, permit_id) = 0;
+ virtual future<> on_alive(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };

- virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, permit_id) = 0;
+ virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };

- virtual future<> on_remove(inet_address endpoint, permit_id) = 0;
+ virtual future<> on_remove(inet_address endpoint, permit_id) { return make_ready_future<>(); };

/**
* Called whenever a node is restarted.
@@ -62,7 +62,7 @@ class i_endpoint_state_change_subscriber {
* previously marked down. It will have only if {@code state.isAlive() == false}
* as {@code state} is from before the restarted node is marked up.
*/
- virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr state, permit_id) = 0;
+ virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };
};

} // namespace gms
diff --git a/service/load_broadcaster.hh b/service/load_broadcaster.hh
index abbce7eb00d..2f13ae4b5ed 100644
--- a/service/load_broadcaster.hh
+++ b/service/load_broadcaster.hh
@@ -51,12 +51,6 @@ class load_broadcaster : public gms::i_endpoint_state_change_subscriber, public
}
return make_ready_future();
}
-
- future<> on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-
- future<> on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-
- future<> on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }

virtual future<> on_remove(gms::inet_address endpoint, gms::permit_id) override {
_load_info.erase(endpoint);
diff --git a/service/migration_manager.hh b/service/migration_manager.hh
index 508d1291b40..50ea39eb5f9 100644
--- a/service/migration_manager.hh
+++ b/service/migration_manager.hh
@@ -188,9 +188,6 @@ class migration_manager : public seastar::async_sharded_service<migration_manage
virtual future<> on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id) override;
virtual future<> on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id) override;
virtual future<> on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override;
- virtual future<> on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_remove(gms::inet_address endpoint, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override { return make_ready_future(); }

public:
// For tests only.
diff --git a/service/view_update_backlog_broker.hh b/service/view_update_backlog_broker.hh
index 30db2c8410e..7d54ffb055d 100644
--- a/service/view_update_backlog_broker.hh
+++ b/service/view_update_backlog_broker.hh
@@ -42,11 +42,6 @@ class view_update_backlog_broker final
virtual future<> on_change(gms::inet_address, const gms::application_state_map& states, gms::permit_id) override;

virtual future<> on_remove(gms::inet_address, gms::permit_id) override;
-
- virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_alive(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_dead(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_restart(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
};

}
diff --git a/streaming/stream_manager.hh b/streaming/stream_manager.hh
index 72b3dae91a6..927c3b24170 100644
--- a/streaming/stream_manager.hh
+++ b/streaming/stream_manager.hh
@@ -172,9 +172,6 @@ class stream_manager : public gms::i_endpoint_state_change_subscriber, public en
reader_consumer_v2 make_streaming_consumer(
uint64_t estimated_partitions, stream_reason, service::frozen_topology_guard);
public:
- virtual future<> on_join(inet_address endpoint, endpoint_state_ptr ep_state, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_change(gms::inet_address, const gms::application_state_map& states, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_alive(inet_address endpoint, endpoint_state_ptr state, gms::permit_id) override { return make_ready_future(); }
virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, gms::permit_id) override;
virtual future<> on_remove(inet_address endpoint, gms::permit_id) override;
virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr ep_state, gms::permit_id) override;
diff --git a/gms/feature_service.cc b/gms/feature_service.cc
index 8f3ff69ad00..eaa466f8741 100644
--- a/gms/feature_service.cc
+++ b/gms/feature_service.cc
@@ -271,10 +271,6 @@ class persistent_feature_enabler : public i_endpoint_state_change_subscriber {
}
return make_ready_future();
}
- future<> on_alive(inet_address, endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- future<> on_dead(inet_address, endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- future<> on_remove(inet_address, gms::permit_id) override { return make_ready_future(); }
- future<> on_restart(inet_address, endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }

future<> enable_features();
};
diff --git a/repair/row_level.cc b/repair/row_level.cc
index 7ddc5649b4e..b562c708dfe 100644
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -3201,24 +3201,6 @@ class row_level_repair_gossip_helper : public gms::i_endpoint_state_change_subsc
rlogger.warn("Failed to remove row level repair for node {}: {}", node, std::current_exception());
}
}
- virtual future<> on_join(
- gms::inet_address endpoint,
- gms::endpoint_state_ptr ep_state,
- gms::permit_id) override {
- return make_ready_future();
- }
- virtual future<> on_change(
- gms::inet_address endpoint,
- const gms::application_state_map& states,
- gms::permit_id) override {
- return make_ready_future();
- }
- virtual future<> on_alive(
- gms::inet_address endpoint,
- gms::endpoint_state_ptr state,
- gms::permit_id) override {
- return make_ready_future();
- }
virtual future<> on_dead(

gms::inet_address endpoint,
gms::endpoint_state_ptr state,

diff --git a/service/storage_service.cc b/service/storage_service.cc
index 511bb59eb95..429be031246 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -975,30 +975,11 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s
return on_endpoint_change(endpoint, ep_state, permit_id, "on_join");
}

- virtual future<>
- on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id) override {
- // Raft server ID never changes - do nothing

- return make_ready_future<>();
- }
-

virtual future<>
on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
return on_endpoint_change(endpoint, ep_state, permit_id, "on_alive");
}

- virtual future<>
- on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override {

- return make_ready_future<>();
- }
-

- virtual future<>
- on_remove(gms::inet_address endpoint, gms::permit_id) override {
- // The mapping is removed when the server is removed from
- // Raft configuration, not when it's dead or alive, or
- // removed

- return make_ready_future<>();
- }
-

virtual future<>
on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
return on_endpoint_change(endpoint, ep_state, permit_id, "on_restart");
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:15:13 AMFeb 24

to scylladb-dev@googlegroups.com

It was always deprecated.
---
gms/gossiper.hh | 2 --
api/gossiper.cc | 7 +------
gms/gossiper.cc | 5 -----
api/api-doc/gossiper.json | 8 --------
4 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 433a4e3f0fd..62097f735ab 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -382,8 +382,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
*/
future<> advertise_token_removed(inet_address endpoint, locator::host_id host_id, permit_id);

- future<> unsafe_assassinate_endpoint(sstring address);
-
/**
* Do not call this method unless you know what you are doing.
* It will try extremely hard to obliterate any endpoint from the ring,
diff --git a/api/gossiper.cc b/api/gossiper.cc
index e5fcd2e1076..2df0e05cf45 100644
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -53,12 +53,7 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
});

httpd::gossiper_json::assassinate_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
- if (req->get_query_param("unsafe") != "True") {
- return g.assassinate_endpoint(req->get_path_param("addr")).then([] {
- return make_ready_future<json::json_return_type>(json_void());
- });
- }
- return g.unsafe_assassinate_endpoint(req->get_path_param("addr")).then([] {
+ return g.assassinate_endpoint(req->get_path_param("addr")).then([] {

return make_ready_future<json::json_return_type>(json_void());
});
});

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index f1a9e5ce994..78a9ec6edba 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -1368,11 +1368,6 @@ future<> gossiper::advertise_token_removed(inet_address endpoint, locator::host_

co_await sleep_abortable(INTERVAL * 2, _abort_source);
}

-future<> gossiper::unsafe_assassinate_endpoint(sstring address) {
- logger.warn("Gossiper.unsafeAssassinateEndpoint is deprecated and will be removed in the next release; use assassinate_endpoint instead");
- return assassinate_endpoint(address);
-}
-
future<> gossiper::assassinate_endpoint(sstring address) {
co_await container().invoke_on(0, [&] (auto&& gossiper) -> future<> {
inet_address endpoint(address);
diff --git a/api/api-doc/gossiper.json b/api/api-doc/gossiper.json
index 49a73ff17b2..54eab08e9be 100644
--- a/api/api-doc/gossiper.json
+++ b/api/api-doc/gossiper.json
@@ -136,14 +136,6 @@
"allowMultiple":false,
"type":"string",
"paramType":"path"
- },
- {
- "name":"unsafe",
- "description":"Set to True to perform an unsafe assassination",
- "required":false,
- "allowMultiple":false,
- "type":"boolean",
- "paramType":"query"
}
]
}
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:15:13 AMFeb 24

to scylladb-dev@googlegroups.com

---
service/storage_service.cc | 1 -
1 file changed, 1 deletion(-)

diff --git a/service/storage_service.cc b/service/storage_service.cc
index 429be031246..fe7470ceb5a 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -2566,7 +2566,6 @@ future<> storage_service::handle_state_removed(inet_address endpoint, std::vecto
}
const auto host_id = _gossiper.get_host_id(endpoint);
if (get_token_metadata().is_normal_token_owner(host_id)) {
- auto state = pieces[0];
auto remove_tokens = get_token_metadata().get_tokens(host_id);

std::unordered_set<token> tmp(remove_tokens.begin(), remove_tokens.end());
co_await excise(std::move(tmp), endpoint, host_id, extract_expire_time(pieces), pid);

--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:15:14 AMFeb 24

to scylladb-dev@googlegroups.com

---
cdc/generation_service.hh | 4 ++--
gms/gossiper.hh | 2 +-
gms/i_endpoint_state_change_subscriber.hh | 16 +++++++-------
service/load_broadcaster.hh | 8 +++----
service/migration_manager.hh | 6 +++---
service/storage_service.hh | 12 +++++------
service/view_update_backlog_broker.hh | 4 ++--
streaming/stream_manager.hh | 6 +++---
cdc/generation.cc | 8 +++----
gms/endpoint_state.cc | 5 +++--
gms/feature_service.cc | 4 ++--
gms/gossiper.cc | 26 ++++++++++++-----------
repair/row_level.cc | 3 +++
service/migration_manager.cc | 8 +++----
service/misc_services.cc | 6 +++---
service/storage_service.cc | 24 ++++++++++-----------
streaming/stream_manager.cc | 6 +++---
17 files changed, 77 insertions(+), 71 deletions(-)

diff --git a/cdc/generation_service.hh b/cdc/generation_service.hh
index 165a0aab1ec..d1f13573c52 100644
--- a/cdc/generation_service.hh
+++ b/cdc/generation_service.hh
@@ -110,8 +110,8 @@ class generation_service : public peering_sharded_service<generation_service>
return _cdc_metadata;
}

- virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override;

- virtual future<> on_change(gms::inet_address, const gms::application_state_map&, gms::permit_id) override;
+ virtual future<> on_join(gms::inet_address, locator::host_id id, gms::endpoint_state_ptr, gms::permit_id) override;
+ virtual future<> on_change(gms::inet_address, locator::host_id id, const gms::application_state_map&, gms::permit_id) override;

future<> check_and_repair_cdc_streams();

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 62097f735ab..343882af0e2 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -540,7 +540,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar

// notify that an application state has changed
// Must be called under lock_endpoint.
- future<> do_on_change_notifications(inet_address addr, const application_state_map& states, permit_id) const;
+ future<> do_on_change_notifications(inet_address addr, locator::host_id id, const application_state_map& states, permit_id) const;

// notify that a node is DOWN (dead)
// Must be called under lock_endpoint.
diff --git a/gms/i_endpoint_state_change_subscriber.hh b/gms/i_endpoint_state_change_subscriber.hh
index 44e32bb5ce9..dd1f7874fba 100644
--- a/gms/i_endpoint_state_change_subscriber.hh
+++ b/gms/i_endpoint_state_change_subscriber.hh
@@ -34,8 +34,8 @@ namespace gms {
*/
class i_endpoint_state_change_subscriber {
protected:
- future<> on_application_state_change(inet_address endpoint, const application_state_map& states, application_state app_state, permit_id,
- std::function<future<>(inet_address, const gms::versioned_value&, gms::permit_id)> func);
+ future<> on_application_state_change(inet_address endpoint, locator::host_id id, const application_state_map& states, application_state app_state, permit_id,
+ std::function<future<>(inet_address, locator::host_id, const gms::versioned_value&, gms::permit_id)> func);

public:
virtual ~i_endpoint_state_change_subscriber() {}

@@ -46,15 +46,15 @@ class i_endpoint_state_change_subscriber {
* @param endpoint endpoint for which the state change occurred.
* @param epState state that actually changed for the above endpoint.
*/

- virtual future<> on_join(inet_address endpoint, endpoint_state_ptr ep_state, permit_id) { return make_ready_future<>(); }
+ virtual future<> on_join(inet_address endpoint, locator::host_id id, endpoint_state_ptr ep_state, permit_id) { return make_ready_future<>(); }

- virtual future<> on_change(inet_address endpoint, const application_state_map& states, permit_id) { return make_ready_future<>(); }
+ virtual future<> on_change(inet_address endpoint, locator::host_id id, const application_state_map& states, permit_id) { return make_ready_future<>(); }

- virtual future<> on_alive(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };
+ virtual future<> on_alive(inet_address endpoint, locator::host_id id, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };

- virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };
+ virtual future<> on_dead(inet_address endpoint, locator::host_id id, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };

- virtual future<> on_remove(inet_address endpoint, permit_id) { return make_ready_future<>(); };
+ virtual future<> on_remove(inet_address endpoint, locator::host_id id, permit_id) { return make_ready_future<>(); };

/**
* Called whenever a node is restarted.
@@ -62,7 +62,7 @@ class i_endpoint_state_change_subscriber {
* previously marked down. It will have only if {@code state.isAlive() == false}
* as {@code state} is from before the restarted node is marked up.
*/

- virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };
+ virtual future<> on_restart(inet_address endpoint, locator::host_id id, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };

};

} // namespace gms
diff --git a/service/load_broadcaster.hh b/service/load_broadcaster.hh

index 2f13ae4b5ed..4164a73afc8 100644
--- a/service/load_broadcaster.hh
+++ b/service/load_broadcaster.hh
@@ -37,14 +37,14 @@ class load_broadcaster : public gms::i_endpoint_state_change_subscriber, public
SCYLLA_ASSERT(_stopped);
}

- virtual future<> on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id pid) override {
- return on_application_state_change(endpoint, states, gms::application_state::LOAD, pid, [this] (gms::inet_address endpoint, const gms::versioned_value& value, gms::permit_id) {
+ virtual future<> on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) override {
+ return on_application_state_change(endpoint, id, states, gms::application_state::LOAD, pid, [this] (gms::inet_address endpoint, locator::host_id id, const gms::versioned_value& value, gms::permit_id) {

_load_info[endpoint] = std::stod(value.value());

return make_ready_future<>();
});
}

- virtual future<> on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id pid) override {
+ virtual future<> on_join(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id pid) override {

auto* local_value = ep_state->get_application_state_ptr(gms::application_state::LOAD);
if (local_value) {

_load_info[endpoint] = std::stod(local_value->value());

@@ -52,7 +52,7 @@ class load_broadcaster : public gms::i_endpoint_state_change_subscriber, public
return make_ready_future();
}

- virtual future<> on_remove(gms::inet_address endpoint, gms::permit_id) override {
+ virtual future<> on_remove(gms::inet_address endpoint, locator::host_id id, gms::permit_id) override {
_load_info.erase(endpoint);
return make_ready_future();
}
diff --git a/service/migration_manager.hh b/service/migration_manager.hh
index 50ea39eb5f9..4f9b2651229 100644
--- a/service/migration_manager.hh
+++ b/service/migration_manager.hh
@@ -185,9 +185,9 @@ class migration_manager : public seastar::async_sharded_service<migration_manage
future<schema_ptr> get_schema_for_write(table_schema_version, locator::host_id from, unsigned shard, netw::messaging_service& ms, abort_source& as);

private:
- virtual future<> on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id) override;
- virtual future<> on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id) override;
- virtual future<> on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override;
+ virtual future<> on_join(gms::inet_address endpoint,locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id) override;
+ virtual future<> on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states, gms::permit_id) override;
+ virtual future<> on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id) override;

public:
// For tests only.

diff --git a/service/storage_service.hh b/service/storage_service.hh
index 8263423ad40..6180b858245 100644
--- a/service/storage_service.hh
+++ b/service/storage_service.hh
@@ -466,7 +466,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
future<std::map<token, inet_address>> get_tablet_to_endpoint_map(table_id table);

public:
- virtual future<> on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id) override;
+ virtual future<> on_join(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id) override;
/*
* Handle the reception of a new particular ApplicationState for a particular endpoint. Note that the value of the
* ApplicationState has not necessarily "changed" since the last known value, if we already received the same update
@@ -495,11 +495,11 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
* Note: Any time a node state changes from STATUS_NORMAL, it will not be visible to new nodes. So it follows that
* you should never bootstrap a new node during a removenode, decommission or move.
*/
- virtual future<> on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id) override;
- virtual future<> on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override;
- virtual future<> on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override;
- virtual future<> on_remove(gms::inet_address endpoint, gms::permit_id) override;
- virtual future<> on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override;
+ virtual future<> on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states, gms::permit_id) override;
+ virtual future<> on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id) override;
+ virtual future<> on_dead(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id) override;
+ virtual future<> on_remove(gms::inet_address endpoint, locator::host_id id, gms::permit_id) override;
+ virtual future<> on_restart(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id) override;

public:
// For migration_listener
diff --git a/service/view_update_backlog_broker.hh b/service/view_update_backlog_broker.hh
index 7d54ffb055d..65e99274ec6 100644
--- a/service/view_update_backlog_broker.hh
+++ b/service/view_update_backlog_broker.hh
@@ -39,9 +39,9 @@ class view_update_backlog_broker final

seastar::future<> stop();

- virtual future<> on_change(gms::inet_address, const gms::application_state_map& states, gms::permit_id) override;
+ virtual future<> on_change(gms::inet_address, locator::host_id id, const gms::application_state_map& states, gms::permit_id) override;

- virtual future<> on_remove(gms::inet_address, gms::permit_id) override;
+ virtual future<> on_remove(gms::inet_address, locator::host_id id, gms::permit_id) override;
};

}
diff --git a/streaming/stream_manager.hh b/streaming/stream_manager.hh
index 927c3b24170..6bd589f39e2 100644
--- a/streaming/stream_manager.hh
+++ b/streaming/stream_manager.hh
@@ -172,9 +172,9 @@ class stream_manager : public gms::i_endpoint_state_change_subscriber, public en

reader_consumer_v2 make_streaming_consumer(
uint64_t estimated_partitions, stream_reason, service::frozen_topology_guard);
public:

- virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, gms::permit_id) override;
- virtual future<> on_remove(inet_address endpoint, gms::permit_id) override;
- virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr ep_state, gms::permit_id) override;
+ virtual future<> on_dead(inet_address endpoint, locator::host_id id, endpoint_state_ptr state, gms::permit_id) override;
+ virtual future<> on_remove(inet_address endpoint, locator::host_id id, gms::permit_id) override;
+ virtual future<> on_restart(inet_address endpoint, locator::host_id id, endpoint_state_ptr ep_state, gms::permit_id) override;

private:
void fail_all_sessions();
diff --git a/cdc/generation.cc b/cdc/generation.cc
index b21198af5de..6c781eed7d6 100644
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -841,18 +841,18 @@ future<> generation_service::leave_ring() {
co_await _gossiper.unregister_(shared_from_this());
}

-future<> generation_service::on_join(gms::inet_address ep, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
- return on_change(ep, ep_state->get_application_state_map(), pid);
+future<> generation_service::on_join(gms::inet_address ep, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
+ return on_change(ep, id, ep_state->get_application_state_map(), pid);
}

-future<> generation_service::on_change(gms::inet_address ep, const gms::application_state_map& states, gms::permit_id pid) {
+future<> generation_service::on_change(gms::inet_address ep, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
assert_shard_zero(__PRETTY_FUNCTION__);

if (_raft_topology_change_enabled()) {
return make_ready_future<>();
}

- return on_application_state_change(ep, states, gms::application_state::CDC_GENERATION_ID, pid, [this] (gms::inet_address ep, const gms::versioned_value& v, gms::permit_id) {
+ return on_application_state_change(ep, id, states, gms::application_state::CDC_GENERATION_ID, pid, [this] (gms::inet_address ep, locator::host_id id, const gms::versioned_value& v, gms::permit_id) {
auto gen_id = gms::versioned_value::cdc_generation_id_from_string(v.value());
cdc_log.debug("Endpoint: {}, CDC generation ID change: {}", ep, gen_id);

diff --git a/gms/endpoint_state.cc b/gms/endpoint_state.cc
index 5c9ef4b90ed..0770f239afd 100644
--- a/gms/endpoint_state.cc
+++ b/gms/endpoint_state.cc
@@ -80,11 +80,12 @@ std::unordered_set<dht::token> endpoint_state::get_tokens() const {
}

future<> i_endpoint_state_change_subscriber::on_application_state_change(inet_address endpoint,
+ locator::host_id id,
const gms::application_state_map& states, application_state app_state, permit_id pid,
- std::function<future<>(inet_address, const gms::versioned_value&, permit_id)> func) {
+ std::function<future<>(inet_address, locator::host_id, const gms::versioned_value&, permit_id)> func) {
auto it = states.find(app_state);
if (it != states.end()) {
- return func(endpoint, it->second, pid);
+ return func(endpoint, id, it->second, pid);
}
return make_ready_future<>();
}
diff --git a/gms/feature_service.cc b/gms/feature_service.cc
index eaa466f8741..52044722147 100644
--- a/gms/feature_service.cc
+++ b/gms/feature_service.cc
@@ -262,10 +262,10 @@ class persistent_feature_enabler : public i_endpoint_state_change_subscriber {
, _ss(ss)
{
}
- future<> on_join(inet_address ep, endpoint_state_ptr state, gms::permit_id) override {
+ future<> on_join(inet_address ep, locator::host_id id, endpoint_state_ptr state, gms::permit_id) override {
return enable_features();
}
- future<> on_change(inet_address ep, const gms::application_state_map& states, gms::permit_id pid) override {
+ future<> on_change(inet_address ep, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) override {
if (states.contains(application_state::SUPPORTED_FEATURES)) {
return enable_features();
}
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 78a9ec6edba..a11fef48dce 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -703,10 +703,12 @@ future<> gossiper::remove_endpoint(inet_address endpoint, permit_id pid) {
auto permit = co_await lock_endpoint(endpoint, pid);
pid = permit.id();

+ auto state = get_endpoint_state_ptr(endpoint);
+
// do subscribers first so anything in the subscriber that depends on gossiper state won't get confused
try {
- co_await _subscribers.for_each([endpoint, pid] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
- return subscriber->on_remove(endpoint, pid);
+ co_await _subscribers.for_each([endpoint, state, pid] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
+ return subscriber->on_remove(endpoint, state ? state->get_host_id() : locator::host_id{}, pid);
});
} catch (...) {
logger.warn("Fail to call on_remove callback: {}", std::current_exception());
@@ -718,8 +720,6 @@ future<> gossiper::remove_endpoint(inet_address endpoint, permit_id pid) {
logger.info("removed {} from _seeds, updated _seeds list = {}", endpoint, _seeds);
}

- auto state = get_endpoint_state_ptr(endpoint);
-
if (!state) {

logger.warn("There is no state for the removed IP {}", endpoint);

co_return;
@@ -1769,7 +1769,7 @@ future<> gossiper::real_mark_alive(inet_address addr) {

logger.info("InetAddress {}/{} is now UP, status = {}", es->get_host_id(), addr, status);

co_await _subscribers.for_each([addr, es, pid = permit.id()] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) -> future<> {

- co_await subscriber->on_alive(addr, es, pid);
+ co_await subscriber->on_alive(addr, es->get_host_id(), es, pid);

logger.trace("Notified {}", fmt::ptr(subscriber.get()));
});
}

@@ -1812,7 +1812,7 @@ future<> gossiper::handle_major_state_change(inet_address ep, endpoint_state eps
if (eps_old) {
// the node restarted: it is up to the subscriber to take whatever action is necessary
co_await _subscribers.for_each([ep, eps_old, pid] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
- return subscriber->on_restart(ep, eps_old, pid);
+ return subscriber->on_restart(ep, eps_old->get_host_id(), eps_old, pid);
});
}

@@ -1828,7 +1828,7 @@ future<> gossiper::handle_major_state_change(inet_address ep, endpoint_state eps
}

co_await _subscribers.for_each([ep, ep_state, pid] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
- return subscriber->on_join(ep, ep_state, pid);
+ return subscriber->on_join(ep, ep_state->get_host_id(), ep_state, pid);
});

// check this at the end so nodes will learn about the endpoint
@@ -1909,6 +1909,8 @@ future<> gossiper::apply_new_states(inet_address addr, endpoint_state local_stat
ep = std::current_exception();
}

+ auto host_id = local_state.get_host_id();
+
// We must replicate endpoint states before listeners run.
// Exceptions during replication will cause abort because node's state
// would be inconsistent across shards. Changes listeners depend on state
@@ -1924,7 +1926,7 @@ future<> gossiper::apply_new_states(inet_address addr, endpoint_state local_stat
// Some values are set only once, so listeners would never be re-run.
// Listeners should decide which failures are non-fatal and swallow them.
try {
- co_await do_on_change_notifications(addr, changed, pid);
+ co_await do_on_change_notifications(addr, host_id, changed, pid);
} catch (...) {
auto msg = format("Gossip change listener failed: {}", std::current_exception());
if (_abort_source.abort_requested()) {
@@ -1937,18 +1939,18 @@ future<> gossiper::apply_new_states(inet_address addr, endpoint_state local_stat
maybe_rethrow_exception(std::move(ep));
}

-future<> gossiper::do_on_change_notifications(inet_address addr, const gms::application_state_map& states, permit_id pid) const {
+future<> gossiper::do_on_change_notifications(inet_address addr, locator::host_id id, const gms::application_state_map& states, permit_id pid) const {
co_await _subscribers.for_each([&] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
// Once _abort_source is aborted, don't attempt to process any further notifications
// because that would violate monotonicity due to partially failed notification.
_abort_source.check();
- return subscriber->on_change(addr, states, pid);
+ return subscriber->on_change(addr, id, states, pid);
});
}

future<> gossiper::do_on_dead_notifications(inet_address addr, endpoint_state_ptr state, permit_id pid) const {
co_await _subscribers.for_each([addr, state = std::move(state), pid] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
- return subscriber->on_dead(addr, state, pid);
+ return subscriber->on_dead(addr, state->get_host_id(), state, pid);
});
}

@@ -2270,7 +2272,7 @@ future<> gossiper::add_local_application_state(application_state_map states) {
// now we might defer again, so this could be reordered. But we've
// ensured the whole set of values are monotonically versioned and
// applied to endpoint state.
- co_await gossiper.do_on_change_notifications(ep_addr, states, permit.id());
+ co_await gossiper.do_on_change_notifications(ep_addr, gossiper.my_host_id(), states, permit.id());
});
} catch (...) {
logger.warn("Fail to apply application_state: {}", std::current_exception());
diff --git a/repair/row_level.cc b/repair/row_level.cc
index b562c708dfe..7dccf992920 100644
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -3203,17 +3203,20 @@ class row_level_repair_gossip_helper : public gms::i_endpoint_state_change_subsc

}
virtual future<> on_dead(
gms::inet_address endpoint,

+ locator::host_id id,

gms::endpoint_state_ptr state,
gms::permit_id) override {

return remove_row_level_repair(_repair_service.get_gossiper().get_host_id(endpoint));
}
virtual future<> on_remove(
gms::inet_address endpoint,
+ locator::host_id id,
gms::permit_id) override {
return remove_row_level_repair(_repair_service.get_gossiper().get_host_id(endpoint));
}
virtual future<> on_restart(
gms::inet_address endpoint,
+ locator::host_id id,
gms::endpoint_state_ptr ep_state,
gms::permit_id) override {
return remove_row_level_repair(_repair_service.get_gossiper().get_host_id(endpoint));
diff --git a/service/migration_manager.cc b/service/migration_manager.cc
index 67a0c4d5a44..9da50bf51f0 100644
--- a/service/migration_manager.cc
+++ b/service/migration_manager.cc
@@ -1151,13 +1151,13 @@ future<column_mapping> get_column_mapping(db::system_keyspace& sys_ks, table_id
return db::schema_tables::get_column_mapping(sys_ks, table_id, v);
}

-future<> migration_manager::on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id) {
+future<> migration_manager::on_join(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id) {
schedule_schema_pull(ep_state->get_host_id(), *ep_state);
return make_ready_future();
}

-future<> migration_manager::on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id pid) {
- return on_application_state_change(endpoint, states, gms::application_state::SCHEMA, pid, [this] (gms::inet_address endpoint, const gms::versioned_value&, gms::permit_id) {
+future<> migration_manager::on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
+ return on_application_state_change(endpoint, id, states, gms::application_state::SCHEMA, pid, [this] (gms::inet_address endpoint, locator::host_id id, const gms::versioned_value&, gms::permit_id) {
auto ep_state = _gossiper.get_endpoint_state_ptr(endpoint);
if (!ep_state || _gossiper.is_dead_state(*ep_state)) {
mlogger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint);
@@ -1172,7 +1172,7 @@ future<> migration_manager::on_change(gms::inet_address endpoint, const gms::app
});
}

-future<> migration_manager::on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) {
+future<> migration_manager::on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id) {
schedule_schema_pull(state->get_host_id(), *state);
return make_ready_future();
}
diff --git a/service/misc_services.cc b/service/misc_services.cc
index a2e6d558c33..691f1a4b3d8 100644
--- a/service/misc_services.cc
+++ b/service/misc_services.cc
@@ -266,8 +266,8 @@ future<> view_update_backlog_broker::stop() {
});
}

-future<> view_update_backlog_broker::on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id pid) {
- return on_application_state_change(endpoint, states, gms::application_state::VIEW_BACKLOG, pid, [this] (gms::inet_address endpoint, const gms::versioned_value& value, gms::permit_id) {
+future<> view_update_backlog_broker::on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
+ return on_application_state_change(endpoint, id, states, gms::application_state::VIEW_BACKLOG, pid, [this] (gms::inet_address endpoint, locator::host_id id, const gms::versioned_value& value, gms::permit_id) {
if (utils::get_local_injector().enter("skip_updating_local_backlog_via_view_update_backlog_broker")) {
return make_ready_future<>();
}
@@ -304,7 +304,7 @@ future<> view_update_backlog_broker::on_change(gms::inet_address endpoint, const
});
}

-future<> view_update_backlog_broker::on_remove(gms::inet_address endpoint, gms::permit_id) {
+future<> view_update_backlog_broker::on_remove(gms::inet_address endpoint, locator::host_id id, gms::permit_id) {
_sp.local()._view_update_backlogs.erase(_gossiper.get_host_id(endpoint));
return make_ready_future();
}
diff --git a/service/storage_service.cc b/service/storage_service.cc
index fe7470ceb5a..7898ba15d57 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -971,17 +971,17 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s
{}

virtual future<>
- on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
+ on_join(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
return on_endpoint_change(endpoint, ep_state, permit_id, "on_join");
}

virtual future<>
- on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
+ on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {

return on_endpoint_change(endpoint, ep_state, permit_id, "on_alive");
}

virtual future<>
- on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
+ on_restart(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {

return on_endpoint_change(endpoint, ep_state, permit_id, "on_restart");
}

};
@@ -2575,12 +2575,12 @@ future<> storage_service::handle_state_removed(inet_address endpoint, std::vecto
}
}

-future<> storage_service::on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
+future<> storage_service::on_join(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
slogger.debug("endpoint={} on_join: permit_id={}", endpoint, pid);
- co_await on_change(endpoint, ep_state->get_application_state_map(), pid);
+ co_await on_change(endpoint, id, ep_state->get_application_state_map(), pid);
}

-future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id pid) {
+future<> storage_service::on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {
const auto& tm = get_token_metadata();
const auto host_id = state->get_host_id();

slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, host_id, pid);

@@ -2607,14 +2607,14 @@ future<std::optional<gms::inet_address>> storage_service::get_ip_from_peers_tabl
co_return std::nullopt;
}

-future<> storage_service::on_change(gms::inet_address endpoint, const gms::application_state_map& states_, gms::permit_id pid) {
+future<> storage_service::on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states_, gms::permit_id pid) {
// copy the states map locally since the coroutine may yield
auto states = states_;
slogger.debug("endpoint={} on_change: states={}, permit_id={}", endpoint, states, pid);
if (raft_topology_change_enabled()) {
slogger.debug("ignore status changes since topology changes are using raft");
} else {
- co_await on_application_state_change(endpoint, states, application_state::STATUS, pid, [this] (inet_address endpoint, const gms::versioned_value& value, gms::permit_id pid) -> future<> {
+ co_await on_application_state_change(endpoint, id, states, application_state::STATUS, pid, [this] (inet_address endpoint, locator::host_id id, const gms::versioned_value& value, gms::permit_id pid) -> future<> {
std::vector<sstring> pieces;
boost::split(pieces, value.value(), boost::is_any_of(versioned_value::DELIMITER));
if (pieces.empty()) {
@@ -2683,7 +2683,7 @@ future<> storage_service::maybe_reconnect_to_preferred_ip(inet_address ep, inet_
}

-future<> storage_service::on_remove(gms::inet_address endpoint, gms::permit_id pid) {
+future<> storage_service::on_remove(gms::inet_address endpoint, locator::host_id id, gms::permit_id pid) {
slogger.debug("endpoint={} on_remove: permit_id={}", endpoint, pid);

if (raft_topology_change_enabled()) {
@@ -2715,16 +2715,16 @@ future<> storage_service::on_remove(gms::inet_address endpoint, gms::permit_id p
co_await replicate_to_all_cores(std::move(tmptr));
}

-future<> storage_service::on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id pid) {
+future<> storage_service::on_dead(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {
slogger.debug("endpoint={} on_dead: permit_id={}", endpoint, pid);
return notify_down(endpoint);
}

-future<> storage_service::on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id pid) {
+future<> storage_service::on_restart(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {

slogger.debug("endpoint={} on_restart: permit_id={}", endpoint, pid);
// If we have restarted before the node was even marked down, we need to reset the connection pool

if (endpoint != get_broadcast_address() && _gossiper.is_alive(state->get_host_id())) {

- return on_dead(endpoint, state, pid);
+ return on_dead(endpoint, id, state, pid);
}
return make_ready_future();
}
diff --git a/streaming/stream_manager.cc b/streaming/stream_manager.cc
index f428f1a0c8b..1ec325a885b 100644
--- a/streaming/stream_manager.cc
+++ b/streaming/stream_manager.cc
@@ -345,7 +345,7 @@ void stream_manager::fail_all_sessions() {
}
}

-future<> stream_manager::on_remove(inet_address endpoint, gms::permit_id) {
+future<> stream_manager::on_remove(inet_address endpoint, locator::host_id id, gms::permit_id) {
if (has_peer(endpoint)) {
sslog.info("stream_manager: Close all stream_session with peer = {} in on_remove", endpoint);
//FIXME: discarded future.
@@ -358,7 +358,7 @@ future<> stream_manager::on_remove(inet_address endpoint, gms::permit_id) {
return make_ready_future();
}

-future<> stream_manager::on_restart(inet_address endpoint, endpoint_state_ptr ep_state, gms::permit_id) {
+future<> stream_manager::on_restart(inet_address endpoint, locator::host_id id, endpoint_state_ptr ep_state, gms::permit_id) {
if (has_peer(endpoint)) {
sslog.info("stream_manager: Close all stream_session with peer = {} in on_restart", endpoint);
//FIXME: discarded future.
@@ -371,7 +371,7 @@ future<> stream_manager::on_restart(inet_address endpoint, endpoint_state_ptr ep
return make_ready_future();
}

-future<> stream_manager::on_dead(inet_address endpoint, endpoint_state_ptr ep_state, gms::permit_id) {
+future<> stream_manager::on_dead(inet_address endpoint, locator::host_id id, endpoint_state_ptr ep_state, gms::permit_id) {
if (has_peer(endpoint)) {
sslog.info("stream_manager: Close all stream_session with peer = {} in on_dead", endpoint);
//FIXME: discarded future.
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:15:16 AMFeb 24

to scylladb-dev@googlegroups.com

host_id is already available at this point.
---
gms/gossiper.cc | 4 ----
1 file changed, 4 deletions(-)

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index d4a6b498b51..72de339cb6d 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -784,10 +784,6 @@ future<> gossiper::do_status_check() {

// check for dead state removal

auto expire_time = get_expire_time_for_endpoint(host_id);
if (!is_alive && (now > expire_time)) {

- const auto host_id = eps->get_host_id();
- if (!host_id) {
- on_internal_error_noexcept(logger, format("Endpoint {} is dead and expired, but unexpecteduly, it has no HOST_ID in endpoint state", endpoint));
- }
const auto* node = get_token_metadata_ptr()->get_topology().find_node(host_id);
if (!host_id || !node || !node->is_member()) {
logger.debug("time is expiring for endpoint : {} ({})", endpoint, expire_time.time_since_epoch().count());
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:15:16 AMFeb 24

to scylladb-dev@googlegroups.com

Now that we have host ids in endpoint state change subscribers some of
them can be simplified by using the id directly instead of locking it up
by ip.
---
service/storage_service.hh | 8 ++--
streaming/stream_manager.hh | 4 +-
repair/row_level.cc | 6 +--
service/migration_manager.cc | 13 +++---
service/misc_services.cc | 4 +-
service/storage_service.cc | 76 +++++++++++++++---------------------
streaming/stream_manager.cc | 44 ++++++++++-----------
7 files changed, 70 insertions(+), 85 deletions(-)

diff --git a/service/storage_service.hh b/service/storage_service.hh
index 6180b858245..429926fbcef 100644
--- a/service/storage_service.hh
+++ b/service/storage_service.hh
@@ -548,7 +548,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
*
* @param endpoint bootstrapping node
*/
- future<> handle_state_bootstrap(inet_address endpoint, gms::permit_id);
+ future<> handle_state_bootstrap(inet_address endpoint, locator::host_id id, gms::permit_id);

/**
* Handle node move to normal state. That is, node is entering token ring and participating
@@ -556,7 +556,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
*
* @param endpoint node
*/
- future<> handle_state_normal(inet_address endpoint, gms::permit_id);
+ future<> handle_state_normal(inet_address endpoint, locator::host_id id, gms::permit_id);

/**
* Handle node leaving the ring. This will happen when a node is decommissioned
@@ -564,7 +564,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
* @param endpoint If reason for leaving is decommission, endpoint is the leaving node.
* @param pieces STATE_LEFT,token
*/
- future<> handle_state_left(inet_address endpoint, std::vector<sstring> pieces, gms::permit_id);
+ future<> handle_state_left(inet_address endpoint, locator::host_id id, std::vector<sstring> pieces, gms::permit_id);

/**
* Handle notification that a node being actively removed from the ring via 'removenode'
@@ -572,7 +572,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
* @param endpoint node
* @param pieces is REMOVED_TOKEN (node is gone)
*/
- future<> handle_state_removed(inet_address endpoint, std::vector<sstring> pieces, gms::permit_id);
+ future<> handle_state_removed(inet_address endpoint, locator::host_id id, std::vector<sstring> pieces, gms::permit_id);

private:
future<> excise(std::unordered_set<token> tokens, inet_address endpoint_ip, locator::host_id endpoint_hid,
diff --git a/streaming/stream_manager.hh b/streaming/stream_manager.hh
index 6bd589f39e2..284bb6d971e 100644
--- a/streaming/stream_manager.hh
+++ b/streaming/stream_manager.hh
@@ -178,8 +178,8 @@ class stream_manager : public gms::i_endpoint_state_change_subscriber, public en

private:
void fail_all_sessions();
- void fail_sessions(inet_address endpoint);
- bool has_peer(inet_address endpoint) const;
+ void fail_sessions(locator::host_id id);
+ bool has_peer(locator::host_id id) const;

void init_messaging_service_handler(abort_source& as);
future<> uninit_messaging_service_handler();
diff --git a/repair/row_level.cc b/repair/row_level.cc
index 7dccf992920..39a2e20e7aa 100644
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -3206,20 +3206,20 @@ class row_level_repair_gossip_helper : public gms::i_endpoint_state_change_subsc

locator::host_id id,
gms::endpoint_state_ptr state,

gms::permit_id) override {
- return remove_row_level_repair(_repair_service.get_gossiper().get_host_id(endpoint));
+ return remove_row_level_repair(id);

}
virtual future<> on_remove(
gms::inet_address endpoint,

locator::host_id id,
gms::permit_id) override {

- return remove_row_level_repair(_repair_service.get_gossiper().get_host_id(endpoint));
+ return remove_row_level_repair(id);

}
virtual future<> on_restart(
gms::inet_address endpoint,

locator::host_id id,
gms::endpoint_state_ptr ep_state,

gms::permit_id) override {
- return remove_row_level_repair(_repair_service.get_gossiper().get_host_id(endpoint));
+ return remove_row_level_repair(id);
}
};

diff --git a/service/migration_manager.cc b/service/migration_manager.cc
index 9da50bf51f0..d2e4cb12f57 100644
--- a/service/migration_manager.cc
+++ b/service/migration_manager.cc
@@ -1152,28 +1152,27 @@ future<column_mapping> get_column_mapping(db::system_keyspace& sys_ks, table_id

}

future<> migration_manager::on_join(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id) {

- schedule_schema_pull(ep_state->get_host_id(), *ep_state);
+ schedule_schema_pull(id, *ep_state);
return make_ready_future();
}

future<> migration_manager::on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {

return on_application_state_change(endpoint, id, states, gms::application_state::SCHEMA, pid, [this] (gms::inet_address endpoint, locator::host_id id, const gms::versioned_value&, gms::permit_id) {

- auto ep_state = _gossiper.get_endpoint_state_ptr(endpoint);
+ auto ep_state = _gossiper.get_endpoint_state_ptr(id);
if (!ep_state || _gossiper.is_dead_state(*ep_state)) {
- mlogger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint);
+ mlogger.debug("Ignoring state change for dead or unknown endpoint: {}", id);
return make_ready_future();
}
- const auto host_id = _gossiper.get_host_id(endpoint);
- const auto* node = _storage_proxy.get_token_metadata_ptr()->get_topology().find_node(host_id);
+ const auto* node = _storage_proxy.get_token_metadata_ptr()->get_topology().find_node(id);

if (node && node->is_member()) {

- schedule_schema_pull(host_id, *ep_state);
+ schedule_schema_pull(id, *ep_state);
}
return make_ready_future<>();
});

}

future<> migration_manager::on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id) {

- schedule_schema_pull(state->get_host_id(), *state);
+ schedule_schema_pull(id, *state);

return make_ready_future();
}

diff --git a/service/misc_services.cc b/service/misc_services.cc

index 691f1a4b3d8..b3a3f844699 100644
--- a/service/misc_services.cc
+++ b/service/misc_services.cc
@@ -294,7 +294,7 @@ future<> view_update_backlog_broker::on_change(gms::inet_address endpoint, locat
return make_ready_future();
}
auto backlog = view_update_backlog_timestamped{db::view::update_backlog{current, max}, ticks};
- return _sp.invoke_on_all([id = _gossiper.get_host_id(endpoint), backlog] (service::storage_proxy& sp) {
+ return _sp.invoke_on_all([id, backlog] (service::storage_proxy& sp) {
auto[it, inserted] = sp._view_update_backlogs.try_emplace(id, backlog);
if (!inserted && it->second.ts < backlog.ts) {
it->second = backlog;
@@ -305,7 +305,7 @@ future<> view_update_backlog_broker::on_change(gms::inet_address endpoint, locat

}

future<> view_update_backlog_broker::on_remove(gms::inet_address endpoint, locator::host_id id, gms::permit_id) {

- _sp.local()._view_update_backlogs.erase(_gossiper.get_host_id(endpoint));
+ _sp.local()._view_update_backlogs.erase(id);

return make_ready_future();
}

diff --git a/service/storage_service.cc b/service/storage_service.cc

index 7898ba15d57..07a7375eacb 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -911,12 +911,7 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s
storage_service& _ss;

future<>
- on_endpoint_change(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id, const char* ev) {
- auto app_state_ptr = ep_state->get_application_state_ptr(gms::application_state::HOST_ID);
- if (!app_state_ptr) {
- co_return;
- }
- locator::host_id id(utils::UUID(app_state_ptr->value()));
+ on_endpoint_change(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id, const char* ev) {
rslog.debug("ip_address_updater::on_endpoint_change({}) {} {}", ev, endpoint, id);

// If id maps to different ip in peers table it needs to be updated which is done by sync_raft_topology_nodes below
@@ -972,17 +967,17 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s

virtual future<>

on_join(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {

- return on_endpoint_change(endpoint, ep_state, permit_id, "on_join");
+ return on_endpoint_change(endpoint, id, ep_state, permit_id, "on_join");
}

virtual future<>

on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {

- return on_endpoint_change(endpoint, ep_state, permit_id, "on_alive");
+ return on_endpoint_change(endpoint, id, ep_state, permit_id, "on_alive");
}

virtual future<>

on_restart(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {

- return on_endpoint_change(endpoint, ep_state, permit_id, "on_restart");
+ return on_endpoint_change(endpoint, id, ep_state, permit_id, "on_restart");
}
};

@@ -2245,19 +2240,18 @@ storage_service::get_range_to_address_map(locator::effective_replication_map_ptr
std::ranges::to<std::unordered_map>();
}

-future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::permit_id pid) {
- slogger.debug("endpoint={} handle_state_bootstrap: permit_id={}", endpoint, pid);
+future<> storage_service::handle_state_bootstrap(inet_address endpoint, locator::host_id host_id, gms::permit_id pid) {
+ slogger.debug("endpoint={}/{} handle_state_bootstrap: permit_id={}", endpoint, host_id, pid);
// explicitly check for TOKENS, because a bootstrapping node might be bootstrapping in legacy mode; that is, not using vnodes and no token specified
auto tokens = get_tokens_for(endpoint);

- slogger.debug("Node {} state bootstrapping, token {}", endpoint, tokens);
+ slogger.debug("Node {}/{} state bootstrapping, token {}", endpoint, host_id, tokens);

// if this node is present in token metadata, either we have missed intermediate states
// or the node had crashed. Print warning if needed, clear obsolete stuff and
// continue.
auto tmlock = co_await get_token_metadata_lock();
auto tmptr = co_await get_mutable_token_metadata_ptr();
- const auto host_id = _gossiper.get_host_id(endpoint);
if (tmptr->is_normal_token_owner(host_id)) {
// If isLeaving is false, we have missed both LEAVING and LEFT. However, if
// isLeaving is true, we have only missed LEFT. Waiting time between completing
@@ -2276,12 +2270,12 @@ future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::per
co_await replicate_to_all_cores(std::move(tmptr));
}

-future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit_id pid) {
- slogger.debug("endpoint={} handle_state_normal: permit_id={}", endpoint, pid);
+future<> storage_service::handle_state_normal(inet_address endpoint, locator::host_id host_id, gms::permit_id pid) {
+ slogger.debug("endpoint={}/{} handle_state_normal: permit_id={}", endpoint, host_id, pid);

auto tokens = get_tokens_for(endpoint);

- slogger.info("Node {} is in normal state, tokens: {}", endpoint, tokens);
+ slogger.info("Node {}/{} is in normal state, tokens: {}", endpoint, host_id, tokens);

auto tmlock = std::make_unique<token_metadata_lock>(co_await get_token_metadata_lock());
auto tmptr = co_await get_mutable_token_metadata_ptr();
@@ -2300,7 +2294,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
endpoints_to_remove.insert(node);
};
// Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300).
- auto host_id = _gossiper.get_host_id(endpoint);
if (tmptr->is_normal_token_owner(host_id)) {
slogger.info("handle_state_normal: node {}/{} was already a normal token owner", endpoint, host_id);
}
@@ -2527,14 +2520,13 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
slogger.info("handle_state_normal for {}/{} finished", endpoint, host_id);
}

-future<> storage_service::handle_state_left(inet_address endpoint, std::vector<sstring> pieces, gms::permit_id pid) {
- slogger.debug("endpoint={} handle_state_left: permit_id={}", endpoint, pid);
+future<> storage_service::handle_state_left(inet_address endpoint, locator::host_id host_id, std::vector<sstring> pieces, gms::permit_id pid) {
+ slogger.debug("endpoint={}/{} handle_state_left: permit_id={}", endpoint, host_id, pid);

if (pieces.size() < 2) {
slogger.warn("Fail to handle_state_left endpoint={} pieces={}", endpoint, pieces);
co_return;
}
- const auto host_id = _gossiper.get_host_id(endpoint);
auto tokens = get_tokens_for(endpoint);
slogger.debug("Node {}/{} state left, tokens {}", endpoint, host_id, tokens);
if (tokens.empty()) {
@@ -2551,10 +2543,10 @@ future<> storage_service::handle_state_left(inet_address endpoint, std::vector<s
co_await excise(tokens, endpoint, host_id, extract_expire_time(pieces), pid);
}

-future<> storage_service::handle_state_removed(inet_address endpoint, std::vector<sstring> pieces, gms::permit_id pid) {
- slogger.debug("endpoint={} handle_state_removed: permit_id={}", endpoint, pid);
+future<> storage_service::handle_state_removed(inet_address endpoint, locator::host_id host_id, std::vector<sstring> pieces, gms::permit_id pid) {
+ slogger.debug("endpoint={}/{} handle_state_removed: permit_id={}", endpoint, host_id, pid);

- if (endpoint == get_broadcast_address()) {

+ if (is_me(host_id)) {
slogger.info("Received removenode gossip about myself. Is this node rejoining after an explicit removenode?");
try {
co_await drain();
@@ -2564,7 +2556,6 @@ future<> storage_service::handle_state_removed(inet_address endpoint, std::vecto
}
co_return;
}
- const auto host_id = _gossiper.get_host_id(endpoint);
if (get_token_metadata().is_normal_token_owner(host_id)) {

auto remove_tokens = get_token_metadata().get_tokens(host_id);
std::unordered_set<token> tmp(remove_tokens.begin(), remove_tokens.end());

@@ -2580,9 +2571,8 @@ future<> storage_service::on_join(gms::inet_address endpoint, locator::host_id i

co_await on_change(endpoint, id, ep_state->get_application_state_map(), pid);
}

-future<> storage_service::on_alive(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {
+future<> storage_service::on_alive(gms::inet_address endpoint, locator::host_id host_id, gms::endpoint_state_ptr state, gms::permit_id pid) {

const auto& tm = get_token_metadata();

- const auto host_id = state->get_host_id();

slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, host_id, pid);

const auto* node = tm.get_topology().find_node(host_id);
if (node && node->is_member()) {

@@ -2607,14 +2597,14 @@ future<std::optional<gms::inet_address>> storage_service::get_ip_from_peers_tabl
co_return std::nullopt;
}

-future<> storage_service::on_change(gms::inet_address endpoint, locator::host_id id, const gms::application_state_map& states_, gms::permit_id pid) {
+future<> storage_service::on_change(gms::inet_address endpoint, locator::host_id host_id, const gms::application_state_map& states_, gms::permit_id pid) {

// copy the states map locally since the coroutine may yield
auto states = states_;
slogger.debug("endpoint={} on_change: states={}, permit_id={}", endpoint, states, pid);
if (raft_topology_change_enabled()) {
slogger.debug("ignore status changes since topology changes are using raft");
} else {

- co_await on_application_state_change(endpoint, id, states, application_state::STATUS, pid, [this] (inet_address endpoint, locator::host_id id, const gms::versioned_value& value, gms::permit_id pid) -> future<> {
+ co_await on_application_state_change(endpoint, host_id, states, application_state::STATUS, pid, [this] (inet_address endpoint, locator::host_id id, const gms::versioned_value& value, gms::permit_id pid) -> future<> {

std::vector<sstring> pieces;
boost::split(pieces, value.value(), boost::is_any_of(versioned_value::DELIMITER));
if (pieces.empty()) {

@@ -2623,25 +2613,24 @@ future<> storage_service::on_change(gms::inet_address endpoint, locator::host_id
}
const sstring& move_name = pieces[0];
if (move_name == versioned_value::STATUS_BOOTSTRAPPING) {
- co_await handle_state_bootstrap(endpoint, pid);
+ co_await handle_state_bootstrap(endpoint, id, pid);
} else if (move_name == versioned_value::STATUS_NORMAL ||
move_name == versioned_value::SHUTDOWN) {
- co_await handle_state_normal(endpoint, pid);
+ co_await handle_state_normal(endpoint, id, pid);
} else if (move_name == versioned_value::REMOVED_TOKEN) {
- co_await handle_state_removed(endpoint, std::move(pieces), pid);
+ co_await handle_state_removed(endpoint, id, std::move(pieces), pid);
} else if (move_name == versioned_value::STATUS_LEFT) {
- co_await handle_state_left(endpoint, std::move(pieces), pid);
+ co_await handle_state_left(endpoint, id, std::move(pieces), pid);
} else {
co_return; // did nothing.
}
});
}
- auto ep_state = _gossiper.get_endpoint_state_ptr(endpoint);
+ auto ep_state = _gossiper.get_endpoint_state_ptr(host_id);
if (!ep_state || _gossiper.is_dead_state(*ep_state)) {
slogger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint);
co_return;
}
- const auto host_id = _gossiper.get_host_id(endpoint);

const auto& tm = get_token_metadata();

const auto* node = tm.get_topology().find_node(host_id);
// The check peers[host_id] == endpoint is needed when a node changes
@@ -2683,21 +2672,18 @@ future<> storage_service::maybe_reconnect_to_preferred_ip(inet_address ep, inet_
}

-future<> storage_service::on_remove(gms::inet_address endpoint, locator::host_id id, gms::permit_id pid) {
- slogger.debug("endpoint={} on_remove: permit_id={}", endpoint, pid);
+future<> storage_service::on_remove(gms::inet_address endpoint, locator::host_id host_id, gms::permit_id pid) {
+ slogger.debug("endpoint={}/{} on_remove: permit_id={}", endpoint, host_id, pid);

if (raft_topology_change_enabled()) {
slogger.debug("ignore on_remove since topology changes are using raft");
co_return;
}

- locator::host_id host_id;

- try {
- // It seems gossiper does not check for endpoint existence before calling the callback
- // so the lookup may fail, but there is nothing to do in this case.
- host_id = _gossiper.get_host_id(endpoint);
- } catch (...) {
+ // It seems gossiper does not check for endpoint existence before calling the callback
+ // In this case host_id will be empty
+ if (host_id == locator::host_id{}) {
co_return;
}

@@ -2716,14 +2702,14 @@ future<> storage_service::on_remove(gms::inet_address endpoint, locator::host_id

}

future<> storage_service::on_dead(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {

- slogger.debug("endpoint={} on_dead: permit_id={}", endpoint, pid);
+ slogger.debug("endpoint={}/{} on_dead: permit_id={}", endpoint, id, pid);
return notify_down(endpoint);

}

future<> storage_service::on_restart(gms::inet_address endpoint, locator::host_id id, gms::endpoint_state_ptr state, gms::permit_id pid) {

- slogger.debug("endpoint={} on_restart: permit_id={}", endpoint, pid);
+ slogger.debug("endpoint={}/{} on_restart: permit_id={}", endpoint, id, pid);

// If we have restarted before the node was even marked down, we need to reset the connection pool

- if (endpoint != get_broadcast_address() && _gossiper.is_alive(state->get_host_id())) {
+ if (id != my_host_id() && _gossiper.is_alive(id)) {

return on_dead(endpoint, id, state, pid);
}
return make_ready_future();
diff --git a/streaming/stream_manager.cc b/streaming/stream_manager.cc

index 1ec325a885b..39eb572456d 100644
--- a/streaming/stream_manager.cc
+++ b/streaming/stream_manager.cc
@@ -302,10 +302,10 @@ stream_bytes stream_manager::get_progress_on_local_shard() const {
return ret;
}

-bool stream_manager::has_peer(inet_address endpoint) const {
+bool stream_manager::has_peer(locator::host_id id) const {
for (auto sr : get_all_streams()) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
- if (_gossiper.get_address_map().find(session->peer) == endpoint) {
+ if (session->peer == id) {
return true;
}
}
@@ -327,10 +327,10 @@ future<> stream_manager::fail_stream_plan(streaming::plan_id plan_id) {
});
}

-void stream_manager::fail_sessions(inet_address endpoint) {
+void stream_manager::fail_sessions(locator::host_id id) {
for (auto sr : get_all_streams()) {
for (auto session : sr->get_coordinator()->get_all_stream_sessions()) {
- if (_gossiper.get_address_map().find(session->peer) == endpoint) {
+ if (session->peer == id) {
session->close_session(stream_session_state::FAILED);
}
}
@@ -346,39 +346,39 @@ void stream_manager::fail_all_sessions() {

}

future<> stream_manager::on_remove(inet_address endpoint, locator::host_id id, gms::permit_id) {

- if (has_peer(endpoint)) {
- sslog.info("stream_manager: Close all stream_session with peer = {} in on_remove", endpoint);
+ if (has_peer(id)) {
+ sslog.info("stream_manager: Close all stream_session with peer = {}/{} in on_remove", endpoint, id);
//FIXME: discarded future.
- (void)container().invoke_on_all([endpoint] (auto& sm) {
- sm.fail_sessions(endpoint);
- }).handle_exception([endpoint] (auto ep) {
- sslog.warn("stream_manager: Fail to close sessions peer = {} in on_remove", endpoint);
+ (void)container().invoke_on_all([id] (auto& sm) {
+ sm.fail_sessions(id);
+ }).handle_exception([endpoint, id] (auto ep) {
+ sslog.warn("stream_manager: Fail to close sessions peer = {}/{} in on_remove", endpoint, id);
});
}
return make_ready_future();

}

future<> stream_manager::on_restart(inet_address endpoint, locator::host_id id, endpoint_state_ptr ep_state, gms::permit_id) {

- if (has_peer(endpoint)) {
- sslog.info("stream_manager: Close all stream_session with peer = {} in on_restart", endpoint);
+ if (has_peer(id)) {
+ sslog.info("stream_manager: Close all stream_session with peer = {}/{} in on_restart", endpoint, id);
//FIXME: discarded future.
- (void)container().invoke_on_all([endpoint] (auto& sm) {
- sm.fail_sessions(endpoint);
- }).handle_exception([endpoint] (auto ep) {
- sslog.warn("stream_manager: Fail to close sessions peer = {} in on_restart", endpoint);
+ (void)container().invoke_on_all([id] (auto& sm) {
+ sm.fail_sessions(id);
+ }).handle_exception([endpoint, id] (auto ep) {
+ sslog.warn("stream_manager: Fail to close sessions peer = {}/{} in on_restart", endpoint, id);
});
}
return make_ready_future();

}

future<> stream_manager::on_dead(inet_address endpoint, locator::host_id id, endpoint_state_ptr ep_state, gms::permit_id) {

- if (has_peer(endpoint)) {
- sslog.info("stream_manager: Close all stream_session with peer = {} in on_dead", endpoint);
+ if (has_peer(id)) {
+ sslog.info("stream_manager: Close all stream_session with peer = {}/{} in on_dead", endpoint, id);
//FIXME: discarded future.
- (void)container().invoke_on_all([endpoint] (auto& sm) {
- sm.fail_sessions(endpoint);
- }).handle_exception([endpoint] (auto ep) {
- sslog.warn("stream_manager: Fail to close sessions peer = {} in on_dead", endpoint);
+ (void)container().invoke_on_all([id] (auto& sm) {
+ sm.fail_sessions(id);
+ }).handle_exception([endpoint, id] (auto ep) {
+ sslog.warn("stream_manager: Fail to close sessions peer = {}/{} in on_dead", endpoint, id);
});
}
return make_ready_future();
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Feb 24, 2025, 6:15:16 AMFeb 24

to scylladb-dev@googlegroups.com

Only host id is needed in the callback anyway.
---
message/messaging_service.hh | 3 +--
message/messaging_service_fwd.hh | 4 ++--
message/messaging_service.cc | 21 +++++++++------------
service/storage_proxy.cc | 14 +++-----------
4 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/message/messaging_service.hh b/message/messaging_service.hh
index ae6ea7bab07..0b466494d67 100644
--- a/message/messaging_service.hh
+++ b/message/messaging_service.hh
@@ -261,10 +261,9 @@ class messaging_service : public seastar::async_sharded_service<messaging_servic
static constexpr int32_t current_version = 0;

struct shard_info {
- shard_info(shared_ptr<rpc_protocol_client_wrapper>&& client, bool topology_ignored, inet_address ip);
+ shard_info(shared_ptr<rpc_protocol_client_wrapper>&& client, bool topology_ignored);
shared_ptr<rpc_protocol_client_wrapper> rpc_client;
const bool topology_ignored;
- const inet_address endpoint;
rpc::stats get_stats() const;
};

diff --git a/message/messaging_service_fwd.hh b/message/messaging_service_fwd.hh
index 08d2940cc76..43c274b7153 100644
--- a/message/messaging_service_fwd.hh
+++ b/message/messaging_service_fwd.hh
@@ -21,8 +21,8 @@ struct msg_addr;
enum class messaging_verb;
class messaging_service;

-using connection_drop_signal_t = boost::signals2::signal_type<void (gms::inet_address, std::optional<locator::host_id>), boost::signals2::keywords::mutex_type<boost::signals2::dummy_mutex>>::type;
-using connection_drop_slot_t = std::function<void(gms::inet_address, std::optional<locator::host_id>)>;
+using connection_drop_signal_t = boost::signals2::signal_type<void (locator::host_id), boost::signals2::keywords::mutex_type<boost::signals2::dummy_mutex>>::type;
+using connection_drop_slot_t = std::function<void(locator::host_id)>;
using connection_drop_registration_t = boost::signals2::scoped_connection;

}
diff --git a/message/messaging_service.cc b/message/messaging_service.cc
index 5656fdee95e..da1bc53a195 100644
--- a/message/messaging_service.cc
+++ b/message/messaging_service.cc
@@ -223,9 +223,9 @@ size_t msg_addr::hash::operator()(const msg_addr& id) const noexcept {
return std::hash<bytes_view>()(id.addr.bytes());
}

-messaging_service::shard_info::shard_info(shared_ptr<rpc_protocol_client_wrapper>&& client, bool topo_ignored, inet_address ip)
+messaging_service::shard_info::shard_info(shared_ptr<rpc_protocol_client_wrapper>&& client, bool topo_ignored)
: rpc_client(std::move(client))
- , topology_ignored(topo_ignored), endpoint(ip)
+ , topology_ignored(topo_ignored)
{
}

@@ -1150,12 +1150,12 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
// the topology (so we always set `topology_ignored` to `false` in that case).
bool topology_ignored = idx != TOPOLOGY_INDEPENDENT_IDX && topology_status.has_value() && *topology_status == false;
if (host_id) {
- auto res = _clients_with_host_id[idx].emplace(*host_id, shard_info(std::move(client), topology_ignored, id.addr));
+ auto res = _clients_with_host_id[idx].emplace(*host_id, shard_info(std::move(client), topology_ignored));
SCYLLA_ASSERT(res.second);
auto it = res.first;
client = it->second.rpc_client;
} else {
- auto res = _clients[idx].emplace(id, shard_info(std::move(client), topology_ignored, id.addr));
+ auto res = _clients[idx].emplace(id, shard_info(std::move(client), topology_ignored));
SCYLLA_ASSERT(res.second);
auto it = res.first;
client = it->second.rpc_client;
@@ -1187,12 +1187,10 @@ void messaging_service::find_and_remove_client(Map& clients, typename Map::key_t
if (it != clients.end() && filter(it->second)) {
auto client = std::move(it->second.rpc_client);

- gms::inet_address addr;
- std::optional<locator::host_id> hid;
+ locator::host_id hid;
if constexpr (std::is_same_v<typename Map::key_type, msg_addr>) {
- addr = id.addr;
+ hid = _address_to_host_id_mapper(id.addr);
} else {
- addr = it->second.endpoint;
hid = id;
}

@@ -1204,10 +1202,10 @@ void messaging_service::find_and_remove_client(Map& clients, typename Map::key_t
// This will make sure messaging_service::stop() blocks until
// client->stop() is over.
//
- (void)client->stop().finally([addr, client, ms = shared_from_this()] {
- mlogger.debug("dropped connection to {}", addr);
+ (void)client->stop().finally([id, client, ms = shared_from_this()] {
+ mlogger.debug("dropped connection to {}", id);
}).discard_result();
- _connection_dropped(addr, hid);
+ _connection_dropped(hid);
}
}

@@ -1220,7 +1218,6 @@ void messaging_service::remove_error_rpc_client(messaging_verb verb, locator::ho

}

// Removes client to id.addr in both _client and _clients_with_host_id

-// FIXME: make removing from _clients_with_host_id more efficient

void messaging_service::remove_rpc_client(msg_addr id, std::optional<locator::host_id> hid) {
for (auto& c : _clients) {
find_and_remove_client(c, id, [] (const auto&) { return true; });

diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc
index 30ffdbd4aee..d7928580967 100644
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -1046,18 +1046,10 @@ class storage_proxy::remote {
co_return netw::messaging_service::no_wait();
}

- void connection_dropped(gms::inet_address addr, std::optional<locator::host_id> id) {
- slogger.debug("Drop hit rate info for {} because of disconnect", addr);
- if (!id) {
- try {
- id = _gossiper.get_host_id(addr);
- } catch (...) {}
- }
- if (!id) {
- return;
- }
+ void connection_dropped(locator::host_id id) {
+ slogger.debug("Drop hit rate info for {} because of disconnect", id);
for (auto&& cf : _sp._db.local().get_non_system_column_families()) {
- cf->drop_hit_rate(*id);
+ cf->drop_hit_rate(id);
}
}

--
2.47.1

Benny Halevy

<bhalevy@scylladb.com>

unread,

Feb 27, 2025, 6:24:10 AMFeb 27

to Gleb Natapov, scylladb-dev@googlegroups.com

nit: s/circle/cycle/ in subject

<bhalevy@scylladb.com>

unread,

Feb 28, 2025, 4:16:46 AMFeb 28

to Gleb Natapov, scylladb-dev@googlegroups.com

LGTM

Avi Kivity

<avi@scylladb.com>

unread,

Mar 2, 2025, 11:01:21 AMMar 2

to Gleb Natapov, scylladb-dev@googlegroups.com

On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
development wrote:

> It is no longer used.

When/how/why did we stop using it?

Will this removal be problematic when talking to old nodes?

> ---
> gms/gossiper.cc | 11 -----------
> 1 file changed, 11 deletions(-)
>
> diff --git a/gms/gossiper.cc b/gms/gossiper.cc
> index b46542fce66..0729150a2d0 100644
> --- a/gms/gossiper.cc
> +++ b/gms/gossiper.cc
> @@ -1980,17 +1980,6 @@ void gossiper::send_all(gossip_digest&
> g_digest,
> void
> gossiper::examine_gossiper(utils::chunked_vector<gossip_digest>&
> g_digest_list,
>      utils::chunked_vector<gossip_digest>& delta_gossip_digest_list,
>      std::map<inet_address, endpoint_state>& delta_ep_state_map)
> const {
> -    if (g_digest_list.size() == 0) {
> -        /* we've been sent a *completely* empty syn, which should
> normally
> -             * never happen since an endpoint will at least send a
> syn with
> -             * itself. If this is happening then the node is
> attempting shadow
> -             * gossip, and we should reply with everything we know.
> -             */
> -        logger.debug("Shadow request received, adding all states");
> -        for (auto& entry : _endpoint_state_map) {
> -            g_digest_list.emplace_back(entry.first);
> -        }
> -    }
>      for (gossip_digest& g_digest : g_digest_list) {
>          auto remote_generation = g_digest.get_generation();
>          auto max_remote_version = g_digest.get_max_version();
> --
> 2.47.1
>

Avi Kivity

<avi@scylladb.com>

unread,

Mar 2, 2025, 11:10:51 AMMar 2

to Gleb Natapov, scylladb-dev@googlegroups.com

On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
development wrote:

> ---
> gms/gossiper.cc | 17 +++--------------
> 1 file changed, 3 insertions(+), 14 deletions(-)
>
> diff --git a/gms/gossiper.cc b/gms/gossiper.cc
> index 253fbf6c766..af36f599ee5 100644
> --- a/gms/gossiper.cc
> +++ b/gms/gossiper.cc
> @@ -1090,27 +1090,16 @@ void gossiper::run() {
> if (g_digests.size() > 0) {

> gossip_digest_syn message(get_cluster_name(),
> get_partitioner_name(), g_digests, get_group0_id());
>

> - if (_endpoints_to_talk_with.empty()) {
> + if (_endpoints_to_talk_with.empty() &&
> !_live_endpoints.empty()) {

> auto live_endpoints = _live_endpoints |
> std::ranges::to<std::vector<inet_address>>();

>                      std::shuffle(live_endpoints.begin(),
> live_endpoints.end(), _random_engine);
>                      // This guarantees the local node will talk with
> all nodes
>                      // in live_endpoints at least once within
> nr_rounds gossip rounds.

>                      // Other gossip implementation like SWIM uses
> similar approach.
>                      //
> https://www.cs.cornell.edu/projects/Quicksilver/public_pdfs/SWIM.pdf
> -                    size_t nr_rounds = 10;
> +                    constexpr size_t nr_rounds = 10;

> size_t nodes_per_round = (live_endpoints.size()
> + nr_rounds - 1) / nr_rounds;

> -                    std::vector<inet_address> live_nodes;
> -                    for (const auto& node : live_endpoints) {
> -                        if (live_nodes.size() < nodes_per_round) {
> -                            live_nodes.push_back(node);
> -                        } else {
> -
> _endpoints_to_talk_with.push_back(std::move(live_nodes));
> -                            live_nodes = {node};
> -                        }
> -                    }
> -                    if (!live_nodes.empty()) {
> -
> _endpoints_to_talk_with.push_back(live_nodes);
> -                    }

> + _endpoints_to_talk_with = live_endpoints |
> std::views::chunk(nodes_per_round) |

> std::ranges::to<std::list<std::vector<inet_address>>>();

How does this std::ranges::to() work? chunk() generates a bunch of
ranges, but a vector cannot be constructed from a range. It needs a
std::from_range_t argument.

>                      logger.debug("Set live nodes to talk:
> endpoint_state_map={}, all_live_nodes={}, endpoints_to_talk_with={}",
>                              _endpoint_state_map.size(),
> live_endpoints, _endpoints_to_talk_with);
>                  }

> --
> 2.47.1
>

Avi Kivity

<avi@scylladb.com>

unread,

Mar 2, 2025, 11:16:07 AMMar 2

to Gleb Natapov, scylladb-dev@googlegroups.com

On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
development wrote:

Wow.

Avi Kivity

<avi@scylladb.com>

unread,

Mar 2, 2025, 1:02:23 PMMar 2

to Gleb Natapov, scylladb-dev@googlegroups.com

On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
development wrote:

Please follow up with moving this lambda into a named member function.
It's annoying to see stack traces pointing at the constructor.

Let's say a node stops and restarts on another IP address. The
address_map will not be updated, because the generation is younger (due
to the restart) for an hour, right?

Avi Kivity

<avi@scylladb.com>

unread,

Mar 2, 2025, 1:06:49 PMMar 2

to Gleb Natapov, scylladb-dev@googlegroups.com

On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB development wrote:

Provide default implementation for them instead. Will be easier to rework them later.

But harder to add new methods later. Maybe convert back to pure virtual at the end?

Avi Kivity

<avi@scylladb.com>

unread,

Mar 2, 2025, 1:09:50 PMMar 2

to Gleb Natapov, scylladb-dev@googlegroups.com

Looks good, but cover letter should explain interoperability with older
versions and what tests were done to verify it, since some behavior is
changed.

On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
development wrote:

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 9, 2025, 5:48:32 AMMar 9

to Avi Kivity, scylladb-dev@googlegroups.com

On Sun, Mar 02, 2025 at 06:01:14PM +0200, Avi Kivity wrote:
> On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
> development wrote:
> > It is no longer used.
>
> When/how/why did we stop using it?
>

We replaced it with explicit GOSSIP_GET_ENDPOINT_STATES verb in
cd7d64f588e3a0a09a792589196786495186b552 which is in scylla-4.3.0

>
> Will this removal be problematic when talking to old nodes?
>

No. We already removed a lot of old shadow round code. This part we
missed.

--
Gleb.

<avi@scylladb.com>

unread,

Mar 9, 2025, 10:13:15 AMMar 9

to Gleb Natapov, scylladb-dev@googlegroups.com

On Sun, 2025-03-09 at 12:06 +0200, Gleb Natapov wrote:

On Sun, Mar 02, 2025 at 08:06:42PM +0200, Avi Kivity wrote:
On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
development wrote:
Provide default implementation for them instead. Will be easier to
rework them later.

But harder to add new methods later. Maybe convert back to pure virtual
at the end?

Why will it be harder to add new methods late?

You will have to hunt for all implementations of the interface.

Although, you could add the new method as pure virtual.

I think it is other way
around. You only add then where you want non default implementation.
Tough we did not add any as far as I see, so this is probably
theoretical.

Yes.

Avi Kivity

<avi@scylladb.com>

unread,

Mar 9, 2025, 10:20:09 AMMar 9

to Gleb Natapov, scylladb-dev@googlegroups.com

In [1], is says:

b) Otherwise, the return expression is equivalent to:

to<C>(ranges::ref_view(r) | views::transform([](auto&& elem) {

return to<ranges::range_value_t<C>>(std::forward<decltype(elem)>(elem));

}), std::forward<Args>(args)...)

Which allows nested range constructions within the range if ranges::input_range<ranges::range_reference_t<C>> is true.

So, it does evaluate the input recursively. Good to know.

[1] https://en.cppreference.com/w/cpp/ranges/to

Avi Kivity

<avi@scylladb.com>

unread,

Mar 9, 2025, 10:21:22 AMMar 9

to Gleb Natapov, scylladb-dev@googlegroups.com

On Sun, 2025-03-09 at 11:48 +0200, Gleb Natapov wrote:

On Sun, Mar 02, 2025 at 06:01:14PM +0200, Avi Kivity wrote:
On Mon, 2025-02-24 at 13:04 +0200, 'Gleb Natapov' via ScyllaDB
development wrote:
It is no longer used.

When/how/why did we stop using it?

We replaced it with explicit GOSSIP_GET_ENDPOINT_STATES verb in
cd7d64f588e3a0a09a792589196786495186b552 which is in scylla-4.3.0

Please mention such things in the patch and cover letter.

Avi Kivity

<avi@scylladb.com>

unread,

Mar 9, 2025, 10:23:20 AMMar 9

to Gleb Natapov, scylladb-dev@googlegroups.com

Maybe it's due to me misunderstanding. What is the epoch of generation
numbers? Is it node startup? If so generation can go backwards. Is
generation monotonic across restarts?

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 9, 2025, 11:00:05 AMMar 9

to Avi Kivity, scylladb-dev@googlegroups.com

The generation is monotonic (based on a timestamp) and even used to
detect restarts.

--
Gleb.

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:46 AMMar 13

to scylladb-dev@googlegroups.com

A node may change its IP but some other node in the cluster may still
try to ping it using an old IP because it may receive an outdated gossiper
entry with the old IP. Do not send echo message to the old IP. It will
cause a misusing UP message with old address to be printed.
---
gms/gossiper.cc | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 4db8ecdc105..9c29a4a85ca 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -1720,6 +1720,11 @@ void gossiper::mark_alive(inet_address addr) {
// ping an old gossip entry.
return;
}
+ if (_address_map.find(id) != addr) {
+ // We are here because id has now different ip but we
+ // try to ping the old one
+ return;
+ }
auto generation = my_endpoint_state().get_heart_beat_state().get_generation();
// Enter the _background_msg gate so stop() would wait on it
auto gh = _background_msg.hold();
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:46 AMMar 13

to scylladb-dev@googlegroups.com

This is series starts conversion of the gossiper to use host ids to
index nodes. It does not touch the main map yet, but converts a lot of
internal code to host id. There are also some unrelated cleanups that

were done while working on the series. On of which is dropping code
related to old shadow round. We replaced shadow round with explicit
GOSSIP_GET_ENDPOINT_STATES verb in cd7d64f588e3a0a09a792589196786495186b552
which is in scylla-4.3.0, so there should be no compatibility problem.
We already dropped a lot of old shadow round code in previous patches
anyway.

I tested manually that old and new node can co-exist in the same
cluster,

CI: https://jenkins.scylladb.com/job/scylla-master/job/scylla-ci/15920/

Also in scylla-dev gleb/gossiper-host-id-v2

v1->v2:
- fixed typos in commit message
- added clarification about compatability
- add get_group_server_if_raft_topolgy_enabled() function
- rebase

Gleb Natapov (33):
gossiper: drop unused field from loaded_endpoint_state

storage_service: drop outdated code that checks whether raft topology

should be used
gossiper: do not ping outdated address
table: use host id based get_endpoint_state_ptr and skip id->ip
translation
gossiper: move is_gossip_only_member and its users to work on host id
migration_manager: drop unneeded id to ip translation
gossiper: check id match inside force_remove_endpoint
gossiper: drop unused get_endpoint_states function
gossiper: drop old shadow round code
gossiper: send shutdown notification by host id
idl: generate ip based version of a verb only for verbs that need it

gossiper: chunk vector using std::views::chunk instead of explicitly

code it
gossiper: move _live_endpoints and _unreachable_endpoints endpoint to
host_id
storage_proxy: drop unused template
treewide: move everyone to use host id based gossiper::is_alive and
drop ip based one
topology_coordinator: notify about IP change from
sync_raft_topology_nodes as well
messaging_service: add temporary address map entry on incoming
connection
gossiper: start using host ids to send messages earlier
gossiper: drop ip address from handle_echo_msg and simplify code since
host_id is now mandatory
treewide: drop endpoint state change subscribers that do nothing

storage_service: drop unused code in handle_state_removed

gossiper: drop deprecated unsafe_assassinate_endpoint operation
treewide: pass host id to endpoint state change subscribers
treewide: use host id directly in endpoint state change subscribers
load_meter: move to host id

treewide: drop endpoint life cycle subscribers that do nothing

treewide: pass host id to endpoint_lifecycle_subscriber
messaging_service: pass host id to remove_rpc_client in down
notification
messaging_service: change connection dropping notification to pass
host id only
gossiper: drop unused get_msg_addr function
gossiper: move _just_removed_endpoints to host id
gossiper: move _expire_time_endpoint_map to host_id
gossiper: drop unneeded code

idl-compiler.py | 22 +-
cdc/generation_service.hh | 9 +-
gms/gossiper.hh | 66 ++--
gms/i_endpoint_state_change_subscriber.hh | 16 +-
idl/gossip.idl.hh | 4 +-
idl/group0.idl.hh | 2 +-
idl/join_node.idl.hh | 4 +-
message/messaging_service.hh | 10 +-
message/messaging_service_fwd.hh | 4 +-
service/endpoint_lifecycle_subscriber.hh | 14 +-
service/load_broadcaster.hh | 24 +-
service/migration_manager.hh | 13 +-
service/qos/service_level_controller.hh | 3 -
service/storage_proxy.hh | 4 +-

service/storage_service.hh | 33 +-

service/storage_service.cc | 250 ++++++-------

46 files changed, 537 insertions(+), 755 deletions(-)

--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:47 AMMar 13

to scylladb-dev@googlegroups.com

---
replica/table.cc | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/replica/table.cc b/replica/table.cc
index 18841d61014..4944798ff24 100644
--- a/replica/table.cc
+++ b/replica/table.cc
@@ -3397,9 +3397,8 @@ table::cache_hit_rate table::get_hit_rate(const gms::gossiper& gossiper, locator
}
auto it = _cluster_cache_hit_rates.find(addr);
if (it == _cluster_cache_hit_rates.end()) {
- auto ip_opt = gossiper.get_address_map().find(addr);
// no data yet, get it from the gossiper
- auto eps = ip_opt ? gossiper.get_endpoint_state_ptr(*ip_opt) : nullptr;
+ auto eps = gossiper.get_endpoint_state_ptr(addr);
if (eps) {
auto* state = eps->get_application_state_ptr(gms::application_state::CACHE_HITRATES);
float f = -1.0f; // missing state means old node
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:48 AMMar 13

to scylladb-dev@googlegroups.com

After raft_topology_change_enabled() was introduced the code does
nothing useful. The function is responsible for the decision if raft topology
is enabled or not.
---
service/storage_service.hh | 1 +
service/storage_service.cc | 19 +++++--------------
2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/service/storage_service.hh b/service/storage_service.hh
index 8263423ad40..0f6e17775f2 100644
--- a/service/storage_service.hh
+++ b/service/storage_service.hh
@@ -408,6 +408,7 @@ class storage_service : public service::migration_listener, public gms::i_endpoi
bool should_bootstrap();
bool is_replacing();
bool is_first_node();
+ raft::server* get_group_server_if_raft_topolgy_enabled();
future<> join_topology(sharded<service::storage_proxy>& proxy,
std::unordered_set<gms::inet_address> initial_contact_nodes,
std::unordered_map<locator::host_id, gms::loaded_endpoint_state> loaded_endpoints,
diff --git a/service/storage_service.cc b/service/storage_service.cc
index 8d7b0e21a24..91d08f9337b 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -1497,6 +1497,10 @@ future<> storage_service::await_tablets_rebuilt(raft::server_id replaced_id) {
slogger.info("Tablet replicas from the replaced node have been rebuilt");
}

+raft::server* storage_service::get_group_server_if_raft_topolgy_enabled() {
+ return raft_topology_change_enabled() ? &_group0->group0_server() : nullptr;
+}
+
future<> storage_service::join_topology(sharded<service::storage_proxy>& proxy,
std::unordered_set<gms::inet_address> initial_contact_nodes,
std::unordered_map<locator::host_id, gms::loaded_endpoint_state> loaded_endpoints,
@@ -1772,20 +1776,7 @@ future<> storage_service::join_topology(sharded<service::storage_proxy>& proxy,
co_await _group0->setup_group0(_sys_ks.local(), initial_contact_nodes, std::move(handshaker),
raft_replace_info, *this, _qp, _migration_manager.local(), raft_topology_change_enabled(), join_params);

- raft::server* raft_server = co_await [this] () -> future<raft::server*> {
- if (!raft_topology_change_enabled()) {
- co_return nullptr;
- } else if (_sys_ks.local().bootstrap_complete()) {
- auto [upgrade_lock_holder, upgrade_state] = co_await _group0->client().get_group0_upgrade_state();
- co_return upgrade_state == group0_upgrade_state::use_post_raft_procedures ? &_group0->group0_server() : nullptr;
- } else {
- auto upgrade_state = (co_await _group0->client().get_group0_upgrade_state()).second;
- if (upgrade_state != group0_upgrade_state::use_post_raft_procedures) {
- on_internal_error(rtlogger, "cluster not upgraded to use group 0 after setup_group0");
- }
- co_return &_group0->group0_server();
- }
- } ();
+ raft::server* raft_server = get_group_server_if_raft_topolgy_enabled();

if (!raft_topology_change_enabled()) {
co_await _gossiper.wait_for_gossip_to_settle();
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:48 AMMar 13

to scylladb-dev@googlegroups.com

---
gms/gossiper.hh | 1 -
gms/gossiper.cc | 3 ---
2 files changed, 4 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index f3d5dd631cb..72263b730ec 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -75,7 +75,6 @@ struct loaded_endpoint_state {
gms::inet_address endpoint;
std::unordered_set<dht::token> tokens;
std::optional<locator::endpoint_dc_rack> opt_dc_rack;
- std::optional<gms::versioned_value> opt_status;
};

/**
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index d34d3274ddf..4db8ecdc105 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -2223,9 +2223,6 @@ future<> gossiper::add_saved_endpoint(locator::host_id host_id, gms::loaded_endp
ep_state.add_application_state(gms::application_state::DC, gms::versioned_value::datacenter(st.opt_dc_rack->dc));
ep_state.add_application_state(gms::application_state::RACK, gms::versioned_value::datacenter(st.opt_dc_rack->rack));
}
- if (st.opt_status) {
- ep_state.add_application_state(gms::application_state::STATUS, std::move(*st.opt_status));
- }

auto generation = ep_state.get_heart_beat_state().get_generation();
co_await replicate(ep, std::move(ep_state), permit.id());

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:50 AMMar 13

to scylladb-dev@googlegroups.com

---
gms/gossiper.hh | 2 --
gms/gossiper.cc | 6 +-----
2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index e3d5b0e7a5e..7d4f2657cb8 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -420,8 +420,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar

future<> do_status_check();

- const std::unordered_map<inet_address, endpoint_state_ptr>& get_endpoint_states() const noexcept;
-
public:
clk::time_point get_expire_time_for_endpoint(inet_address endpoint) const noexcept;

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 68444a6f299..7b34f7e2669 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -1550,10 +1550,6 @@ future<> gossiper::reset_endpoint_state_map() {
});
}

-const std::unordered_map<inet_address, endpoint_state_ptr>& gms::gossiper::get_endpoint_states() const noexcept {
- return _endpoint_state_map;
-}
-
std::vector<inet_address> gossiper::get_endpoints() const {
return _endpoint_state_map | std::views::keys | std::ranges::to<std::vector>();
}
@@ -1600,7 +1596,7 @@ locator::host_id gossiper::get_host_id(inet_address endpoint) const {

std::set<gms::inet_address> gossiper::get_nodes_with_host_id(locator::host_id host_id) const {
std::set<gms::inet_address> nodes;
- for (const auto& [node, eps] : get_endpoint_states()) {
+ for (const auto& [node, eps] : _endpoint_state_map) {
auto app_state = eps->get_application_state_ptr(application_state::HOST_ID);
if (app_state && host_id == locator::host_id(utils::UUID(app_state->value()))) {
nodes.insert(node);
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:51 AMMar 13

to scylladb-dev@googlegroups.com

It is no longer used. It was replaced with explicit GOSSIP_GET_ENDPOINT_STATES verb in

cd7d64f588e3a0a09a792589196786495186b552 which is in scylla-4.3.0

---
gms/gossiper.cc | 11 -----------
1 file changed, 11 deletions(-)

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 7b34f7e2669..6d4d6e938a2 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -1981,17 +1981,6 @@ void gossiper::send_all(gossip_digest& g_digest,

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:52 AMMar 13

to scylladb-dev@googlegroups.com

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:54 AMMar 13

to scylladb-dev@googlegroups.com

Index live and dead endpoints by host id. It also allows to simplify
some code that does a translation.
---
gms/gossiper.hh | 24 ++---
api/gossiper.cc | 2 +-
db/hints/manager.cc | 50 +----------
dht/boot_strapper.cc | 2 +-
gms/gossiper.cc | 177 ++++++++++++++++++-------------------
service/storage_service.cc | 6 +-
6 files changed, 107 insertions(+), 154 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 7d4f2657cb8..38c2d5796a2 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -222,17 +222,17 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
*/
atomic_vector<shared_ptr<i_endpoint_state_change_subscriber>> _subscribers;

- std::list<std::vector<inet_address>> _endpoints_to_talk_with;
+ std::list<std::vector<locator::host_id>> _endpoints_to_talk_with;

/* live member set */
- std::unordered_set<inet_address> _live_endpoints;
+ std::unordered_set<locator::host_id> _live_endpoints;
uint64_t _live_endpoints_version = 0;

/* nodes are being marked as alive */
std::unordered_set<inet_address> _pending_mark_alive_endpoints;

/* unreachable member set */
- std::unordered_map<inet_address, clk::time_point> _unreachable_endpoints;
+ std::unordered_map<locator::host_id, clk::time_point> _unreachable_endpoints;

semaphore _endpoint_update_semaphore = semaphore(1);

@@ -255,8 +255,8 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
future<semaphore_units<>> lock_endpoint_update_semaphore();

struct live_and_unreachable_endpoints {
- std::unordered_set<inet_address> live;
- std::unordered_map<inet_address, clk::time_point> unreachable;
+ std::unordered_set<locator::host_id> live;
+ std::unordered_map<locator::host_id, clk::time_point> unreachable;
};

// Must be called on shard 0.
@@ -302,15 +302,14 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
/**
* @return a list of unreachable gossip participants, including fat clients
*/
- std::set<inet_address> get_unreachable_members() const;
- std::set<locator::host_id> get_unreachable_host_ids() const;
+ std::set<locator::host_id> get_unreachable_members() const;

/**
* @return a list of unreachable nodes
*/
std::set<locator::host_id> get_unreachable_nodes() const;

- int64_t get_endpoint_downtime(inet_address ep) const noexcept;
+ int64_t get_endpoint_downtime(locator::host_id ep) const noexcept;

/**
* Return either: the greatest heartbeat or application state
@@ -410,10 +409,12 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
* @param epSet a set of endpoint from which a random endpoint is chosen.
* @return true if the chosen endpoint is also a seed.
*/
- future<> send_gossip(gossip_digest_syn message, std::set<inet_address> epset);
+ template<typename T>
+ future<> send_gossip(gossip_digest_syn message, std::set<T> epset);

/* Sends a Gossip message to a live member */
- future<> do_gossip_to_live_member(gossip_digest_syn message, inet_address ep);
+ template<typename T>
+ future<> do_gossip_to_live_member(gossip_digest_syn message, T ep);

/* Sends a Gossip message to an unreachable member */
future<> do_gossip_to_unreachable_member(gossip_digest_syn message);
@@ -521,7 +522,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
future<> wait_alive(std::vector<gms::inet_address> nodes, std::chrono::milliseconds timeout);
future<> wait_alive(std::vector<locator::host_id> nodes, std::chrono::milliseconds timeout);
future<> wait_alive(noncopyable_function<std::vector<locator::host_id>()> get_nodes, std::chrono::milliseconds timeout);
- std::set<inet_address> get_live_members_helper() const;

// Wait for `n` live nodes to show up in gossip (including ourself).
future<> wait_for_live_nodes_to_show_up(size_t n);
@@ -703,7 +703,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
}
private:
future<> failure_detector_loop();
- future<> failure_detector_loop_for_node(gms::inet_address node, generation_type gossip_generation, uint64_t live_endpoints_version);
+ future<> failure_detector_loop_for_node(locator::host_id node, generation_type gossip_generation, uint64_t live_endpoints_version);
};

diff --git a/api/gossiper.cc b/api/gossiper.cc
index 4826c2f3eb3..e5fcd2e1076 100644
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -35,7 +35,7 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {

gms::inet_address ep(req->get_path_param("addr"));

// synchronize unreachable_members on all shards
co_await g.get_unreachable_members_synchronized();
- co_return g.get_endpoint_downtime(ep);
+ co_return g.get_endpoint_downtime(g.get_host_id(ep));
});

httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
diff --git a/db/hints/manager.cc b/db/hints/manager.cc
index 77ca698b2ae..be6eb99a0f7 100644
--- a/db/hints/manager.cc
+++ b/db/hints/manager.cc
@@ -461,59 +461,13 @@ bool manager::store_hint(endpoint_id host_id, schema_ptr s, lw_shared_ptr<const
}
}

-/// Checks if there is a node corresponding to a given host ID that hasn't been down for longer
-/// than a given amount of time. The function relies on information obtained from the passed `gms::gossiper`.
-static bool endpoint_downtime_not_bigger_than(const gms::gossiper& gossiper, const locator::host_id& host_id,
- uint64_t max_downtime_us)
-{
- // We want to enforce small buffer optimization in the call
- // to `gms::gossiper::for_each_endpoint_state_until()` below
- // to avoid an unnecessary allocation.
- // Since we need all these four pieces of information in the lambda,
- // the function object passed to the function might be too big.
- // That's why we create it locally on the stack and only pass a reference to it.
- struct sbo_info {
- locator::host_id host_id;
- const gms::gossiper& gossiper;
- int64_t max_hint_window_us;
- bool small_node_downtime;
- };
-
- sbo_info info {
- .host_id = host_id,
- .gossiper = gossiper,
- .max_hint_window_us = max_downtime_us,
- .small_node_downtime = false
- };
-
- gossiper.for_each_endpoint_state_until(
- [&info] (const gms::inet_address& ip, const gms::endpoint_state& state) {
- const auto* app_state = state.get_application_state_ptr(gms::application_state::HOST_ID);
- if (!app_state) {
- manager_logger.error("Host ID application state for {} has not been found. Endpoint state: {}", ip, state);
- return stop_iteration::no;
- }
- const auto host_id = locator::host_id{utils::UUID{app_state->value()}};
- if (host_id != info.host_id) {
- return stop_iteration::no;
- }
- if (info.gossiper.get_endpoint_downtime(ip) <= info.max_hint_window_us) {
- info.small_node_downtime = true;
- return stop_iteration::yes;
- }
- return stop_iteration::no;
- });
-
- return info.small_node_downtime;
-}
-
bool manager::too_many_in_flight_hints_for(endpoint_id ep) const noexcept {
// There is no need to check the DC here because if there is an in-flight hint for this
// endpoint, then this means that its DC has already been checked and found to be ok.
return _stats.size_of_hints_in_progress > max_size_of_hints_in_progress()
&& !_proxy.local_db().get_token_metadata().get_topology().is_me(ep)
&& hints_in_progress_for(ep) > 0
- && endpoint_downtime_not_bigger_than(local_gossiper(), ep, _max_hint_window_us);
+ && local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
}

bool manager::can_hint_for(endpoint_id ep) const noexcept {
@@ -548,7 +502,7 @@ bool manager::can_hint_for(endpoint_id ep) const noexcept {
return false;
}

- const bool node_is_alive = endpoint_downtime_not_bigger_than(local_gossiper(), ep, _max_hint_window_us);
+ const bool node_is_alive = local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
if (!node_is_alive) {
manager_logger.trace("{} has been down for too long, not hinting", ep);
return false;
diff --git a/dht/boot_strapper.cc b/dht/boot_strapper.cc
index 5656634a86f..05836917c1a 100644
--- a/dht/boot_strapper.cc
+++ b/dht/boot_strapper.cc
@@ -42,7 +42,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason, gms::gossiper
}
try {
auto streamer = make_lw_shared<range_streamer>(_db, _stream_manager, _token_metadata_ptr, _abort_source, _tokens, _address, _dr, description, reason, topo_guard);
- auto nodes_to_filter = gossiper.get_unreachable_host_ids();
+ auto nodes_to_filter = gossiper.get_unreachable_members();
if (reason == streaming::stream_reason::replace) {
nodes_to_filter.insert(std::move(replace_address));
}
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 0fca198e675..972b072d57f 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -24,6 +24,7 @@
#include "db/system_keyspace.hh"
#include <fmt/chrono.h>
#include <fmt/ranges.h>
+#include <ranges>
#include <seastar/core/sleep.hh>
#include <seastar/core/thread.hh>
#include <seastar/core/metrics.hh>
@@ -559,8 +560,9 @@ future<> gossiper::uninit_messaging_service_handler() {
return ser::gossip_rpc_verbs::unregister(&ms);
}

-future<> gossiper::send_gossip(gossip_digest_syn message, std::set<inet_address> epset) {
- utils::chunked_vector<inet_address> __live_endpoints(epset.begin(), epset.end());
+template<typename T>
+future<> gossiper::send_gossip(gossip_digest_syn message, std::set<T> epset) {
+ utils::chunked_vector<T> __live_endpoints(epset.begin(), epset.end());
size_t size = __live_endpoints.size();
if (size < 1) {
return make_ready_future<>();
@@ -568,8 +570,7 @@ future<> gossiper::send_gossip(gossip_digest_syn message, std::set<inet_address>
/* Generate a random number from 0 -> size */
std::uniform_int_distribution<int> dist(0, size - 1);
int index = dist(_random_engine);
- inet_address to = __live_endpoints[index];
- auto id = get_msg_addr(to);
+ std::conditional_t<std::is_same_v<T, gms::inet_address>, netw::msg_addr, T> id{__live_endpoints[index]};
logger.trace("Sending a GossipDigestSyn to {} ...", id);
return ser::gossip_rpc_verbs::send_gossip_digest_syn(&_messaging, id, std::move(message)).handle_exception([id] (auto ep) {
// It is normal to reach here because it is normal that a node
@@ -721,17 +722,29 @@ future<> gossiper::remove_endpoint(inet_address endpoint, permit_id pid) {

auto state = get_endpoint_state_ptr(endpoint);

+ if (!state) {
+ logger.warn("There is no state for the removed IP {}", endpoint);
+ co_return;
+ }
+
+ auto host_id = state->get_host_id();
+
bool was_alive = false;
- co_await mutate_live_and_unreachable_endpoints([endpoint, &was_alive] (live_and_unreachable_endpoints& data) {
- was_alive = data.live.erase(endpoint);
- data.unreachable.erase(endpoint);
- });
+
+ if (_address_map.find(host_id) == endpoint) {
+ // During IP address change we may have a situation where we work on old address
+ // but there is a new address for the same host id, so no need to mark host id as down
+ co_await mutate_live_and_unreachable_endpoints([host_id, &was_alive] (live_and_unreachable_endpoints& data) {
+ was_alive = data.live.erase(host_id);
+ data.unreachable.erase(host_id);
+ });
+ }
_syn_handlers.erase(endpoint);
_ack_handlers.erase(endpoint);
quarantine_endpoint(endpoint);
logger.info("Removed endpoint {}", endpoint);

- if (was_alive && state) {
+ if (was_alive) {
try {
logger.info("InetAddress {}/{} is now DOWN, status = {}", state->get_host_id(), endpoint, get_gossip_status(*state));
co_await do_on_dead_notifications(endpoint, std::move(state), pid);
@@ -931,7 +944,7 @@ future<std::set<inet_address>> gossiper::get_live_members_synchronized() {
return container().invoke_on(0, [] (gms::gossiper& g) -> future<std::set<inet_address>> {
// Make sure the value we return is synchronized on all shards
auto lock = co_await g.lock_endpoint_update_semaphore();
- co_return g.get_live_members_helper();
+ co_return g.get_live_members() | std::views::transform([&g] (auto id) { return g._address_map.get(id); }) | std::ranges::to<std::set>();
});
}

@@ -939,17 +952,16 @@ future<std::set<inet_address>> gossiper::get_unreachable_members_synchronized()
return container().invoke_on(0, [] (gms::gossiper& g) -> future<std::set<inet_address>> {
// Make sure the value we return is synchronized on all shards
auto lock = co_await g.lock_endpoint_update_semaphore();
- co_return g.get_unreachable_members();
+ co_return g.get_unreachable_members() | std::views::transform([&g] (auto id) { return g._address_map.get(id); }) | std::ranges::to<std::set>();
});
}

-future<> gossiper::failure_detector_loop_for_node(gms::inet_address node, generation_type gossip_generation, uint64_t live_endpoints_version) {
+future<> gossiper::failure_detector_loop_for_node(locator::host_id host_id, generation_type gossip_generation, uint64_t live_endpoints_version) {
auto last = gossiper::clk::now();
auto diff = gossiper::clk::duration(0);
auto echo_interval = std::chrono::seconds(2);
auto max_duration = echo_interval + std::chrono::milliseconds(_gcfg.failure_detector_timeout_ms());
- auto host_id = get_host_id(node);
-
+ auto node = _address_map.get(host_id);
while (is_enabled()) {
bool failed = false;
try {
@@ -1001,7 +1013,7 @@ future<> gossiper::failure_detector_loop() {
co_await sleep_abortable(std::chrono::seconds(1), _abort_source);
continue;
}
- auto nodes = _live_endpoints | std::ranges::to<std::vector<inet_address>>();
+ auto nodes = _live_endpoints | std::ranges::to<std::vector>();
auto live_endpoints_version = _live_endpoints_version;
auto generation_number = my_endpoint_state().get_heart_beat_state().get_generation();
co_await coroutine::parallel_for_each(std::views::iota(0u, nodes.size()), [this, generation_number, live_endpoints_version, &nodes] (size_t idx) {
@@ -1092,7 +1104,7 @@ void gossiper::run() {

gossip_digest_syn message(get_cluster_name(), get_partitioner_name(), g_digests, get_group0_id());

if (_endpoints_to_talk_with.empty() && !_live_endpoints.empty()) {
- auto live_endpoints = _live_endpoints | std::ranges::to<std::vector<inet_address>>();
+ auto live_endpoints = _live_endpoints | std::ranges::to<std::vector>();

std::shuffle(live_endpoints.begin(), live_endpoints.end(), _random_engine);
// This guarantees the local node will talk with all nodes
// in live_endpoints at least once within nr_rounds gossip rounds.

@@ -1100,17 +1112,11 @@ void gossiper::run() {
// https://www.cs.cornell.edu/projects/Quicksilver/public_pdfs/SWIM.pdf

constexpr size_t nr_rounds = 10;
size_t nodes_per_round = (live_endpoints.size() + nr_rounds - 1) / nr_rounds;

- _endpoints_to_talk_with = live_endpoints | std::views::chunk(nodes_per_round) | std::ranges::to<std::list<std::vector<inet_address>>>();
+ _endpoints_to_talk_with = live_endpoints | std::views::chunk(nodes_per_round) | std::ranges::to<std::list<std::vector<locator::host_id>>>();

logger.debug("Set live nodes to talk: endpoint_state_map={}, all_live_nodes={}, endpoints_to_talk_with={}",
_endpoint_state_map.size(), live_endpoints, _endpoints_to_talk_with);
}

- if (_endpoints_to_talk_with.empty()) {
- auto nodes = std::vector<inet_address>(_seeds.begin(), _seeds.end());
- logger.debug("No live nodes yet: try initial contact point nodes={}", nodes);
- if (!nodes.empty()) {
- _endpoints_to_talk_with.push_back(std::move(nodes));
- }
- }
+
if (!_endpoints_to_talk_with.empty()) {
auto live_nodes = std::move(_endpoints_to_talk_with.front());
_endpoints_to_talk_with.pop_front();
@@ -1122,6 +1128,15 @@ void gossiper::run() {
});
});
}
+ } else if (!_seeds.empty()) {
+ logger.debug("No live nodes yet: try initial contact point nodes={}", _seeds);
+ for (auto& ep: _seeds) {
+ (void)with_gate(_background_msg, [this, message, ep] () mutable {
+ return do_gossip_to_live_member(message, ep).handle_exception([] (auto ep) {
+ logger.trace("Failed to send gossip to live members: {}", ep);
+ });
+ });
+ }
} else {
logger.debug("No one to talk with");
}
@@ -1171,21 +1186,17 @@ future<> gossiper::unregister_(shared_ptr<i_endpoint_state_change_subscriber> su
return _subscribers.remove(subscriber);
}

-std::set<inet_address> gossiper::get_live_members_helper() const {
- std::set<inet_address> live_members(_live_endpoints.begin(), _live_endpoints.end());
+std::set<locator::host_id> gossiper::get_live_members() const {
+ std::set<locator::host_id> live_members(_live_endpoints.begin(), _live_endpoints.end());
auto myip = get_broadcast_address();
logger.debug("live_members before={}", live_members);
if (!is_shutdown(myip)) {
- live_members.insert(myip);
+ live_members.insert(my_host_id());
}
logger.debug("live_members after={}", live_members);
return live_members;
}

-std::set<locator::host_id> gossiper::get_live_members() const {
- return get_live_members_helper() | std::views::transform([this] (inet_address ip) { return get_host_id(ip); }) | std::ranges::to<std::set>();
-}
-
std::set<locator::host_id> gossiper::get_live_token_owners() const {
std::set<locator::host_id> token_owners;
auto normal_token_owners = get_token_metadata_ptr()->get_normal_token_owners();
@@ -1209,7 +1220,7 @@ std::set<locator::host_id> gossiper::get_unreachable_nodes() const {
}

// Return downtime in microseconds
-int64_t gossiper::get_endpoint_downtime(inet_address ep) const noexcept {
+int64_t gossiper::get_endpoint_downtime(locator::host_id ep) const noexcept {
auto it = _unreachable_endpoints.find(ep);
if (it != _unreachable_endpoints.end()) {
auto& downtime = it->second;
@@ -1235,19 +1246,8 @@ future<> gossiper::convict(inet_address endpoint) {
}
}

-std::set<inet_address> gossiper::get_unreachable_members() const {
- std::set<inet_address> ret;
- for (auto&& x : _unreachable_endpoints) {
- ret.insert(x.first);
- }
- return ret;
-}
-
-std::set<locator::host_id> gossiper::get_unreachable_host_ids() const {
- return get_unreachable_members() |
- std::views::transform([this] (gms::inet_address ip) { return get_host_id(ip); }) |
- std::ranges::to<std::set>();
-
+std::set<locator::host_id> gossiper::get_unreachable_members() const {
+ return _unreachable_endpoints | std::views::keys | std::ranges::to<std::set>();
}

version_type gossiper::get_max_endpoint_state_version(const endpoint_state& state) const noexcept {
@@ -1261,14 +1261,18 @@ version_type gossiper::get_max_endpoint_state_version(const endpoint_state& stat

future<> gossiper::evict_from_membership(inet_address endpoint, permit_id pid) {
verify_permit(endpoint, pid);
- co_await mutate_live_and_unreachable_endpoints([endpoint] (live_and_unreachable_endpoints& data) {
- data.unreachable.erase(endpoint);
- data.live.erase(endpoint);
- });
+ auto hid = get_host_id(endpoint);
+ if (_address_map.find(hid) == endpoint) {
+ // During IP address change we may have a situation where we work on old address
+ // but there is a new address for the same host id, so no need to mark host id as down
+ co_await mutate_live_and_unreachable_endpoints([hid] (live_and_unreachable_endpoints& data) {
+ data.unreachable.erase(hid);
+ data.live.erase(hid);
+ });
+ }

- co_await container().invoke_on_all([endpoint] (auto& g) {
+ co_await container().invoke_on_all([endpoint, hid] (auto& g) {
if (this_shard_id() == 0) {
- auto hid = g.get_endpoint_state_ptr(endpoint)->get_host_id();
if (g._address_map.find(hid) == endpoint) {
// During IP address change we may have a situation where we remove old gossiper state
// but there is a new address for the same host id, so no need to make it expiring
@@ -1436,8 +1440,9 @@ future<version_type> gossiper::get_current_heart_beat_version(inet_address endpo
});
}

-future<> gossiper::do_gossip_to_live_member(gossip_digest_syn message, gms::inet_address ep) {
- return send_gossip(message, {ep});
+template<typename T>
+future<> gossiper::do_gossip_to_live_member(gossip_digest_syn message, T ep) {
+ return send_gossip<T>(message, {ep});
}

future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
@@ -1449,10 +1454,10 @@ future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
std::uniform_real_distribution<double> dist(0, 1);
double rand_dbl = dist(_random_engine);
if (rand_dbl < prob) {
- std::set<inet_address> addrs;
+ std::set<locator::host_id> addrs;
for (auto&& x : _unreachable_endpoints) {
// Ignore the node which is decommissioned
- if (get_gossip_status(x.first) != sstring(versioned_value::STATUS_LEFT)) {
+ if (get_gossip_status(_address_map.get(x.first)) != sstring(versioned_value::STATUS_LEFT)) {
addrs.insert(x.first);
}
}
@@ -1752,7 +1757,7 @@ future<> gossiper::real_mark_alive(inet_address addr) {

logger.debug("removing expire time for endpoint : {}", addr);
bool was_live = false;
- co_await mutate_live_and_unreachable_endpoints([addr, &was_live] (live_and_unreachable_endpoints& data) {
+ co_await mutate_live_and_unreachable_endpoints([addr = es->get_host_id(), &was_live] (live_and_unreachable_endpoints& data) {
data.unreachable.erase(addr);
auto [it_, inserted] = data.live.insert(addr);
was_live = !inserted;
@@ -1763,9 +1768,9 @@ future<> gossiper::real_mark_alive(inet_address addr) {
}

if (_endpoints_to_talk_with.empty()) {
- _endpoints_to_talk_with.push_back({addr});
+ _endpoints_to_talk_with.push_back({es->get_host_id()});
} else {
- _endpoints_to_talk_with.front().push_back(addr);
+ _endpoints_to_talk_with.front().push_back(es->get_host_id());
}

logger.info("InetAddress {}/{} is now UP, status = {}", es->get_host_id(), addr, status);
@@ -1779,10 +1784,15 @@ future<> gossiper::real_mark_alive(inet_address addr) {
future<> gossiper::mark_dead(inet_address addr, endpoint_state_ptr state, permit_id pid) {
logger.trace("marking as down {}", addr);
verify_permit(addr, pid);
- co_await mutate_live_and_unreachable_endpoints([addr] (live_and_unreachable_endpoints& data) {
- data.live.erase(addr);
- data.unreachable[addr] = now();
- });
+
+ if (_address_map.find(state->get_host_id()) == addr) {
+ // During IP address change we may have a situation where we work on old address
+ // but there is a new address for the same host id, so no need to mark host id as down
+ co_await mutate_live_and_unreachable_endpoints([addr = state->get_host_id()] (live_and_unreachable_endpoints& data) {
+ data.live.erase(addr);
+ data.unreachable[addr] = now();
+ });
+ }
logger.info("InetAddress {}/{} is now DOWN, status = {}", state->get_host_id(), addr, get_gossip_status(*state));
co_await do_on_dead_notifications(addr, std::move(state), pid);
}
@@ -2208,7 +2218,7 @@ future<> gossiper::add_saved_endpoint(locator::host_id host_id, gms::loaded_endp

}
auto generation = ep_state.get_heart_beat_state().get_generation();
co_await replicate(ep, std::move(ep_state), permit.id());

- _unreachable_endpoints[ep] = now();
+ _unreachable_endpoints[host_id] = now();
logger.trace("Adding saved endpoint {} {}", ep, generation);
}

@@ -2291,8 +2301,7 @@ future<> gossiper::do_stop_gossiping() {
logger.info("Announcing shutdown");

co_await add_local_application_state(application_state::STATUS, versioned_value::shutdown(true));
auto live_endpoints = _live_endpoints;

- for (inet_address addr : live_endpoints) {
- auto id = get_host_id(addr);
+ for (locator::host_id id : live_endpoints) {

logger.info("Sending a GossipShutdown to {} with generation {}", id, local_generation);
try {
co_await ser::gossip_rpc_verbs::send_gossip_shutdown(&_messaging, id, get_broadcast_address(), local_generation.value());

@@ -2367,41 +2376,31 @@ bool gossiper::is_alive(inet_address ep) const {
if (ep == get_broadcast_address()) {
return true;
}
- bool is_alive = _live_endpoints.contains(ep);

-#ifndef SCYLLA_BUILD_MODE_RELEASE
- // Live endpoints must always have a valid endpoint_state.
- // Verify that in testing mode to reduce the overhead in production.
- if (is_alive && !get_endpoint_state_ptr(ep)) {
- on_internal_error(logger, fmt::format("Node {} is alive but has no endpoint state", ep));
+ auto sptr = get_endpoint_state_ptr(ep);
+ if (!sptr) {
+ return false;
}
-#endif

- return is_alive;
+ return _live_endpoints.contains(sptr->get_host_id());
}

bool gossiper::is_alive(locator::host_id id) const {
- auto ip_opt = _address_map.find(id);
-
- if (!ip_opt) {
- // if host ID is not in the gossiper state (and hence not in the address map) it is dead
- return false;
+ if (id == my_host_id()) {
+ return true;
}

- auto ep = get_endpoint_state_ptr(*ip_opt);
- if (!ep) {
- // _address_map may have stale entry since we rely on gc to remove entries there
- // FIXME: add function to address_map to remove immediately
- return false;
- }
+ bool is_alive = _live_endpoints.contains(id);

- if (id != ep->get_host_id()) {
- // If IDs do not match it means that the node with provided ID was replaced
- // with a new node with same IP address and hence it is dead
- return false;
+#ifndef SCYLLA_BUILD_MODE_RELEASE
+ // Live endpoints must always have a valid endpoint_state.
+ // Verify that in testing mode to reduce the overhead in production.
+ if (is_alive && !get_endpoint_state_ptr(id)) {
+ on_internal_error(logger, fmt::format("Node {} is alive but has no endpoint state", id));
}
+#endif

- return is_alive(*ip_opt);
+ return is_alive;
}

template<typename ID>
diff --git a/service/storage_service.cc b/service/storage_service.cc
index 395203b4b20..eaf6fbca1a8 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -4862,7 +4862,7 @@ future<> storage_service::rebuild(utils::optional_param source_dc) {
} else {
auto streamer = make_lw_shared<dht::range_streamer>(ss._db, ss._stream_manager, tmptr, ss._abort_source,
tmptr->get_my_id(), ss._snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, null_topology_guard);
- streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(ss._gossiper.get_unreachable_host_ids()));
+ streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(ss._gossiper.get_unreachable_members()));
if (source_dc) {
streamer->add_source_filter(std::make_unique<dht::range_streamer::single_datacenter_filter>(*source_dc));
}
@@ -5761,7 +5761,7 @@ future<raft_topology_cmd_result> storage_service::raft_topology_cmd_handler(raft
} else {
auto streamer = make_lw_shared<dht::range_streamer>(_db, _stream_manager, tmptr, _abort_source,
tmptr->get_my_id(), _snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, _topology_state_machine._topology.session);
- streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(_gossiper.get_unreachable_host_ids()));
+ streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(_gossiper.get_unreachable_members()));
if (source_dc != "") {
streamer->add_source_filter(std::make_unique<dht::range_streamer::single_datacenter_filter>(source_dc));
}
@@ -6127,7 +6127,7 @@ future<> storage_service::stream_tablet(locator::global_tablet_id tablet) {
std::move(tables));
tm = nullptr;
streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(
- _gossiper.get_unreachable_host_ids()));
+ _gossiper.get_unreachable_members()));

std::unordered_map<locator::host_id, dht::token_range_vector> ranges_per_endpoint;
for (auto r: streaming_info.read_from) {
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:54 AMMar 13

to scylladb-dev@googlegroups.com

---
gms/gossiper.cc | 17 +++--------------
1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index a3dab8f2efe..0fca198e675 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -1091,27 +1091,16 @@ void gossiper::run() {
if (g_digests.size() > 0) {

gossip_digest_syn message(get_cluster_name(), get_partitioner_name(), g_digests, get_group0_id());

- if (_endpoints_to_talk_with.empty()) {
+ if (_endpoints_to_talk_with.empty() && !_live_endpoints.empty()) {

auto live_endpoints = _live_endpoints | std::ranges::to<std::vector<inet_address>>();

std::shuffle(live_endpoints.begin(), live_endpoints.end(), _random_engine);
// This guarantees the local node will talk with all nodes
// in live_endpoints at least once within nr_rounds gossip rounds.

// Other gossip implementation like SWIM uses similar approach.
// https://www.cs.cornell.edu/projects/Quicksilver/public_pdfs/SWIM.pdf
- size_t nr_rounds = 10;

+ constexpr size_t nr_rounds = 10;

size_t nodes_per_round = (live_endpoints.size() + nr_rounds - 1) / nr_rounds;

- std::vector<inet_address> live_nodes;

- for (const auto& node : live_endpoints) {
- if (live_nodes.size() < nodes_per_round) {
- live_nodes.push_back(node);
- } else {
- _endpoints_to_talk_with.push_back(std::move(live_nodes));
- live_nodes = {node};
- }
- }
- if (!live_nodes.empty()) {
- _endpoints_to_talk_with.push_back(live_nodes);
- }

+ _endpoints_to_talk_with = live_endpoints | std::views::chunk(nodes_per_round) | std::ranges::to<std::list<std::vector<inet_address>>>();

logger.debug("Set live nodes to talk: endpoint_state_map={}, all_live_nodes={}, endpoints_to_talk_with={}",
_endpoint_state_map.size(), live_endpoints, _endpoints_to_talk_with);
}

--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:55 AMMar 13

to scylladb-dev@googlegroups.com

---
gms/gossiper.hh | 2 +-
gms/gossiper.cc | 25 ++++++++++++-------------
2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index dd6b33f42bb..433a4e3f0fd 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -104,7 +104,7 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
future<> handle_syn_msg(locator::host_id from, gossip_digest_syn syn_msg);
future<> handle_ack_msg(locator::host_id from, gossip_digest_ack ack_msg);
future<> handle_ack2_msg(locator::host_id from, gossip_digest_ack2 msg);
- future<> handle_echo_msg(inet_address from, const locator::host_id* id, seastar::rpc::opt_time_point, std::optional<int64_t> generation_number_opt, bool notify_up);
+ future<> handle_echo_msg(locator::host_id id, seastar::rpc::opt_time_point, std::optional<int64_t> generation_number_opt, bool notify_up);
future<> handle_shutdown_msg(inet_address from, std::optional<int64_t> generation_number_opt);
future<> do_send_ack_msg(locator::host_id from, gossip_digest_syn syn_msg);
future<> do_send_ack2_msg(locator::host_id from, utils::chunked_vector<gossip_digest> ack_msg_digest);
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 88a492070da..4ec7f80c3b9 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -420,21 +420,21 @@ future<> gossiper::handle_ack2_msg(locator::host_id from, gossip_digest_ack2 msg
co_await apply_state_locally(std::move(remote_ep_state_map));
}

-future<> gossiper::handle_echo_msg(gms::inet_address from, const locator::host_id* from_hid, seastar::rpc::opt_time_point timeout, std::optional<int64_t> generation_number_opt, bool notify_up) {
+future<> gossiper::handle_echo_msg(locator::host_id from_hid, seastar::rpc::opt_time_point timeout, std::optional<int64_t> generation_number_opt, bool notify_up) {
bool respond = true;
- if (from_hid && !_advertise_to_nodes.empty()) {
- auto it = _advertise_to_nodes.find(*from_hid);
+ if (!_advertise_to_nodes.empty()) {
+ auto it = _advertise_to_nodes.find(from_hid);
if (it == _advertise_to_nodes.end()) {
respond = false;
} else {
- auto es = get_endpoint_state_ptr(from);
+ auto es = get_endpoint_state_ptr(from_hid);
if (es) {
auto saved_generation_number = it->second;
auto current_generation_number = generation_number_opt ?
generation_type(generation_number_opt.value()) : es->get_heart_beat_state().get_generation();
respond = saved_generation_number == current_generation_number;
logger.debug("handle_echo_msg: from={}, saved_generation_number={}, current_generation_number={}",
- from, saved_generation_number, current_generation_number);
+ from_hid, saved_generation_number, current_generation_number);
} else {
respond = false;
}
@@ -444,21 +444,21 @@ future<> gossiper::handle_echo_msg(gms::inet_address from, const locator::host_i
throw std::runtime_error("Not ready to respond gossip echo message");
}
if (notify_up) {
- if (!timeout || !from_hid) {
- on_internal_error(logger, "UP notification should have a timeout and src host id");
+ if (!timeout) {
+ on_internal_error(logger, "UP notification should have a timeout");
}
auto normal = [] (gossiper& g, locator::host_id hid) {
const auto& topo = g.get_token_metadata_ptr()->get_topology();
return topo.has_node(hid) && topo.find_node(hid)->is_normal();
};
- co_await container().invoke_on(0, [from, from_hid, timeout, &normal] (gossiper& g) -> future<> {
+ co_await container().invoke_on(0, [from_hid, timeout, &normal] (gossiper& g) -> future<> {
try {
// Wait to see the node as normal. It may node be the case if the node bootstraps
- while (rpc::rpc_clock_type::now() < *timeout && !(normal(g, *from_hid) && g.is_alive(*from_hid))) {
+ while (rpc::rpc_clock_type::now() < *timeout && !(normal(g, from_hid) && g.is_alive(from_hid))) {
co_await sleep_abortable(std::chrono::milliseconds(100), g._abort_source);
}
} catch(...) {
- logger.warn("handle_echo_msg: UP notification from {} failed with {}", from, std::current_exception());
+ logger.warn("handle_echo_msg: UP notification from {} failed with {}", from_hid, std::current_exception());
}
});
}
@@ -539,9 +539,8 @@ void gossiper::init_messaging_service_handler() {
});
});
ser::gossip_rpc_verbs::register_gossip_echo(&_messaging, [this] (const rpc::client_info& cinfo, seastar::rpc::opt_time_point timeout, rpc::optional<int64_t> generation_number_opt, rpc::optional<bool> notify_up_opt) {
- auto from = cinfo.retrieve_auxiliary<gms::inet_address>("baddr");
- auto from_hid = cinfo.retrieve_auxiliary_opt<locator::host_id>("host_id");
- return handle_echo_msg(from, from_hid, timeout, generation_number_opt, notify_up_opt.value_or(false));
+ auto from_hid = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
+ return handle_echo_msg(from_hid, timeout, generation_number_opt, notify_up_opt.value_or(false));
});
ser::gossip_rpc_verbs::register_gossip_shutdown(&_messaging, [this] (inet_address from, rpc::optional<int64_t> generation_number_opt) {
return background_msg("GOSSIP_SHUTDOWN", [from, generation_number_opt] (gms::gossiper& gossiper) {
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:55 AMMar 13

to scylladb-dev@googlegroups.com

Provide default implementation for them instead. Will be easier to rework them later.

---
cdc/generation_service.hh | 5 -----
gms/i_endpoint_state_change_subscriber.hh | 12 ++++++------
service/load_broadcaster.hh | 6 ------
service/migration_manager.hh | 3 ---
service/view_update_backlog_broker.hh | 5 -----
streaming/stream_manager.hh | 3 ---
gms/feature_service.cc | 4 ----
repair/row_level.cc | 18 ------------------
service/storage_service.cc | 19 -------------------
9 files changed, 6 insertions(+), 69 deletions(-)

diff --git a/cdc/generation_service.hh b/cdc/generation_service.hh
index efad713821f..165a0aab1ec 100644
--- a/cdc/generation_service.hh
+++ b/cdc/generation_service.hh
@@ -110,11 +110,6 @@ class generation_service : public peering_sharded_service<generation_service>
return _cdc_metadata;
}

- virtual future<> on_alive(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_dead(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_remove(gms::inet_address, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_restart(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-
virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override;
virtual future<> on_change(gms::inet_address, const gms::application_state_map&, gms::permit_id) override;

diff --git a/gms/i_endpoint_state_change_subscriber.hh b/gms/i_endpoint_state_change_subscriber.hh
index 3c15c43d425..44e32bb5ce9 100644
--- a/gms/i_endpoint_state_change_subscriber.hh
+++ b/gms/i_endpoint_state_change_subscriber.hh
@@ -46,15 +46,15 @@ class i_endpoint_state_change_subscriber {
* @param endpoint endpoint for which the state change occurred.
* @param epState state that actually changed for the above endpoint.
*/
- virtual future<> on_join(inet_address endpoint, endpoint_state_ptr ep_state, permit_id) = 0;
+ virtual future<> on_join(inet_address endpoint, endpoint_state_ptr ep_state, permit_id) { return make_ready_future<>(); }

- virtual future<> on_change(inet_address endpoint, const application_state_map& states, permit_id) = 0;
+ virtual future<> on_change(inet_address endpoint, const application_state_map& states, permit_id) { return make_ready_future<>(); }

- virtual future<> on_alive(inet_address endpoint, endpoint_state_ptr state, permit_id) = 0;
+ virtual future<> on_alive(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };

- virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, permit_id) = 0;
+ virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };

- virtual future<> on_remove(inet_address endpoint, permit_id) = 0;
+ virtual future<> on_remove(inet_address endpoint, permit_id) { return make_ready_future<>(); };

/**
* Called whenever a node is restarted.
@@ -62,7 +62,7 @@ class i_endpoint_state_change_subscriber {
* previously marked down. It will have only if {@code state.isAlive() == false}
* as {@code state} is from before the restarted node is marked up.
*/
- virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr state, permit_id) = 0;
+ virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr state, permit_id) { return make_ready_future<>(); };
};

} // namespace gms
diff --git a/service/load_broadcaster.hh b/service/load_broadcaster.hh
index abbce7eb00d..2f13ae4b5ed 100644
--- a/service/load_broadcaster.hh
+++ b/service/load_broadcaster.hh
@@ -51,12 +51,6 @@ class load_broadcaster : public gms::i_endpoint_state_change_subscriber, public
}
return make_ready_future();
}
-
- future<> on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-
- future<> on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-
- future<> on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }

virtual future<> on_remove(gms::inet_address endpoint, gms::permit_id) override {
_load_info.erase(endpoint);
diff --git a/service/migration_manager.hh b/service/migration_manager.hh
index 508d1291b40..50ea39eb5f9 100644
--- a/service/migration_manager.hh
+++ b/service/migration_manager.hh
@@ -188,9 +188,6 @@ class migration_manager : public seastar::async_sharded_service<migration_manage
virtual future<> on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id) override;
virtual future<> on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id) override;
virtual future<> on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override;
- virtual future<> on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_remove(gms::inet_address endpoint, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override { return make_ready_future(); }

public:
// For tests only.
diff --git a/service/view_update_backlog_broker.hh b/service/view_update_backlog_broker.hh
index 30db2c8410e..7d54ffb055d 100644
--- a/service/view_update_backlog_broker.hh
+++ b/service/view_update_backlog_broker.hh
@@ -42,11 +42,6 @@ class view_update_backlog_broker final
virtual future<> on_change(gms::inet_address, const gms::application_state_map& states, gms::permit_id) override;

virtual future<> on_remove(gms::inet_address, gms::permit_id) override;
-
- virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_alive(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_dead(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_restart(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
};

}
diff --git a/streaming/stream_manager.hh b/streaming/stream_manager.hh
index 72b3dae91a6..927c3b24170 100644
--- a/streaming/stream_manager.hh
+++ b/streaming/stream_manager.hh
@@ -172,9 +172,6 @@ class stream_manager : public gms::i_endpoint_state_change_subscriber, public en
reader_consumer_v2 make_streaming_consumer(
uint64_t estimated_partitions, stream_reason, service::frozen_topology_guard);
public:
- virtual future<> on_join(inet_address endpoint, endpoint_state_ptr ep_state, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_change(gms::inet_address, const gms::application_state_map& states, gms::permit_id) override { return make_ready_future(); }
- virtual future<> on_alive(inet_address endpoint, endpoint_state_ptr state, gms::permit_id) override { return make_ready_future(); }
virtual future<> on_dead(inet_address endpoint, endpoint_state_ptr state, gms::permit_id) override;
virtual future<> on_remove(inet_address endpoint, gms::permit_id) override;
virtual future<> on_restart(inet_address endpoint, endpoint_state_ptr ep_state, gms::permit_id) override;
diff --git a/gms/feature_service.cc b/gms/feature_service.cc
index 8f3ff69ad00..eaa466f8741 100644
--- a/gms/feature_service.cc
+++ b/gms/feature_service.cc
@@ -271,10 +271,6 @@ class persistent_feature_enabler : public i_endpoint_state_change_subscriber {
}
return make_ready_future();
}
- future<> on_alive(inet_address, endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- future<> on_dead(inet_address, endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
- future<> on_remove(inet_address, gms::permit_id) override { return make_ready_future(); }
- future<> on_restart(inet_address, endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }

future<> enable_features();
};
diff --git a/repair/row_level.cc b/repair/row_level.cc
index 7ddc5649b4e..b562c708dfe 100644
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -3201,24 +3201,6 @@ class row_level_repair_gossip_helper : public gms::i_endpoint_state_change_subsc
rlogger.warn("Failed to remove row level repair for node {}: {}", node, std::current_exception());
}
}
- virtual future<> on_join(
- gms::inet_address endpoint,
- gms::endpoint_state_ptr ep_state,
- gms::permit_id) override {
- return make_ready_future();
- }
- virtual future<> on_change(
- gms::inet_address endpoint,
- const gms::application_state_map& states,
- gms::permit_id) override {
- return make_ready_future();
- }
- virtual future<> on_alive(
- gms::inet_address endpoint,
- gms::endpoint_state_ptr state,
- gms::permit_id) override {
- return make_ready_future();
- }
virtual future<> on_dead(
gms::inet_address endpoint,
gms::endpoint_state_ptr state,
diff --git a/service/storage_service.cc b/service/storage_service.cc
index ebe34ffe909..cf003e31101 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -976,30 +976,11 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s
return on_endpoint_change(endpoint, ep_state, permit_id, "on_join");
}

- virtual future<>
- on_change(gms::inet_address endpoint, const gms::application_state_map& states, gms::permit_id) override {
- // Raft server ID never changes - do nothing

- return make_ready_future<>();
- }
-

virtual future<>
on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
return on_endpoint_change(endpoint, ep_state, permit_id, "on_alive");
}

- virtual future<>
- on_dead(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id) override {

- return make_ready_future<>();
- }
-

- virtual future<>
- on_remove(gms::inet_address endpoint, gms::permit_id) override {
- // The mapping is removed when the server is removed from
- // Raft configuration, not when it's dead or alive, or
- // removed

- return make_ready_future<>();
- }
-

virtual future<>
on_restart(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id permit_id) override {
return on_endpoint_change(endpoint, ep_state, permit_id, "on_restart");
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:56 AMMar 13

to scylladb-dev@googlegroups.com

It was always deprecated.
---
gms/gossiper.hh | 2 --
api/gossiper.cc | 7 +------
gms/gossiper.cc | 5 -----
api/api-doc/gossiper.json | 8 --------
4 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 433a4e3f0fd..62097f735ab 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -382,8 +382,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar
*/
future<> advertise_token_removed(inet_address endpoint, locator::host_id host_id, permit_id);

- future<> unsafe_assassinate_endpoint(sstring address);
-
/**
* Do not call this method unless you know what you are doing.
* It will try extremely hard to obliterate any endpoint from the ring,
diff --git a/api/gossiper.cc b/api/gossiper.cc
index e5fcd2e1076..2df0e05cf45 100644
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -53,12 +53,7 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
});

httpd::gossiper_json::assassinate_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
- if (req->get_query_param("unsafe") != "True") {
- return g.assassinate_endpoint(req->get_path_param("addr")).then([] {
- return make_ready_future<json::json_return_type>(json_void());
- });
- }
- return g.unsafe_assassinate_endpoint(req->get_path_param("addr")).then([] {
+ return g.assassinate_endpoint(req->get_path_param("addr")).then([] {

return make_ready_future<json::json_return_type>(json_void());
});
});

diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 4ec7f80c3b9..2a0e96b6e82 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -1369,11 +1369,6 @@ future<> gossiper::advertise_token_removed(inet_address endpoint, locator::host_
co_await sleep_abortable(INTERVAL * 2, _abort_source);
}

-future<> gossiper::unsafe_assassinate_endpoint(sstring address) {
- logger.warn("Gossiper.unsafeAssassinateEndpoint is deprecated and will be removed in the next release; use assassinate_endpoint instead");
- return assassinate_endpoint(address);
-}
-
future<> gossiper::assassinate_endpoint(sstring address) {
co_await container().invoke_on(0, [&] (auto&& gossiper) -> future<> {
inet_address endpoint(address);
diff --git a/api/api-doc/gossiper.json b/api/api-doc/gossiper.json
index 49a73ff17b2..54eab08e9be 100644
--- a/api/api-doc/gossiper.json
+++ b/api/api-doc/gossiper.json
@@ -136,14 +136,6 @@
"allowMultiple":false,
"type":"string",
"paramType":"path"
- },
- {
- "name":"unsafe",
- "description":"Set to True to perform an unsafe assassination",
- "required":false,
- "allowMultiple":false,
- "type":"boolean",
- "paramType":"query"
}
]
}
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:57 AMMar 13

to scylladb-dev@googlegroups.com

---
service/storage_service.cc | 1 -
1 file changed, 1 deletion(-)

diff --git a/service/storage_service.cc b/service/storage_service.cc
index cf003e31101..060bdac5ac4 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -2571,7 +2571,6 @@ future<> storage_service::handle_state_removed(inet_address endpoint, std::vecto
}
const auto host_id = _gossiper.get_host_id(endpoint);
if (get_token_metadata().is_normal_token_owner(host_id)) {
- auto state = pieces[0];
auto remove_tokens = get_token_metadata().get_tokens(host_id);
std::unordered_set<token> tmp(remove_tokens.begin(), remove_tokens.end());
co_await excise(std::move(tmp), endpoint, host_id, extract_expire_time(pieces), pid);
--
2.47.1

Gleb Natapov

<gleb@scylladb.com>

unread,

Mar 13, 2025, 6:10:57 AMMar 13

to scylladb-dev@googlegroups.com

---
gms/gossiper.hh | 1 -
gms/gossiper.cc | 4 ----
2 files changed, 5 deletions(-)

diff --git a/gms/gossiper.hh b/gms/gossiper.hh
index 343882af0e2..8f2efd527c8 100644
--- a/gms/gossiper.hh
+++ b/gms/gossiper.hh
@@ -110,7 +110,6 @@ class gossiper : public seastar::async_sharded_service<gossiper>, public seastar

future<> do_send_ack2_msg(locator::host_id from, utils::chunked_vector<gossip_digest> ack_msg_digest);

future<gossip_get_endpoint_states_response> handle_get_endpoint_states_msg(gossip_get_endpoint_states_request request);
static constexpr uint32_t _default_cpuid = 0;
- msg_addr get_msg_addr(inet_address to) const noexcept;
void do_sort(utils::chunked_vector<gossip_digest>& g_digest_list) const;
timer<lowres_clock> _scheduled_gossip_task;
bool _enabled = false;
diff --git a/gms/gossiper.cc b/gms/gossiper.cc
index 21de6589721..7d25e8e4acc 100644
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -60,10 +60,6 @@ constexpr std::chrono::milliseconds gossiper::INTERVAL;
constexpr std::chrono::hours gossiper::A_VERY_LONG_TIME;
constexpr generation_type::value_type gossiper::MAX_GENERATION_DIFFERENCE;

-netw::msg_addr gossiper::get_msg_addr(inet_address to) const noexcept {
- return msg_addr{to, _default_cpuid};
-}
-
const sstring& gossiper::get_cluster_name() const noexcept {
return _gcfg.cluster_name;
}
--
2.47.1