[PATCH v1 03/56] api: do not use token_metadata to retrieve ip to id mapping in token_metadata RESTful endpoints

2 views
Skip to first unread message

Gleb Natapov

<gleb@scylladb.com>
unread,
Jan 13, 2025, 3:20:48 AMJan 13
to scylladb-dev@googlegroups.com
We want to drop ip knowledge from the token_metadata, so use gossiper to
retrieve the mapping instead.
---
api/api_init.hh | 2 +-
api/token_metadata.hh | 3 ++-
locator/token_metadata.hh | 2 +-
api/api.cc | 4 ++--
api/token_metadata.cc | 45 ++++++++++++++++++++++++++-------------
locator/token_metadata.cc | 30 +++++++++-----------------
main.cc | 2 +-
7 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/api/api_init.hh b/api/api_init.hh
index 435f637c0ca..cf539344984 100644
--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -112,7 +112,7 @@ future<> set_server_authorization_cache(http_context& ctx, sharded<auth::service
future<> unset_server_authorization_cache(http_context& ctx);
future<> set_server_snapshot(http_context& ctx, sharded<db::snapshot_ctl>& snap_ctl);
future<> unset_server_snapshot(http_context& ctx);
-future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm);
+future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm, sharded<gms::gossiper>& g);
future<> unset_server_token_metadata(http_context& ctx);
future<> set_server_gossip(http_context& ctx, sharded<gms::gossiper>& g);
future<> unset_server_gossip(http_context& ctx);
diff --git a/api/token_metadata.hh b/api/token_metadata.hh
index 0bab6d999fd..3e804050fc0 100644
--- a/api/token_metadata.hh
+++ b/api/token_metadata.hh
@@ -15,10 +15,11 @@ class routes;
}

namespace locator { class shared_token_metadata; }
+namespace gms { class gossiper; }

namespace api {
struct http_context;
-void set_token_metadata(http_context& ctx, seastar::httpd::routes& r, seastar::sharded<locator::shared_token_metadata>& tm);
+void set_token_metadata(http_context& ctx, seastar::httpd::routes& r, seastar::sharded<locator::shared_token_metadata>& tm, seastar::sharded<gms::gossiper>& g);
void unset_token_metadata(http_context& ctx, seastar::httpd::routes& r);

}
diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh
index 0f604bdf858..541c91f086e 100644
--- a/locator/token_metadata.hh
+++ b/locator/token_metadata.hh
@@ -245,7 +245,7 @@ class token_metadata final {
inet_address get_endpoint_for_host_id(locator::host_id host_id) const;

/** @return a copy of the endpoint-to-id map for read-only operations */
- std::unordered_map<inet_address, host_id> get_endpoint_to_host_id_map() const;
+ std::unordered_set<host_id> get_host_ids() const;

/// Returns host_id of the local node.
host_id get_my_id() const;
diff --git a/api/api.cc b/api/api.cc
index ff5c06432d6..879f030b9b0 100644
--- a/api/api.cc
+++ b/api/api.cc
@@ -188,8 +188,8 @@ future<> unset_server_snapshot(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_snapshot(ctx, r); });
}

-future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm) {
- return ctx.http_server.set_routes([&ctx, &tm] (routes& r) { set_token_metadata(ctx, r, tm); });
+future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm, sharded<gms::gossiper>& g) {
+ return ctx.http_server.set_routes([&ctx, &tm, &g] (routes& r) { set_token_metadata(ctx, r, tm, g); });
}

future<> unset_server_token_metadata(http_context& ctx) {
diff --git a/api/token_metadata.cc b/api/token_metadata.cc
index a8c3234befe..081388d329d 100644
--- a/api/token_metadata.cc
+++ b/api/token_metadata.cc
@@ -10,6 +10,7 @@
#include "api/api-doc/storage_service.json.hh"
#include "api/api-doc/endpoint_snitch_info.json.hh"
#include "locator/token_metadata.hh"
+#include "gms/gossiper.hh"

using namespace seastar::httpd;

@@ -18,7 +19,7 @@ namespace api {
namespace ss = httpd::storage_service_json;
using namespace json;

-void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_token_metadata>& tm) {
+void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_token_metadata>& tm, sharded<gms::gossiper>& g) {
ss::local_hostid.set(r, [&tm](std::unique_ptr<http::request> req) {
auto id = tm.local().get()->get_my_id();
if (!bool(id)) {
@@ -33,22 +34,25 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
}));
});

- ss::get_node_tokens.set(r, [&tm] (std::unique_ptr<http::request> req) {
+ ss::get_node_tokens.set(r, [&tm, &g] (std::unique_ptr<http::request> req) {
gms::inet_address addr(req->get_path_param("endpoint"));
auto& local_tm = *tm.local().get();
- const auto host_id = local_tm.get_host_id_if_known(addr);
+ std::optional<locator::host_id> host_id;
+ try {
+ host_id = g.local().get_host_id(addr);
+ } catch (...) {}
return make_ready_future<json::json_return_type>(stream_range_as_array(host_id ? local_tm.get_tokens(*host_id): std::vector<dht::token>{}, [](const dht::token& i) {
return fmt::to_string(i);
}));
});

- ss::get_leaving_nodes.set(r, [&tm](const_req req) {
+ ss::get_leaving_nodes.set(r, [&tm, &g](const_req req) {
const auto& local_tm = *tm.local().get();
const auto& leaving_host_ids = local_tm.get_leaving_endpoints();
std::unordered_set<gms::inet_address> eps;
eps.reserve(leaving_host_ids.size());
for (const auto host_id: leaving_host_ids) {
- eps.insert(local_tm.get_endpoint_for_host_id(host_id));
+ eps.insert(g.local().get_address_map().get(host_id));
}
return container_to_vec(eps);
});
@@ -58,20 +62,23 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
return container_to_vec(addr);
});

- ss::get_joining_nodes.set(r, [&tm](const_req req) {
+ ss::get_joining_nodes.set(r, [&tm, &g](const_req req) {
const auto& local_tm = *tm.local().get();
const auto& points = local_tm.get_bootstrap_tokens();
std::unordered_set<gms::inet_address> eps;
eps.reserve(points.size());
for (const auto& [token, host_id]: points) {
- eps.insert(local_tm.get_endpoint_for_host_id(host_id));
+ eps.insert(g.local().get_address_map().get(host_id));
}
return container_to_vec(eps);
});

- ss::get_host_id_map.set(r, [&tm](const_req req) {
+ ss::get_host_id_map.set(r, [&tm, &g](const_req req) {
std::vector<ss::mapper> res;
- return map_to_key_value(tm.local().get()->get_endpoint_to_host_id_map(), res);
+ auto map = tm.local().get()->get_host_ids() |
+ std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) |
+ std::ranges::to<std::unordered_map>();
+ return map_to_key_value(std::move(map), res);
});

static auto host_or_broadcast = [&tm](const_req req) {
@@ -79,26 +86,34 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
return host.empty() ? tm.local().get()->get_topology().my_address() : gms::inet_address(host);
};

- httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&tm](const_req req) {
+ httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&tm, &g](const_req req) {
auto& topology = tm.local().get()->get_topology();
auto ep = host_or_broadcast(req);
- if (!topology.has_endpoint(ep)) {
+ std::optional<locator::host_id> host_id;
+ try {
+ host_id = g.local().get_host_id(ep);
+ } catch (...) {}
+ if (!host_id || !topology.has_node(*host_id)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return locator::endpoint_dc_rack::default_location.dc;
}
- return topology.get_datacenter(ep);
+ return topology.get_datacenter(*host_id);
});

- httpd::endpoint_snitch_info_json::get_rack.set(r, [&tm](const_req req) {
+ httpd::endpoint_snitch_info_json::get_rack.set(r, [&tm, &g](const_req req) {
auto& topology = tm.local().get()->get_topology();
auto ep = host_or_broadcast(req);
- if (!topology.has_endpoint(ep)) {
+ std::optional<locator::host_id> host_id;
+ try {
+ host_id = g.local().get_host_id(ep);
+ } catch (...) {}
+ if (!host_id || !topology.has_node(*host_id)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return locator::endpoint_dc_rack::default_location.rack;
}
- return topology.get_rack(ep);
+ return topology.get_rack(*host_id);
});
}

diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc
index 35c7efd7924..3c369e44ce5 100644
--- a/locator/token_metadata.cc
+++ b/locator/token_metadata.cc
@@ -162,8 +162,8 @@ class token_metadata_impl final {
/** Return the end-point for a unique host ID.*/
inet_address get_endpoint_for_host_id(host_id) const;

- /** @return a copy of the endpoint-to-id map for read-only operations */
- std::unordered_map<inet_address, host_id> get_endpoint_to_host_id_map() const;
+ /** @return a copy of host id set for read-only operations */
+ std::unordered_set<host_id> get_host_ids() const;

void add_bootstrap_token(token t, host_id endpoint);

@@ -567,21 +567,11 @@ inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) cons
}
}

-std::unordered_map<inet_address, host_id> token_metadata_impl::get_endpoint_to_host_id_map() const {
- const auto& nodes = _topology.get_nodes_by_endpoint();
- std::unordered_map<inet_address, host_id> map;
- map.reserve(nodes.size());
- for (const auto& [endpoint, node] : nodes) {
- if (node.get().left() || node.get().is_none()) {
- continue;
- }
- if (const auto& host_id = node.get().host_id()) {
- map[endpoint] = host_id;
- } else {
- tlogger.info("get_endpoint_to_host_id_map: endpoint {} has null host_id: state={}", endpoint, node.get().get_state());
- }
- }
- return map;
+std::unordered_set<host_id> token_metadata_impl::get_host_ids() const {
+ return _topology.get_nodes() |
+ std::views::filter([&] (const node& n) { return !n.left() && !n.is_none(); }) |
+ std::views::transform([] (const node& n) { return n.host_id(); }) |
+ std::ranges::to<std::unordered_set>();
}

bool token_metadata_impl::is_normal_token_owner(host_id endpoint) const {
@@ -1067,9 +1057,9 @@ token_metadata::get_endpoint_for_host_id(host_id host_id) const {
return _impl->get_endpoint_for_host_id(host_id);
}

-std::unordered_map<inet_address, host_id>
-token_metadata::get_endpoint_to_host_id_map() const {
- return _impl->get_endpoint_to_host_id_map();
+std::unordered_set<host_id>
+token_metadata::get_host_ids() const {
+ return _impl->get_host_ids();
}

void
diff --git a/main.cc b/main.cc
index 8b73496bfcf..7cd2bef73e4 100644
--- a/main.cc
+++ b/main.cc
@@ -1085,7 +1085,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
// token_metadata.stop().get();
//});

- api::set_server_token_metadata(ctx, token_metadata).get();
+ api::set_server_token_metadata(ctx, token_metadata, gossiper).get();
auto stop_tokens_api = defer_verbose_shutdown("token metadata API", [&ctx] {
api::unset_server_token_metadata(ctx).get();
});
--
2.47.1

Kamil Braun

<kbraun@scylladb.com>
unread,
Jan 13, 2025, 10:31:36 AMJan 13
to Gleb Natapov, scylladb-dev@googlegroups.com
previous version with get_endpoint_to_host_id_map() would skip nodes
without IPs and print a warning, this version will fail, isn't it going
to break some scenarios?

(question applies to all APIs where you replaced getting map with
address_map::get())
was this branch dead, so we don't need to handle it in new version?

I mean couldn't get_nodes() return nodes without host IDs below (I don't
understand how such nodes would appear, but just to be sure)

Gleb Natapov

<gleb@scylladb.com>
unread,
Jan 14, 2025, 3:34:43 AMJan 14
to Kamil Braun, scylladb-dev@googlegroups.com
I am not sure if it is a good idea to silently (printing a log message
does not count since API use will not see it) skip a node as opposite to
report an error. Internal error will be an exception in production which
will cause an API error to be returned if mapping is not available.
Internal error will let as catch if it really happens in testing.

> (question applies to all APIs where you replaced getting map with
> address_map::get())

get_host_id_if_known() in the previous API also reported internal error
in case a node is not available. But returns empty address if one is not
knows without printing anything. May we we can do it in all APIs here.
Who are the users?
--
Gleb.

Kamil Braun

<kbraun@scylladb.com>
unread,
Jan 14, 2025, 4:38:51 AMJan 14
to Gleb Natapov, scylladb-dev@googlegroups.com
Main user is nodetool status

Gleb Natapov

<gleb@scylladb.com>
unread,
Jan 14, 2025, 4:45:34 AMJan 14
to Kamil Braun, scylladb-dev@googlegroups.com
On Tue, Jan 14, 2025 at 10:38:45AM +0100, Kamil Braun wrote:
> > > > - ss::get_host_id_map.set(r, [&tm](const_req req) {
> > > > + ss::get_host_id_map.set(r, [&tm, &g](const_req req) {
> > > > std::vector<ss::mapper> res;
> > > > - return map_to_key_value(tm.local().get()->get_endpoint_to_host_id_map(), res);
> > > > + auto map = tm.local().get()->get_host_ids() |
> > > > + std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) |
> > > previous version with get_endpoint_to_host_id_map() would skip nodes without
> > > IPs and print a warning, this version will fail, isn't it going to break
> > > some scenarios?
> > I am not sure if it is a good idea to silently (printing a log message
> > does not count since API use will not see it) skip a node as opposite to
> > report an error. Internal error will be an exception in production which
> > will cause an API error to be returned if mapping is not available.
> > Internal error will let as catch if it really happens in testing.
> >
> > > (question applies to all APIs where you replaced getting map with
> > > address_map::get())
> >
> > get_host_id_if_known() in the previous API also reported internal error
> > in case a node is not available. But returns empty address if one is not
> > knows without printing anything. May we we can do it in all APIs here.
> > Who are the users?
> Main user is nodetool status

For this API? Need to check how it handles missing result.


--
Gleb.

Gleb Natapov

<gleb@scylladb.com>
unread,
Jan 14, 2025, 6:50:00 AMJan 14
to Kamil Braun, scylladb-dev@googlegroups.com
Looks like it will just report an incomplete info if id->ip mapping is
missing. I am not sure why it would be missing though in normal
circumstances. May be if token_metadata keeps nodes that are already
left, but I just checked and I see that tm::get_host_ids() filter such
hosts out, so not having the mapping may be an error indeed.


--
Gleb.
Reply all
Reply to author
Forward
0 new messages