Corrupt keys might be printed to the log as non-UTF-8 strings,
and that, in turn, may break applications reading the logs,
such as those written in Python (3.7), which decode them as UTF-8.
For example:
```
Traceback (most recent call last):
File "/home/bhalevy/dev/scylla-dtest/dtest.py", line 1148, in tearDown
self.cleanUpCluster()
File "/home/bhalevy/dev/scylla-dtest/dtest.py", line 1184, in cleanUpCluster
matches = node.grep_log(expr)
File "/home/bhalevy/dev/scylla-ccm/ccmlib/node.py", line 367, in grep_log
for line in f:
File "/usr/lib64/python3.7/codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb3 in position 5577: invalid start byte
```
Signed-off-by: Benny Halevy <bha...@scylladb.com>
---
compaction/compaction.cc | 12 ++++++------
dht/i_partitioner.cc | 8 ++++++++
dht/i_partitioner.hh | 2 ++
3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/compaction/compaction.cc b/compaction/compaction.cc
index 8af0ba2fa..0bd25a8d8 100644
--- a/compaction/compaction.cc
+++ b/compaction/compaction.cc
@@ -1199,9 +1199,9 @@ class scrub_compaction final : public regular_compaction {
type,
schema.ks_name(),
schema.cf_name(),
- new_key.key().with_schema(schema),
+ new_key.to_string(schema),
new_key,
- current_key.key().with_schema(schema),
+ current_key.to_string(schema),
current_key,
action.empty() ? "" : "; ",
action);
@@ -1214,9 +1214,9 @@ class scrub_compaction final : public regular_compaction {
type,
schema.ks_name(),
schema.cf_name(),
- new_key.key().with_schema(schema),
+ new_key.to_string(schema),
new_key,
- current_key.key().with_schema(schema),
+ current_key.to_string(schema),
current_key,
action.empty() ? "" : "; ",
action);
@@ -1234,7 +1234,7 @@ class scrub_compaction final : public regular_compaction {
mf.mutation_fragment_kind(),
mf.has_key() ? format(" with key {}", mf.key().with_schema(schema)) : "",
mf.position(),
- key.key().with_schema(schema),
+ key.to_string(schema),
key,
prev_pos.region(),
prev_pos.has_key() ? format(" with key {}", prev_pos.key().with_schema(schema)) : "",
@@ -1246,7 +1246,7 @@ class scrub_compaction final : public regular_compaction {
const auto& schema = validator.schema();
const auto& key = validator.previous_partition_key();
clogger.error("[{} compaction {}.{}] Invalid end-of-stream, last partition {} ({}) didn't end with a partition-end fragment{}{}",
- type, schema.ks_name(), schema.cf_name(), key.key().with_schema(schema), key, action.empty() ? "" : "; ", action);
+ type, schema.ks_name(), schema.cf_name(), key.to_string(schema), key, action.empty() ? "" : "; ", action);
}
private:
diff --git a/dht/i_partitioner.cc b/dht/i_partitioner.cc
index 410be9227..dff1dd2e7 100644
--- a/dht/i_partitioner.cc
+++ b/dht/i_partitioner.cc
@@ -33,6 +33,8 @@
#include <boost/range/adaptor/transformed.hpp>
#include "sstables/key.hh"
#include <seastar/core/thread.hh>
+#include <seastar/core/sstring.hh>
+#include "utils/utf8.hh"
namespace dht {
@@ -147,6 +149,12 @@ decorated_key::less_comparator::operator()(const decorated_key& lhs, const ring_
return lhs.tri_compare(*s, rhs) < 0;
}
+sstring
+decorated_key::to_string(const schema& s) const { // string form of the key, intended for log messages
+ sstring ret = format("{}", key().with_schema(s)); // render the key through the schema-aware formatter
+ return utils::utf8::validate((const uint8_t*)ret.data(), ret.size()) ? ret : "<non-utf8-key>"; // fall back to a fixed placeholder when the rendered text is not valid UTF-8
+}
+
std::ostream& operator<<(std::ostream& out, const ring_position_ext& pos) {
return out << (ring_position_view)pos;
}
diff --git a/dht/i_partitioner.hh b/dht/i_partitioner.hh
index e64bbe3e4..bc88dadea 100644
--- a/dht/i_partitioner.hh
+++ b/dht/i_partitioner.hh
@@ -129,6 +129,8 @@ class decorated_key {
size_t memory_usage() const {
return sizeof(decorated_key) + external_memory_usage();
}
+
+ sstring to_string(const schema& s) const;
};
--
2.31.1