Although Cassandra generally does not allow empty strings as partition keys
(not they are allowed as clustering keys!), it *does* allow empty strings
in regular columns to be indexed by a secondary index, or to become an empty
partition-key column in a materialized view. As noted in issues #9375
and #9364 and verified in a few xfailing cql-pytest tests, Scylla didn't
allow these cases - and this patch fixes that.
The patch is easy, almost "too easy" - it just removes an elaborate function
is_partition_key_empty() which the code used to check whether the view's
row will end up with an empty partition key, which was supposedly
forbidden. But in fact, should have been allowed like they are allowed
in Cassandra and required for the secondary-index implementation, and
the entire function wasn't necessary. This patch also comments-out a
part of a unit test which enshrined the wrong behavior.
Note that the removed function is_partition_key_empty() was *NOT* required
for the "IS NOT NULL" feature of materialized views - this continues to work
as expected after this patch, and we add another test to confirm it.
Being null and being an empty string are two different things.
After this patch we are left with one interesting difference from
Cassandra: Though Cassandra allows a user to create a view row with an
empty-string partition key, and this row is fully visible in when
scanning the view, this row can *not* be queried individually because
"WHERE v=''" is forbidden when v is the partition key (of the view).
Scylla doesn't reproduce this anomaly - and such point query does work
in Scylla after this patch. We add a new test to check this case, and mark
it "cassandra_bug", i.e., it's a Cassandra behavior which we consider wrong
and don't want to emulate.
Fixes #9364
Fixes #9375
Signed-off-by: Nadav Har'El <
n...@scylladb.com>
---
test/cql-pytest/test_materialized_view.py | 52 ++++++++++++++++++++---
test/cql-pytest/test_secondary_index.py | 1 -
db/view/view.cc | 34 ++-------------
test/boost/view_schema_pkey_test.cc | 7 +++
4 files changed, 58 insertions(+), 36 deletions(-)
diff --git a/test/cql-pytest/test_materialized_view.py b/test/cql-pytest/test_materialized_view.py
index d69677e80..cfb4ff45c 100644
--- a/test/cql-pytest/test_materialized_view.py
+++ b/test/cql-pytest/test_materialized_view.py
@@ -81,7 +81,6 @@ def test_mv_select_stmt_bound_values(cql, test_keyspace):
# because the "IS NOT NULL" clause in the view's declaration does not
# eliminate this row (an empty string is not considered NULL).
# This reproduces issue #9375.
-...@pytest.mark.xfail(reason="issue #9375")
def test_mv_empty_string_partition_key(cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
with new_test_table(cql, test_keyspace, schema) as table:
@@ -94,7 +93,50 @@ def test_mv_empty_string_partition_key(cql, test_keyspace):
# The view row with the empty partition key should exist.
# In #9375, this failed in Scylla:
assert list(cql.execute(f"SELECT * FROM {mv}")) == [('', 123)]
- # However, it is still impossible to select just this row,
- # because Cassandra forbids an empty partition key on select
- with pytest.raises(InvalidRequest, match='Key may not be empty'):
- cql.execute(f"SELECT * FROM {mv} WHERE v=''")
+
+# The previous test (test_mv_empty_string_partition_key) verifies that a
+# row with an empty-string partition key can appear in the view. This was
+# checked with a full-table scan. But curiously, Cassandra does NOT allow
+# to SELECT this specific row individually, because "WHERE v=''" is not
+# allowed when v is the partition key (of the view).
+# Scylla *does* allow such a query. This demonstrates a difference between
+# Scylla and Cassandra, but we believe that the Cassandra behavior is the
+# wrong one: It doesn't make sense to allow adding a row and making it
+# visible in a full-table scans, but not allowing to query it individually.
+# This is why we mark this test as a Cassandra bug.
+def test_mv_empty_string_partition_key_individual(cassandra_bug, cql, test_keyspace):
+ schema = 'p int, v text, primary key (p)'
+ with new_test_table(cql, test_keyspace, schema) as table:
+ with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
+ cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, '')")
+ # Note that because cql-pytest runs on a single node, view
+ # updates are synchronous, and we can read the view immediately
+ # without retrying. In a general setup, this test would require
+ # retries.
+ # The view row with the empty partition key should exist.
+ assert list(cql.execute(f"SELECT * FROM {mv} WHERE v=''")) == [('', 123)]
+
+# Test that the "IS NOT NULL" clause in the materialized view's SELECT
+# functions as expected - namely, rows which have their would-be view
+# key column unset (aka null) do not get copied into the view.
+def test_mv_is_not_null(cql, test_keyspace):
+ schema = 'p int, v text, primary key (p)'
+ with new_test_table(cql, test_keyspace, schema) as table:
+ with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
+ cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, 'dog')")
+ cql.execute(f"INSERT INTO {table} (p,v) VALUES (17, null)")
+ # Note that because cql-pytest runs on a single node, view
+ # updates are synchronous, and we can read the view immediately
+ # without retrying. In a general setup, this test would require
+ # retries.
+ # The row with 123 should appear in the view, but the row with
+ # 17 should not, because v *is* null.
+ assert list(cql.execute(f"SELECT * FROM {mv}")) == [('dog', 123)]
+ # The view row should disappear and reappear if its key is
+ # changed to null and back in the base table:
+ cql.execute(f"UPDATE {table} SET v=null WHERE p=123")
+ assert list(cql.execute(f"SELECT * FROM {mv}")) == []
+ cql.execute(f"UPDATE {table} SET v='cat' WHERE p=123")
+ assert list(cql.execute(f"SELECT * FROM {mv}")) == [('cat', 123)]
+ cql.execute(f"DELETE v FROM {table} WHERE p=123")
+ assert list(cql.execute(f"SELECT * FROM {mv}")) == []
diff --git a/test/cql-pytest/test_secondary_index.py b/test/cql-pytest/test_secondary_index.py
index 362a87c3d..672cecb41 100644
--- a/test/cql-pytest/test_secondary_index.py
+++ b/test/cql-pytest/test_secondary_index.py
@@ -289,7 +289,6 @@ def test_multi_column_with_regular_index(cql, test_keyspace):
# wrong or unusual about an empty string, and it should be supported just
# like any other string.
# Reproduces issue #9364
-...@pytest.mark.xfail(reason="issue #9364")
def test_index_empty_string(cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
# Searching for v='' without an index (with ALLOW FILTERING), works
diff --git a/db/view/view.cc b/db/view/view.cc
index 28e1dbd75..274c80795 100644
--- a/db/view/view.cc
+++ b/db/view/view.cc
@@ -309,32 +309,6 @@ static bool update_requires_read_before_write(const schema& base,
return false;
}
-static bool is_partition_key_empty(
- const schema& base,
- const schema& view_schema,
- const partition_key& base_key,
- const clustering_row& update) {
- // Empty partition keys are not supported on normal tables - they cannot
- // be inserted or queried, so enforce those rules here.
- if (view_schema.partition_key_columns().size() > 1) {
- // Composite partition keys are different: all components
- // are then allowed to be empty.
- return false;
- }
- auto* base_col = base.get_column_definition(view_schema.partition_key_columns().front().name());
- switch (base_col->kind) {
- case column_kind::partition_key:
- return base_key.get_component(base, base_col->position()).empty();
- case column_kind::clustering_key:
- return update.key().get_component(base, base_col->position()).empty();
- default:
- // No multi-cell columns in the view's partition key
- auto& c = update.cells().cell_at(base_col->id);
- atomic_cell_view col_value = c.as_atomic_cell(*base_col);
- return !col_value.is_live() || col_value.value().empty();
- }
-}
-
// Checks if the result matches the provided view filter.
// It's currently assumed that the result consists of just a single row.
class view_filter_checking_visitor {
@@ -692,7 +666,7 @@ static void add_cells_to_view(const schema& base, const schema& view, row base_c
* This method checks that the base row does match the view filter before applying anything.
*/
void view_updates::create_entry(const partition_key& base_key, const clustering_row& update, gc_clock::time_point now) {
- if (is_partition_key_empty(*_base, *_view, base_key, update) || !matches_view_filter(*_base, _view_info, base_key, update, now)) {
+ if (!matches_view_filter(*_base, _view_info, base_key, update, now)) {
return;
}
deletable_row& r = get_view_row(base_key, update);
@@ -710,7 +684,7 @@ void view_updates::create_entry(const partition_key& base_key, const clustering_
void view_updates::delete_old_entry(const partition_key& base_key, const clustering_row& existing, const clustering_row& update, gc_clock::time_point now) {
// Before deleting an old entry, make sure it was matching the view filter
// (otherwise there is nothing to delete)
- if (!is_partition_key_empty(*_base, *_view, base_key, existing) && matches_view_filter(*_base, _view_info, base_key, existing, now)) {
+ if (matches_view_filter(*_base, _view_info, base_key, existing, now)) {
do_delete_old_entry(base_key, existing, update, now);
}
}
@@ -821,11 +795,11 @@ bool view_updates::can_skip_view_updates(const clustering_row& update, const clu
void view_updates::update_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now) {
// While we know update and existing correspond to the same view entry,
// they may not match the view filter.
- if (is_partition_key_empty(*_base, *_view, base_key, existing) || !matches_view_filter(*_base, _view_info, base_key, existing, now)) {
+ if (!matches_view_filter(*_base, _view_info, base_key, existing, now)) {
create_entry(base_key, update, now);
return;
}
- if (is_partition_key_empty(*_base, *_view, base_key, update) || !matches_view_filter(*_base, _view_info, base_key, update, now)) {
+ if (!matches_view_filter(*_base, _view_info, base_key, update, now)) {
do_delete_old_entry(base_key, existing, update, now);
return;
}
diff --git a/test/boost/view_schema_pkey_test.cc b/test/boost/view_schema_pkey_test.cc
index 54b5eea09..e4bc901ee 100644
--- a/test/boost/view_schema_pkey_test.cc
+++ b/test/boost/view_schema_pkey_test.cc
@@ -727,6 +727,12 @@ SEASTAR_TEST_CASE(test_base_non_pk_columns_in_view_partition_key_are_non_emtpy)
});
}
+ // The following if'ed-out tests verified behavior that we
+ // no longer believe to be correct, and also turns out not to be
+ // compatible with Cassandra (see issue #9375): these tests checked
+ // that if a view row should get an empty string as a partition key,
+ // this row should not be generated. But this is not the case.
+#if 0
auto views_not_matching = {
"create materialized view %s as select * from cf "
"where p1 is not null and p2 is not null and c is not null and v is not null "
@@ -776,5 +782,6 @@ SEASTAR_TEST_CASE(test_base_non_pk_columns_in_view_partition_key_are_non_emtpy)
auto msg = e.execute_cql(format("select p1, p2, c, v from {}", name)).get0();
assert_that(msg).is_rows().is_empty();
});
+#endif
});
}
--
2.31.1