Work on the fsck tool seems to have settled an old doubt about the order
of dentry keys with the same hash. The name length is not considered at
all, despite sharing the same field with the hash; the order is decided
by comparing the strings of the original unnormalized names.
Because of our incorrect assumptions, some files are listed but don't
open. This is relatively rare though, since it depends on collisions of
the 22-bit hash happening inside a single directory.
To fix it, split the APFS_QUERY_MULTIPLE flag into APFS_QUERY_ANY_NAME
and APFS_QUERY_ANY_NUMBER, so that queries can return all records with
a given hash and let apfs_inode_by_name() decide which one is correct.
Also mask away the filemane length.
All testing has been for ascii filenames in case-insensitive volumes, so
a small degree of uncertainty remains.
fs/apfs/btree.h | 10 ++++++----
fs/apfs/dir.c | 24 ++++++++++++++++--------
fs/apfs/key.c | 35 ++++++++++++++++-------------------
fs/apfs/key.h | 2 +-
fs/apfs/node.c | 9 +++++----
5 files changed, 44 insertions(+), 36 deletions(-)
diff --git a/fs/apfs/btree.h b/fs/apfs/btree.h
index 03ddae63e476..9e2b50bde830 100644
--- a/fs/apfs/btree.h
+++ b/fs/apfs/btree.h
@@ -16,10 +16,12 @@ struct super_block;
#define APFS_QUERY_TREE_MASK 0007 /* Which b-tree we query */
#define APFS_QUERY_OMAP 0001 /* This is a b-tree object map query */
#define APFS_QUERY_CAT 0002 /* This is a catalog tree query */
-#define APFS_QUERY_MULTIPLE 0010 /* Search for multiple matches */
-#define APFS_QUERY_NEXT 0020 /* Find next of multiple matches */
-#define APFS_QUERY_EXACT 0040 /* Search for an exact match */
-#define APFS_QUERY_DONE 0100 /* The search at this level is over */
+#define APFS_QUERY_NEXT 0010 /* Find next of multiple matches */
+#define APFS_QUERY_EXACT 0020 /* Search for an exact match */
+#define APFS_QUERY_DONE 0040 /* The search at this level is over */
+#define APFS_QUERY_ANY_NAME 0100 /* Multiple search for any name */
+#define APFS_QUERY_ANY_NUMBER 0200 /* Multiple search for any number */
+#define APFS_QUERY_MULTIPLE (APFS_QUERY_ANY_NAME | APFS_QUERY_ANY_NUMBER)
/*
* Structure used to retrieve data from an APFS B-Tree. For now only used
diff --git a/fs/apfs/dir.c b/fs/apfs/dir.c
index 64e478c0bc3d..631a7fc8a135 100644
--- a/fs/apfs/dir.c
+++ b/fs/apfs/dir.c
@@ -85,16 +85,24 @@ int apfs_inode_by_name(struct inode *dir, const struct qstr *child, u64 *ino)
if (!query)
return -ENOMEM;
query->key = &key;
- query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT;
- err = apfs_btree_query(sb, &query);
- if (err)
- goto out;
-
- err = apfs_drec_from_query(query, &drec);
- if (!err)
- *ino = drec.ino;
+ /*
+ * Distinct filenames in the same directory may (rarely) share the same
+ * hash. The query code cannot handle that because their order in the
+ * b-tree would depend on their unnormalized original names. Just get
+ * all the candidates and check them one by one.
+ */
+ query->flags |= APFS_QUERY_CAT | APFS_QUERY_ANY_NAME | APFS_QUERY_EXACT;
+ do {
+ err = apfs_btree_query(sb, &query);
+ if (err)
+ goto out;
+ err = apfs_drec_from_query(query, &drec);
+ if (err)
+ goto out;
+ } while (unlikely(apfs_filename_cmp(sb, child->name,
drec.name)));
+ *ino = drec.ino;
out:
apfs_free_query(sb, query);
return err;
diff --git a/fs/apfs/key.c b/fs/apfs/key.c
index 204ceae68c8a..17a821a66aa7 100644
--- a/fs/apfs/key.c
+++ b/fs/apfs/key.c
@@ -42,8 +42,8 @@ static inline u64 apfs_cat_cnid(struct apfs_key_header *key)
* @name1, @name2: names to compare
*
* returns 0 if @name1 and @name2 are equal
- * < 0 if @name1 comes before @name2 in the btree
- * > 0 if @name1 comes after @name2 in the btree
+ * < 0 if @name1 comes before @name2
+ * > 0 if @name1 comes after @name2
*/
int apfs_filename_cmp(struct super_block *sb,
const char *name1, const char *name2)
@@ -85,16 +85,11 @@ int apfs_keycmp(struct super_block *sb,
return k1->type < k2->type ? -1 : 1;
if (k1->number != k2->number)
return k1->number < k2->number ? -1 : 1;
- if (!k1->name) /* Keys of this type have no name */
+ if (!k1->name)
return 0;
- if (k1->type == APFS_TYPE_XATTR) {
- /* xattr names seem to be always case sensitive */
- return strcmp(k1->name, k2->name);
- }
-
- /* Only guessing, I've never seen two names with the same hash. TODO */
- return apfs_filename_cmp(sb, k1->name, k2->name);
+ /* Normalization seems to be ignored here, even for directory records */
+ return strcmp(k1->name, k2->name);
}
/**
@@ -119,8 +114,12 @@ int apfs_read_cat_key(void *raw, int size, struct apfs_key *key)
/* Filename must have NULL-termination */
return -EFSCORRUPTED;
}
+
+ /* Name length is not used in key comparisons, only the hash */
key->number = le32_to_cpu(
- ((struct apfs_drec_hashed_key *)raw)->name_len_and_hash);
+ ((struct apfs_drec_hashed_key *)raw)->name_len_and_hash) &
+ APFS_DREC_HASH_MASK;
+
key->name = ((struct apfs_drec_hashed_key *)raw)->name;
break;
case APFS_TYPE_XATTR:
@@ -182,13 +181,15 @@ void apfs_init_drec_hashed_key(struct super_block *sb, u64 ino,
struct apfs_unicursor cursor;
bool case_fold = apfs_is_case_insensitive(sb);
u32 hash = 0xFFFFFFFF;
- int namelen;
key->id = ino;
key->type = APFS_TYPE_DIR_REC;
+
+ /* To respect normalization, queries can only consider the hash */
+ key->name = NULL;
+
if (!name) {
key->number = 0;
- key->name = NULL;
return;
}
@@ -204,10 +205,6 @@ void apfs_init_drec_hashed_key(struct super_block *sb, u64 ino,
hash = crc32c(hash, &utf32, sizeof(utf32));
}
- /* APFS counts the NULL termination for the filename length */
- namelen = cursor.utf8curr - name;
-
- key->number = (hash << APFS_DREC_HASH_SHIFT) |
- (namelen & APFS_DREC_LEN_MASK);
- key->name = name;
+ /* The filename length doesn't matter, so it's left as zero */
+ key->number = hash << APFS_DREC_HASH_SHIFT;
}
diff --git a/fs/apfs/key.h b/fs/apfs/key.h
index 57b8930b38a3..4eb1756e00f8 100644
--- a/fs/apfs/key.h
+++ b/fs/apfs/key.h
@@ -171,7 +171,7 @@ static inline void apfs_init_xattr_key(u64 ino, const char *name,
{
key->id = ino;
key->type = APFS_TYPE_XATTR;
- key->number = 0; /* Maybe use the name length here, for speed? */
+ key->number = 0;
key->name = name;
}
diff --git a/fs/apfs/node.c b/fs/apfs/node.c
index 8504ed9f66a7..bcdbf05046ce 100644
--- a/fs/apfs/node.c
+++ b/fs/apfs/node.c
@@ -259,11 +259,12 @@ static int apfs_key_from_query(struct apfs_query *query, struct apfs_key *key)
query->node->object.block_nr);
}
- if (query->flags & APFS_QUERY_MULTIPLE) {
- /* A multiple query must ignore these fields */
- key->number = 0;
+ /* A multiple query must ignore some of these fields */
+ if (query->flags & APFS_QUERY_ANY_NAME)
key->name = NULL;
- }
+ if (query->flags & APFS_QUERY_ANY_NUMBER)
+ key->number = 0;
+
return err;
}
--
2.11.0