Thomas Wetmore
unread,Oct 17, 2024, 11:25:10 AM10/17/24Sign in to reply to author
Sign in to forward
You do not have permission to delete messages in this group
Either email addresses are anonymous for this group or you need the view member email addresses permission to view the original message
to root...@googlegroups.com
Records in my genealogical software are trees where the nodes are Gedcom lines; data is pure Gedcom. To get values, software traverses the trees. If I want a person's birth date I traverse to the first DATE node of the first BIRT node. It is easy to do but gets tiresome.
To speed things up I added the GedPath structure to define paths in Gedcom records. Paths can be written as Strings. For example, "INDI->NAME" is the first NAME node in a person tree; "INDI->NAME*" is all the NAME nodes at the level 1 in atree; and "INDI->ANY*->DATE*" is all the DATE nodes at level 2 anywhere in the record.
There are two functions:
buildGedPath -- parses the String form of a GedPath into its internal linked list form.
traverseGedPath -- traverses a Gedcom tree using a GedPath, accumulating the matched Gedcom nodes in a list.
I put off writing this quite awhile because I thought it would be tricky. It wasn't. As expected the traverse function is recursive. For anyone interested the code is attached. Included are the typedefs for GNode, the Gedcom tree node, and GedPath, a node in a Gedcom path.
----------------------------------------------------------------------------
// GNode is the internal form of a Gedcom line; a node in a Gedcom tree.
typedef struct GNode GNode;
struct GNode {
String key; // Record key; only root nodes use this field.
String tag; // Line tag; all nodes use this field.
String value; // Line value; values are optional.
GNode *parent; // Parent node; all nodes except roots use this field.
GNode *child; // First child node of this node, if any.
GNode *sibling; // Next sibling node of this node, if any.
};
// GedPath is an element in a Gedcom path list.
typedef struct GedPath {
String tag; // Tag to match.
bool all; // Get all matches.
bool any; // Allow any tag.
struct GedPath *next;
} GedPath;
// buildPath converts a Gedcom path expression to a GedPath.
GedPath* buildGedPath(String string) {
GedPath* first = null;
GedPath* prev = null;
char *expression = strdup(string); // Allocate a writable copy
String token = strtok(expression, "->"); // Tokens are separated by "->"'s.
while (token) {
GedPath* path = createGedPath(); // Get first token.
bool all = false;
int len = (int) strlen(token);
if (token[len-1] == '*') {
token = strndup(token, len - 1);
all = true;
}
path->all = all;
if (eqstr("ANY", token)) {
path->tag = null; // Allow all tags/
path->any = true;
} else {
path->tag = strdup(token);
}
path->next = null;
if (!first) first = path;
else prev->next = path;
prev = path;
token = strtok(null, "->"); // Get next token.
}
free(expression);
return first;
}
// traverseGedPath traverses a Gedcom path collecting the GNodes at the endpoints.
void traverseGedPath(GNode *node, GedPath *path, GNodeList* results, int *count) {
if (!node || !path) return;
while (node) {
// Check for a tag match or tag wildcard.
if (path->any || (path->tag && eqstr(node->tag, path->tag))) {
if (!path->next) {
appendToList(results, node); // Found a match.
(*count)++;
} else {
// Otherwise, recurse deeper into the tree.
int countIn = *count;
traverseGedPath(node->child, path->next, results, count);
if (*count > countIn && !path->all) return;
}
}
node = node->sibling;
}
}