keyword names

39 views
Skip to first unread message

Martin R

unread,
May 29, 2026, 3:07:29 AM (9 days ago) May 29
to sage-devel
Here is a script that I use to find the names of all keyword arguments in user-visible methods and functions. For example:

martin@convex63:~/sage$ python3 collect_optional.py src/sage --exclude src/sage/libs --find return_map
return_map
    src/sage/combinat/constellation.py
    src/sage/graphs/generic_graph.py
    src/sage/graphs/graph.py
return_maps
    src/sage/quivers/morphism.py
    src/sage/quivers/representation.py

Mostly LLM-generated.

#!/usr/bin/env python3
import ast
import re
import argparse
from pathlib import Path
from collections import defaultdict

def keywords_from_python_file(path):
    """Collect names of parameters with defaults that can be passed by keyword.

    The file at ``path`` is parsed with ``ast`` and every function or
    coroutine definition whose name does not start with ``_`` is inspected
    (at any nesting level, since ``ast.walk`` visits the whole tree).

    A name is collected when the parameter has a default value and is either
    a regular positional-or-keyword parameter or a keyword-only parameter.
    Positional-only parameters (those before ``/``) are skipped even when
    they have defaults, because callers can never spell them as keywords.

    Returns a set of names.  A file that cannot be read or parsed yields an
    empty set.
    """
    keywords = set()

    try:
        source = Path(path).read_text(encoding="utf-8")
        tree = ast.parse(source)
    except Exception:
        # Best effort: unreadable or unparsable files contribute nothing.
        return keywords

    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        # Ignore private/internal functions
        if node.name.startswith("_"):
            continue

        args = node.args

        # ``args.defaults`` lines up with the *tail* of
        # ``posonlyargs + args``.  Start at the first parameter that has a
        # default, but never before the end of the positional-only ones.
        positional = args.posonlyargs + args.args
        if args.defaults:
            first_with_default = len(positional) - len(args.defaults)
            start = max(first_with_default, len(args.posonlyargs))
            keywords.update(arg.arg for arg in positional[start:])

        # keyword-only arguments; ``kw_defaults`` holds None for a required one
        for arg, default in zip(args.kwonlyargs, args.kw_defaults):
            if default is not None:
                keywords.add(arg.arg)

    return keywords


# Matches the start of a ``def``/``cpdef`` definition, up to and including the
# opening parenthesis of its signature.  Group 1 is the function name, and
# ``match.end()`` is the index just past the "(".
# Cython allows modifiers and a return type between ``cpdef`` and the name
# (``cpdef inline bint is_zero(self)``); the optional lazy group skips them.
# For a plain ``def name(`` the group matches nothing.
DEF_RE = re.compile(
    r'^\s*(?:cpdef|def)\s+'
    r'(?:[A-Za-z_][A-Za-z0-9_ \t\[\],.<>]*?[ \t]+)?'
    r'([A-Za-z_][A-Za-z0-9_]*)\s*\(',
    re.MULTILINE
)

# Matches one already-split argument that carries a default value and captures
# its name.  Accepted shapes: ``name=...``, ``ctype name=...`` (Cython typed
# argument) and ``name: annotation = ...``.  Arguments without "=" do not match.
ARG_DEFAULT_RE = re.compile(
    r'''
    ^
    (?:
        [A-Za-z_][A-Za-z0-9_<>*\s\[\],]*   # optional type
        \s+
    )?
    (?P<name>[A-Za-z_][A-Za-z0-9_]*)
    \s*
    (?: : [^=]* )?                         # optional annotation
    =
    ''',
    re.VERBOSE
)


def split_arguments(arg_string):
    """Split the text of a signature into its top-level arguments.

    The split happens at commas that are outside any ``()``, ``[]`` or ``{}``
    pair, outside string literals and outside ``#`` comments.  Comment text is
    dropped from the pieces; everything else is kept and each piece is
    stripped of surrounding whitespace.

    Returns a list of strings.  An empty ``arg_string`` gives ``[]``; a
    trailing comma adds no extra piece.

    String tracking is deliberately simple: a quote character opens a literal
    that ends at the same quote character or at the end of the line, so
    triple-quoted literals spanning several lines are not understood.
    """
    result = []
    current = []
    level = 0
    quote = None        # quote character of the string literal we are inside
    escaped = False     # previous character was a backslash inside a string
    in_comment = False

    for ch in arg_string:
        if in_comment:
            if ch != "\n":
                continue
            # The newline ends the comment and is kept as plain whitespace.
            in_comment = False

        if quote is not None:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote or ch == "\n":
                # A single-quoted literal cannot continue past the line end,
                # so resynchronise there instead of swallowing the rest.
                quote = None
        elif ch == "#":
            in_comment = True
            continue
        elif ch in "\"'":
            quote = ch
        elif ch in "([{":
            level += 1
        elif ch in ")]}":
            level -= 1
        elif ch == "," and level == 0:
            result.append("".join(current).strip())
            current = []
            continue
        current.append(ch)

    if current:
        result.append("".join(current).strip())

    return result


def extract_balanced_parentheses(text, start):
    """Return the text between an opening parenthesis and its matching ")".

    ``start`` is the index of the first character *after* the opening
    parenthesis.  The scan counts nested ``(`` / ``)`` pairs and ignores
    parentheses inside string literals and ``#`` comments, so a default such
    as ``sep=")"`` does not end the signature early.

    Returns ``text[start:i]`` where ``i`` is the index of the matching closing
    parenthesis (comments and strings are left in the returned slice), or
    ``None`` when no matching parenthesis is found.
    """
    level = 1
    quote = None        # quote character of the string literal we are inside
    escaped = False     # previous character was a backslash inside a string
    in_comment = False

    for i in range(start, len(text)):
        ch = text[i]

        if in_comment:
            if ch == "\n":
                in_comment = False
            continue

        if quote is not None:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote or ch == "\n":
                # Single-quoted literals end at the line break at the latest.
                quote = None
        elif ch == "#":
            in_comment = True
        elif ch in "\"'":
            quote = ch
        elif ch == "(":
            level += 1
        elif ch == ")":
            level -= 1
            if level == 0:
                return text[start:i]

    return None


def keywords_from_pyx_file(path):
    """Collect names of arguments with defaults from a Cython source file.

    The file is scanned textually: triple-quoted strings are removed, every
    definition found by ``DEF_RE`` whose name does not start with ``_`` has
    its signature extracted and split, and each argument matching
    ``ARG_DEFAULT_RE`` contributes its name.

    Returns a set of names; an unreadable file yields an empty set.
    """
    found = set()
    try:
        source = Path(path).read_text(encoding="utf-8", errors="ignore")
    except Exception:
        return found

    # Strip triple-quoted strings (single-quote flavour first, then
    # double-quote) so that doctests inside docstrings cannot match.
    for pattern in (r"'''(.*?)'''", r'"""(.*?)"""'):
        source = re.sub(pattern, "", source, flags=re.DOTALL)

    for definition in DEF_RE.finditer(source):
        if definition.group(1).startswith("_"):
            continue

        # The match ends just past "(", which is where the signature begins.
        signature = extract_balanced_parentheses(source, definition.end())
        if signature is None:
            continue

        for piece in split_arguments(signature):
            piece = piece.strip()
            # Skip empty pieces and ``*`` / ``*args`` / ``**kwargs`` entries.
            if not piece or piece.startswith("*"):
                continue
            hit = ARG_DEFAULT_RE.match(piece)
            if hit:
                found.add(hit.group("name"))

    return found

def iter_source_files(root,
                      include=None,
                      exclude=None,
                      python_only=False,
                      cython_only=False):
    """Yield the source files below ``root`` that pass all filters.

    Files are found recursively.  ``python_only`` restricts the result to
    ``.py`` files; otherwise ``cython_only`` restricts it to ``.pyx`` and
    ``.pxd``; with neither flag all three suffixes are accepted.  When both
    flags are set, ``python_only`` wins.

    ``include`` and ``exclude`` are iterables of path fragments, each
    normalised through ``Path``.  A file is kept only if its path string
    contains at least one include fragment (when any are given) and none of
    the exclude fragments.  The test is plain substring containment.
    """
    if python_only:
        wanted_suffixes = (".py",)
    elif cython_only:
        wanted_suffixes = (".pyx", ".pxd")
    else:
        wanted_suffixes = (".py", ".pyx", ".pxd")

    include_parts = [str(Path(item)) for item in (include or [])]
    exclude_parts = [str(Path(item)) for item in (exclude or [])]

    for candidate in Path(root).rglob("*"):
        if not candidate.is_file():
            continue
        if candidate.suffix not in wanted_suffixes:
            continue

        location = str(candidate)
        if include_parts and not any(part in location for part in include_parts):
            continue
        if any(part in location for part in exclude_parts):
            continue
        yield candidate


def main():
    """Command-line entry point.

    Scans the files selected by ``iter_source_files`` and collects the names
    of arguments with defaults from each one.

    Without ``--find`` the sorted list of all collected names is printed.
    With ``--find REGEX`` every name matching REGEX (``re.search``) is printed
    together with the files it occurs in.  An invalid REGEX is reported as a
    usage error.  Per-file failures are reported on stderr so that they do not
    mix with the listing on stdout.
    """
    # Local import: only needed for diagnostics, and the file's import block
    # does not provide it.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "root",
        help="source tree root"
    )
    parser.add_argument(
        "--include",
        action="append",
        default=[],
        help="include only paths containing this string"
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="exclude paths containing this string"
    )
    parser.add_argument(
        "--python-only",
        action="store_true",
        help="scan only .py files"
    )
    parser.add_argument(
        "--cython-only",
        action="store_true",
        help="scan only .pyx/.pxd files"
    )
    parser.add_argument(
        "--find",
        metavar="REGEX",
        help="print files containing a keyword matching REGEX"
    )
    args = parser.parse_args()

    find_re = None
    if args.find:
        try:
            find_re = re.compile(args.find)
        except re.error as exc:
            # Exit with a usage message instead of a traceback.
            parser.error(
                "invalid --find regex {!r}: {}".format(args.find, exc)
            )

    keywords = set()
    matches_by_keyword = defaultdict(list)

    for path in iter_source_files(
        args.root,
        include=args.include,
        exclude=args.exclude,
        python_only=args.python_only,
        cython_only=args.cython_only,
    ):
        suffix = path.suffix
        try:
            if suffix == ".py":
                kw = keywords_from_python_file(path)
            elif suffix in (".pyx", ".pxd"):
                kw = keywords_from_pyx_file(path)
            else:
                continue

            if find_re:
                for k in kw:
                    if find_re.search(k):
                        matches_by_keyword[k].append(str(path))
            else:
                keywords |= kw

        except Exception as e:
            # Keep going: one bad file should not abort the whole scan.
            print("FAILED:", path, e, file=sys.stderr)

    if find_re:
        for keyword in sorted(matches_by_keyword):
            print(keyword)
            for path in sorted(matches_by_keyword[keyword]):
                print("   ", path)

    else:
        print(sorted(keywords))

# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()

Reply all
Reply to author
Forward
0 new messages