Here is a script that I use to find the names of all keyword arguments (parameters with default values) in user-visible methods and functions. For example:
martin@convex63:~/sage$ python3 collect_optional.py src/sage --exclude src/sage/libs --find return_map
return_map
src/sage/combinat/constellation.py
src/sage/graphs/generic_graph.py
src/sage/graphs/graph.py
return_maps
src/sage/quivers/morphism.py
src/sage/quivers/representation.py
The script below is mostly LLM-generated.
#!/usr/bin/env python3
import ast
import re
import argparse
from pathlib import Path
from collections import defaultdict
def keywords_from_python_file(path):
    """Return the names of defaulted parameters of public functions in a .py file.

    The file is parsed with ``ast``.  Every ``def`` / ``async def`` found
    anywhere in the tree (module level, methods, nested functions) is
    inspected unless its own name starts with an underscore.  A parameter is
    reported when it has a default value: the trailing positional parameters
    covered by ``defaults``, and keyword-only parameters whose ``kw_defaults``
    entry is not ``None``.

    Best effort: if the file cannot be read or parsed, an empty set is
    returned instead of raising.
    """
    found = set()
    try:
        tree = ast.parse(Path(path).read_text(encoding="utf-8"))
    except Exception:
        return found

    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    public_functions = (
        node
        for node in ast.walk(tree)
        if isinstance(node, function_types) and not node.name.startswith("_")
    )

    for func in public_functions:
        spec = func.args

        # ``defaults`` lines up with the *last* positional parameters, counted
        # over positional-only and positional-or-keyword parameters together.
        default_count = len(spec.defaults)
        if default_count:
            ordered = spec.posonlyargs + spec.args
            found.update(param.arg for param in ordered[-default_count:])

        # Keyword-only parameters carry ``None`` in ``kw_defaults`` when they
        # are required.
        found.update(
            param.arg
            for param, default in zip(spec.kwonlyargs, spec.kw_defaults)
            if default is not None
        )

    return found
# Locates the start of a user-visible function definition in Cython source.
#
# Group 1 is the function name and the match always ends on the opening
# parenthesis of the parameter list, so ``match.end() - 1`` is its index.
#
# Recognised headers:
#   * ``def name(``  and  ``async def name(``
#   * ``cpdef name(`` and ``cpdef <modifiers / return type> name(``, e.g.
#     ``cpdef int name(`` or ``cpdef inline bint name(``.  The extra tokens
#     must stay on the ``cpdef`` line and may not contain parentheses, ``=``,
#     ``:`` or ``#``, so statements that merely start with ``cpdef`` are not
#     mistaken for function headers.
# ``cdef`` functions are deliberately not matched.
DEF_RE = re.compile(
    r'^\s*(?:(?:async[ \t]+)?def|cpdef(?:[ \t]+[^\s()=:#]+)*?)'
    r'\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(',
    re.MULTILINE
)
# Matches a single, already-split parameter that has a default value and
# captures its name in the ``name`` group.  Intended to be used with
# ``.match()`` on a stripped parameter string.
#
# Accepted shapes:
#   * ``name=...``                      plain Python parameter
#   * ``<C type> name=...``             Cython typed parameter; the type may
#                                       contain spaces, ``*``, ``&``, ``<>``,
#                                       ``[]``, commas and dotted module
#                                       prefixes such as ``cnp.ndarray``
#   * ``name: annotation = ...``        Python-style annotated parameter
# Parameters without ``=`` never match.
ARG_DEFAULT_RE = re.compile(
    r'''
    ^
    (?:
        [A-Za-z_][A-Za-z0-9_<>*&.\s\[\],]*   # optional type
        \s+
    )?
    (?P<name>[A-Za-z_][A-Za-z0-9_]*)
    (?:\s*:[^=]*)?                           # optional annotation
    \s*=
    ''',
    re.VERBOSE
)
def split_arguments(arg_string):
    """Split the text of a parameter list into individual parameters.

    The string is cut at every comma that is not nested inside ``()``,
    ``[]`` or ``{}``.  Each piece is stripped of surrounding whitespace.

    Two things are treated specially so that they cannot confuse the split:

    * String literals (single- or double-quoted, with backslash escapes) are
      copied verbatim; commas and brackets inside them are ignored, so
      defaults such as ``sep=","`` or ``open="("`` stay intact.
    * ``#`` comments (outside strings) are dropped up to the end of the line,
      which matters for multi-line signatures with per-parameter comments.

    Returns a list of strings.  An empty input yields ``[]``; a trailing
    comma does not produce an extra empty entry.
    """
    pieces = []
    current = []
    depth = 0           # bracket nesting level outside of strings
    quote = None        # delimiter of the string literal we are inside, if any
    escaped = False     # True right after a backslash inside a string
    in_comment = False  # True between '#' and the end of that line

    for ch in arg_string:
        if in_comment:
            if ch == "\n":
                in_comment = False
                # Keep the newline so text on both sides stays separated.
                current.append(ch)
            continue

        if quote is not None:
            current.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = None
            continue

        if ch == "#":
            in_comment = True
            continue
        if ch in "\"'":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
        elif ch == "," and depth == 0:
            pieces.append("".join(current).strip())
            current = []
            continue
        current.append(ch)

    if current:
        pieces.append("".join(current).strip())
    return pieces
def extract_balanced_parentheses(text, start):
    """Return the text between an opening parenthesis and its matching close.

    ``start`` is the index of the first character *after* an already-seen
    ``(``.  The scan counts nested parentheses and returns
    ``text[start:end]`` where ``end`` is the index of the matching ``)``
    (which is not included).  ``None`` is returned when no matching
    parenthesis exists.

    Parentheses inside string literals (single- or double-quoted, with
    backslash escapes) and inside ``#`` comments are ignored, so defaults
    such as ``close=")"`` or a trailing comment like ``# :-)`` in a
    multi-line signature do not end the parameter list early.
    """
    depth = 1
    quote = None  # delimiter of the string literal we are inside, if any
    end = len(text)
    i = start
    while i < end:
        ch = text[i]
        if quote is not None:
            if ch == "\\":
                i += 1  # the escaped character can never close the string
            elif ch == quote:
                quote = None
        elif ch in "\"'":
            quote = ch
        elif ch == "#":
            # Skip the rest of the line; without a newline nothing follows.
            newline = text.find("\n", i)
            if newline == -1:
                return None
            i = newline
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return text[start:i]
        i += 1
    return None
def keywords_from_pyx_file(path):
    """Return the names of defaulted parameters of public functions in a Cython file.

    Cython sources cannot be parsed with ``ast``, so this is a textual
    heuristic:

    1. Read the file (undecodable bytes are ignored).
    2. Delete all triple-quoted strings so that doctests and examples inside
       docstrings are not mistaken for real definitions.
    3. Find every header matched by ``DEF_RE`` whose function name does not
       start with an underscore.
    4. Take the parenthesised parameter list, split it into parameters and
       record each one that ``ARG_DEFAULT_RE`` recognises as having a
       default.  Parameters starting with ``*`` are skipped.

    Best effort: if the file cannot be read, an empty set is returned.
    """
    keywords = set()

    try:
        text = Path(path).read_text(
            encoding="utf-8",
            errors="ignore"
        )
    except Exception:
        return keywords

    # Remove triple-quoted strings in ONE left-to-right pass.  Handling the
    # two delimiter kinds in separate passes mis-pairs them whenever one kind
    # appears inside a string of the other kind (e.g. a docstring mentioning
    # ''' literally) and can then swallow real code up to the next delimiter.
    text = re.sub(
        r"'''.*?'''" r'|""".*?"""',
        "",
        text,
        flags=re.DOTALL
    )

    for match in DEF_RE.finditer(text):
        func_name = match.group(1)

        # Ignore private/internal functions.
        if func_name.startswith("_"):
            continue

        # DEF_RE ends on the opening parenthesis; start scanning just after it.
        open_paren = match.end() - 1
        args = extract_balanced_parentheses(text, open_paren + 1)
        if args is None:
            continue

        for arg in split_arguments(args):
            arg = arg.strip()
            # Skip empty pieces and *args / **kwargs / bare "*" markers.
            if not arg or arg.startswith("*"):
                continue
            m = ARG_DEFAULT_RE.match(arg)
            if m:
                keywords.add(m.group("name"))

    return keywords
def iter_source_files(root,
                      include=None,
                      exclude=None,
                      python_only=False,
                      cython_only=False):
    """Yield the source files below *root* that pass the given filters.

    *root* is walked recursively.  Only regular files with one of the
    accepted suffixes are considered: ``.py`` when *python_only* is set,
    ``.pyx``/``.pxd`` when *cython_only* is set, and all three otherwise
    (*python_only* wins if both flags are set).

    *include* and *exclude* are iterables of path fragments.  They are
    normalised through ``pathlib.Path`` and compared by plain substring
    search against ``str(path)``: with a non-empty *include* a file must
    contain at least one fragment, and a file containing any *exclude*
    fragment is dropped.

    Yields ``pathlib.Path`` objects in the order produced by ``rglob``.
    """
    base = Path(root)
    wanted = [str(Path(fragment)) for fragment in (include or [])]
    unwanted = [str(Path(fragment)) for fragment in (exclude or [])]

    # Decide once which suffixes are acceptable for this walk.
    if python_only:
        suffixes = (".py",)
    elif cython_only:
        suffixes = (".pyx", ".pxd")
    else:
        suffixes = (".py", ".pyx", ".pxd")

    for candidate in base.rglob("*"):
        if not candidate.is_file() or candidate.suffix not in suffixes:
            continue

        location = str(candidate)
        if wanted and not any(fragment in location for fragment in wanted):
            continue
        if any(fragment in location for fragment in unwanted):
            continue

        yield candidate
def main():
    """Command-line entry point.

    Scans the source tree given as ``root`` (filtered by ``--include``,
    ``--exclude``, ``--python-only`` and ``--cython-only``) and collects the
    names of defaulted parameters from every file.

    * Without ``--find`` the sorted list of all collected names is printed.
    * With ``--find REGEX`` each collected name matching the regular
      expression (``re.search`` semantics) is printed, followed by the sorted
      paths of the files in which it occurs.

    A file whose processing raises is reported on a ``FAILED:`` line and the
    scan continues.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("root", help="source tree root")
    cli.add_argument(
        "--include",
        action="append",
        default=[],
        help="include only paths containing this string",
    )
    cli.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="exclude paths containing this string",
    )
    cli.add_argument(
        "--python-only",
        action="store_true",
        help="scan only .py files",
    )
    cli.add_argument(
        "--cython-only",
        action="store_true",
        help="scan only .pyx/.pxd files",
    )
    cli.add_argument(
        "--find",
        metavar="REGEX",
        help="print files containing a keyword matching REGEX",
    )
    options = cli.parse_args()

    pattern = re.compile(options.find) if options.find else None

    # Which extractor handles which file suffix.
    extractors = {
        ".py": keywords_from_python_file,
        ".pyx": keywords_from_pyx_file,
        ".pxd": keywords_from_pyx_file,
    }

    all_keywords = set()
    files_by_keyword = defaultdict(list)

    sources = iter_source_files(
        options.root,
        include=options.include,
        exclude=options.exclude,
        python_only=options.python_only,
        cython_only=options.cython_only,
    )
    for source in sources:
        try:
            extract = extractors.get(source.suffix)
            if extract is None:
                continue
            names = extract(source)

            if pattern:
                for name in names:
                    if pattern.search(name):
                        files_by_keyword[name].append(str(source))
            else:
                all_keywords |= names
        except Exception as exc:
            print("FAILED:", source, exc)

    if pattern:
        for name in sorted(files_by_keyword):
            print(name)
            for location in sorted(files_by_keyword[name]):
                print(" ", location)
    else:
        print(sorted(all_keywords))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()