From: Chen Miao <chen...@openatom.club>
Add scripts/checkversion.py as an alternative implementation of
scripts/
checkversion.pl for `make versioncheck`.
The Python version keeps diagnostics compatible while reducing runtime
by:
- cheap byte-level prefilters to skip irrelevant files early;
- lazy regex passes for comment/string stripping only when needed;
- deferred line counting for diagnostics;
- configurable path excludes using regex patterns via command-line
options and scripts/checkversion.exclude.
Also fix string masking to avoid multi-line overmatching, which can hide
real version macro uses in files such as samples/bpf/spintest.bpf.c.
Wire `make versioncheck` to accept `CHECKVERSION=perl|python`, switch
the default to `python`, document how to select Perl in `make help`,
and allow passing VERSIONCHECK_EXCLUDE_REGEX_FILE to the Python backend.
On the same tree, this drops `make versioncheck` wall time from 13.08s
to 3.52s.
You can compare runtime output with:
diff -u <({ time -p make versioncheck CHECKVERSION=perl >/dev/null; } 2>&1) \
<({ time -p make versioncheck CHECKVERSION=python >/dev/null; } 2>&1)
Signed-off-by: Chen Miao <chen...@openatom.club>
---
Makefile | 13 ++-
scripts/checkversion.exclude | 6 +
scripts/checkversion.py | 217 +++++++++++++++++++++++++++++++++++
3 files changed, 235 insertions(+), 1 deletion(-)
create mode 100644 scripts/checkversion.exclude
create mode 100755 scripts/checkversion.py
diff --git a/Makefile b/Makefile
index bde507d5c03db..263c4032bc8a5 100644
--- a/Makefile
+++ b/Makefile
@@ -1721,6 +1721,8 @@ help:
@echo ' checkstack - Generate a list of stack hogs and consider all functions'
@echo ' with a stack size larger than MINSTACKSIZE (default: 100)'
@echo ' versioncheck - Sanity check on version.h usage'
+ @echo ' (set CHECKVERSION=perl to use scripts/
checkversion.pl)'
+ @echo ' (set VERSIONCHECK_EXCLUDE_REGEX_FILE=... for Python regex excludes)'
@echo ' includecheck - Check for duplicate included header files'
@echo ' headerdep - Detect inclusion cycles in headers'
@echo ' coccicheck - Check with Coccinelle'
@@ -2141,15 +2143,24 @@ endif
PHONY += includecheck versioncheck coccicheck
+# Backend commands for `make versioncheck`.  Each word of
+# VERSIONCHECK_EXCLUDE_REGEX_FILE becomes one --exclude-regex-file option
+# passed to scripts/checkversion.py; the Perl command takes no such options.
+VERSIONCHECK_PY_ARGS = $(foreach f,$(VERSIONCHECK_EXCLUDE_REGEX_FILE),\
+ --exclude-regex-file $(f))
+VERSIONCHECK_CMD_python = $(PYTHON3) $(srctree)/scripts/checkversion.py $(VERSIONCHECK_PY_ARGS)
+VERSIONCHECK_CMD_perl = $(PERL) -w $(srctree)/scripts/checkversion.pl
+
includecheck:
find $(srctree)/* $(RCS_FIND_IGNORE) \
-name '*.[hcS]' -type f -print | sort \
| xargs $(PERL) -w $(srctree)/scripts/
checkincludes.pl
+# CHECKVERSION selects the backend: 'python' (default) or 'perl'.
+versioncheck: CHECKVERSION ?= python
+# Validate with the literal names as the $(filter) patterns and require a
+# single word, so values such as '%' or 'perl python' are rejected instead
+# of expanding to an empty command (xargs would then run its default echo).
+versioncheck: VERSIONCHECK_CMD = $(if $(and $(filter perl python,$(CHECKVERSION)),\
+ $(filter 1,$(words $(CHECKVERSION)))),\
+ $(VERSIONCHECK_CMD_$(CHECKVERSION)),\
+ $(error CHECKVERSION must be either 'perl' or 'python'))
versioncheck:
find $(srctree)/* $(RCS_FIND_IGNORE) \
-name '*.[hcS]' -type f -print | sort \
- | xargs $(PERL) -w $(srctree)/scripts/checkversion.pl
+ | xargs $(VERSIONCHECK_CMD)
coccicheck:
$(Q)$(BASH) $(srctree)/scripts/$@
diff --git a/scripts/checkversion.exclude b/scripts/checkversion.exclude
new file mode 100644
index 0000000000000..24daa5f68627f
--- /dev/null
+++ b/scripts/checkversion.exclude
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# checkversion.py path excludes (Python regex, one pattern per line).
+# Paths are normalized to use "/" before matching.
+
+(^|/)bpf_helpers\.h$
diff --git a/scripts/checkversion.py b/scripts/checkversion.py
new file mode 100755
index 0000000000000..b7369027f3093
--- /dev/null
+++ b/scripts/checkversion.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# checkversion finds uses of all macros in <linux/version.h>
+# where the source files do not #include <linux/version.h>; or cases
+# of including <linux/version.h> where it is not needed.
+# Usage:
+# scripts/checkversion.py [--debug] <file> ...
+# find . -name '*.[hcS]' -type f -print | xargs scripts/checkversion.py
+# make versioncheck CHECKVERSION=python
+# make versioncheck VERSIONCHECK_EXCLUDE_REGEX_FILE=path/to/excludes
+#
+# Performance notes:
+# Most files do not reference version symbols. This script uses multiple
+# fast-path filters on raw bytes and only runs the expensive regex passes
+# when a file is likely relevant.
+#
+# Copyright (C) 2003, Randy Dunlap <
rdu...@infradead.org>
+# Copyright (C) 2026, Chen Miao <chen...@openatom.club>
+
+import argparse
+from pathlib import Path
+import re
+import sys
+
+
+COMMENT_RE = re.compile(br"/\*.*?(?:\*/|$)", re.S)
+STRING_RE = re.compile(br'".*?(?:"|$)')
+INCLUDE_RE = re.compile(
+ br'^\s*#\s*include\s*(?:"linux/version\.h"|<linux/version\.h>)',
+ re.M,
+)
+VERSION_USE_RE = re.compile(
+ br"LINUX_VERSION_CODE|\WKERNEL_VERSION|LINUX_VERSION_MAJOR|"
+ br"LINUX_VERSION_PATCHLEVEL|LINUX_VERSION_SUBLEVEL"
+)
+RAW_VERSION_MACRO_RE = re.compile(
+ br"LINUX_VERSION_CODE|KERNEL_VERSION|LINUX_VERSION_MAJOR|"
+ br"LINUX_VERSION_PATCHLEVEL|LINUX_VERSION_SUBLEVEL"
+)
+
+DEFAULT_EXCLUDE_REGEX_FILE = Path(__file__).with_name("checkversion.exclude")
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description="Check linux/version.h usage in source files.",
+ )
+ parser.add_argument("--debug", action="store_true",
+ help="print debug information")
+ parser.add_argument("--exclude-regex", action="append", default=[],
+ metavar="REGEX",
+ help="exclude files whose normalized path matches REGEX")
+ parser.add_argument("--exclude-regex-file", action="append", default=[],
+ metavar="FILE",
+ help="load path exclude REGEX entries from FILE")
+ parser.add_argument("--no-default-exclude-regex-file", action="store_true",
+ help="do not load scripts/checkversion.exclude")
+ parser.add_argument("files", nargs="*", help="files to check")
+ return parser.parse_args()
+
+
+def mask_token_preserving_newlines(match):
+ # Keep newline count stable so byte offsets still map to line numbers.
+ token = match.group(0)
+ newline_count = token.count(b"\n")
+ if newline_count:
+ return b" " + (b"\n" * newline_count) + b" "
+ return b" "
+
+
+def compile_exclude_regex(pattern, location):
+ try:
+ return re.compile(pattern)
+ except re.error as err:
+ sys.stderr.write(
+ f"{location}: invalid exclude regex '{pattern}': {err}\n"
+ )
+ raise SystemExit(1) from err
+
+
+def load_exclude_regex_file(path):
+ try:
+ with open(path, "r", encoding="utf-8") as src:
+ lines = src.readlines()
+ except OSError as err:
+ sys.stderr.write(f"Can't open {path}: {err.strerror}\n")
+ raise SystemExit(1) from err
+
+ regexes = []
+ for lineno, line in enumerate(lines, 1):
+ pattern = line.strip()
+ if not pattern or pattern.startswith("#"):
+ continue
+ regexes.append(compile_exclude_regex(pattern, f"{path}:{lineno}"))
+ return regexes
+
+
+def build_exclude_regexes(args):
+ regexes = []
+
+ if not args.no_default_exclude_regex_file and DEFAULT_EXCLUDE_REGEX_FILE.exists():
+ regexes.extend(load_exclude_regex_file(str(DEFAULT_EXCLUDE_REGEX_FILE)))
+
+ for path in args.exclude_regex_file:
+ regexes.extend(load_exclude_regex_file(path))
+
+ for pattern in args.exclude_regex:
+ regexes.append(compile_exclude_regex(pattern, "--exclude-regex"))
+
+ return tuple(regexes)
+
+
+def file_is_excluded(path, exclude_regexes):
+ normalized = path.replace("\\", "/")
+ if (normalized.endswith("include/generated/uapi/linux/version.h") or
+ normalized.endswith("usr/include/linux/version.h")):
+ return True
+
+ return any(regex.search(normalized) for regex in exclude_regexes)
+
+
+def line_count(blob):
+ if not blob:
+ return 0
+
+ count = blob.count(b"\n")
+ if blob[-1] != ord("\n"):
+ count += 1
+ return count
+
+
+def line_number_at(blob, offset):
+ return blob.count(b"\n", 0, offset) + 1
+
+
+def check_file(path, debug, exclude_regexes):
+ if file_is_excluded(path, exclude_regexes):
+ if debug:
+ print(f"{path}: skip version check (excluded)")
+ return
+
+ try:
+ with open(path, "rb") as src:
+ data = src.read()
+ except OSError as err:
+ sys.stderr.write(f"Can't open {path}: {err.strerror}\n")
+ raise SystemExit(1) from err
+
+ # Fast reject for obviously irrelevant files.
+ has_version_h_hint = b"version.h" in data
+ has_version_macro_hint = b"VERSION" in data
+
+ if not has_version_h_hint and not has_version_macro_hint:
+ if debug:
+ print(f"{path}: version use is OK (none)")
+ return
+
+ # Exact raw-byte probes. These are still much cheaper than regex masking.
+ has_raw_include = has_version_h_hint and b"linux/version.h" in data
+ has_raw_macro = (has_version_macro_hint and
+ RAW_VERSION_MACRO_RE.search(data) is not None)
+
+ if not has_raw_include and not has_raw_macro:
+ if debug:
+ print(f"{path}: version use is OK (none)")
+ return
+
+ # Most files do not have block comments; skip comment stripping then.
+ if b"/*" in data:
+ no_comments = COMMENT_RE.sub(mask_token_preserving_newlines, data)
+ else:
+ no_comments = data
+
+ linux_version_line = 0
+ if has_raw_include:
+ include_match = INCLUDE_RE.search(no_comments)
+ if include_match:
+ linux_version_line = line_number_at(no_comments, include_match.start())
+
+ use_version = False
+ if has_raw_macro:
+ # Only strip strings when needed. This keeps the common path cheap.
+ if b'"' in no_comments:
+ no_comments_or_strings = STRING_RE.sub(mask_token_preserving_newlines,
+ no_comments)
+ else:
+ no_comments_or_strings = no_comments
+ use_version = VERSION_USE_RE.search(no_comments_or_strings) is not None
+
+ if use_version and not linux_version_line:
+ # Compute total line count lazily; only needed for this diagnostic.
+ current_line = line_count(data)
+ print(f"{path}: {current_line}: need linux/version.h")
+
+ if linux_version_line and not use_version:
+ print(f"{path}: {linux_version_line} linux/version.h not needed.")
+
+ if debug:
+ if linux_version_line and use_version:
+ print(f"{path}: version use is OK ({linux_version_line})")
+ if not linux_version_line and not use_version:
+ print(f"{path}: version use is OK (none)")
+
+
+def main():
+ args = parse_args()
+ exclude_regexes = build_exclude_regexes(args)
+
+ for path in args.files:
+ check_file(path, args.debug, exclude_regexes)
+
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
--
2.43.0