From 417f2adbe044a6bc5ad0dc1c9d3b0aaae7502741 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 21 Jan 2026 01:53:11 +0900 Subject: [PATCH 1/2] [update_lib] todo --- scripts/update_lib/__main__.py | 10 ++ scripts/update_lib/deps.py | 64 +++++--- scripts/update_lib/show_todo.py | 221 ++++++++++++++++++++++++++ scripts/update_lib/tests/test_deps.py | 10 +- 4 files changed, 275 insertions(+), 30 deletions(-) create mode 100644 scripts/update_lib/show_todo.py diff --git a/scripts/update_lib/__main__.py b/scripts/update_lib/__main__.py index 2a4c03919fb..9bbd849c534 100644 --- a/scripts/update_lib/__main__.py +++ b/scripts/update_lib/__main__.py @@ -54,6 +54,11 @@ def main(argv: list[str] | None = None) -> int: help="Show dependency information for a module", add_help=False, ) + subparsers.add_parser( + "todo", + help="Show prioritized list of modules to update", + add_help=False, + ) args, remaining = parser.parse_known_args(argv) @@ -87,6 +92,11 @@ def main(argv: list[str] | None = None) -> int: return show_deps_main(remaining) + if args.command == "todo": + from update_lib.show_todo import main as show_todo_main + + return show_todo_main(remaining) + return 0 diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index 1e7435e9e0c..566f5ae5f0c 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -7,6 +7,7 @@ - Test dependencies (auto-detected from 'from test import ...') """ +import functools import pathlib from update_lib.io_utils import read_python_files, safe_parse_ast, safe_read_text @@ -145,7 +146,10 @@ } -def get_lib_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.Path]: +@functools.cache +def get_lib_paths( + name: str, cpython_prefix: str = "cpython" +) -> tuple[pathlib.Path, ...]: """Get all library paths for a module. Args: @@ -153,7 +157,7 @@ def get_lib_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.Pa cpython_prefix: CPython directory prefix Returns: - List of paths to copy + Tuple of paths to copy """ dep_info = DEPENDENCIES.get(name, {}) @@ -168,10 +172,13 @@ def get_lib_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.Pa for dep in dep_info.get("hard_deps", []): paths.append(construct_lib_path(cpython_prefix, dep)) - return paths + return tuple(paths) -def get_test_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.Path]: +@functools.cache +def get_test_paths( + name: str, cpython_prefix: str = "cpython" +) -> tuple[pathlib.Path, ...]: """Get all test paths for a module. Args: @@ -179,18 +186,21 @@ def get_test_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.P cpython_prefix: CPython directory prefix Returns: - List of test paths + Tuple of test paths """ if name in DEPENDENCIES and "test" in DEPENDENCIES[name]: - return [ + return tuple( construct_lib_path(cpython_prefix, p) for p in DEPENDENCIES[name]["test"] - ] + ) # Default: try directory first, then file - return [resolve_module_path(f"test/test_{name}", cpython_prefix, prefer="dir")] + return (resolve_module_path(f"test/test_{name}", cpython_prefix, prefer="dir"),) -def get_data_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.Path]: +@functools.cache +def get_data_paths( + name: str, cpython_prefix: str = "cpython" +) -> tuple[pathlib.Path, ...]: """Get additional data paths for a module. Args: @@ -198,13 +208,13 @@ def get_data_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.P cpython_prefix: CPython directory prefix Returns: - List of data paths (may be empty) + Tuple of data paths (may be empty) """ if name in DEPENDENCIES and "data" in DEPENDENCIES[name]: - return [ + return tuple( construct_lib_path(cpython_prefix, p) for p in DEPENDENCIES[name]["data"] - ] - return [] + ) + return () def parse_test_imports(content: str) -> set[str]: @@ -272,7 +282,8 @@ def parse_lib_imports(content: str) -> set[str]: return imports -def get_all_imports(name: str, cpython_prefix: str = "cpython") -> set[str]: +@functools.cache +def get_all_imports(name: str, cpython_prefix: str = "cpython") -> frozenset[str]: """Get all imports from a library file. Args: @@ -280,7 +291,7 @@ def get_all_imports(name: str, cpython_prefix: str = "cpython") -> set[str]: cpython_prefix: CPython directory prefix Returns: - Set of all imported module names + Frozenset of all imported module names """ all_imports = set() for lib_path in get_lib_paths(name, cpython_prefix): @@ -290,10 +301,11 @@ def get_all_imports(name: str, cpython_prefix: str = "cpython") -> set[str]: # Remove self all_imports.discard(name) - return all_imports + return frozenset(all_imports) -def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: +@functools.cache +def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> frozenset[str]: """Get soft dependencies by parsing imports from library file. Args: @@ -301,7 +313,7 @@ def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: cpython_prefix: CPython directory prefix Returns: - Set of imported stdlib module names (those that exist in cpython/Lib/) + Frozenset of imported stdlib module names (those that exist in cpython/Lib/) """ all_imports = get_all_imports(name, cpython_prefix) @@ -312,10 +324,11 @@ def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: if module_path.exists(): stdlib_deps.add(imp) - return stdlib_deps + return frozenset(stdlib_deps) -def get_rust_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: +@functools.cache +def get_rust_deps(name: str, cpython_prefix: str = "cpython") -> frozenset[str]: """Get Rust/C dependencies (imports that don't exist in cpython/Lib/). Args: @@ -323,11 +336,11 @@ def get_rust_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: cpython_prefix: CPython directory prefix Returns: - Set of imported module names that are built-in or C extensions + Frozenset of imported module names that are built-in or C extensions """ all_imports = get_all_imports(name, cpython_prefix) soft_deps = get_soft_deps(name, cpython_prefix) - return all_imports - soft_deps + return frozenset(all_imports - soft_deps) def _dircmp_is_same(dcmp) -> bool: @@ -350,6 +363,7 @@ def _dircmp_is_same(dcmp) -> bool: return True +@functools.cache def is_up_to_date( name: str, cpython_prefix: str = "cpython", lib_prefix: str = "Lib" ) -> bool: @@ -472,9 +486,9 @@ def resolve_all_paths( Dict with "lib", "test", "data", "test_deps" keys """ result = { - "lib": get_lib_paths(name, cpython_prefix), - "test": get_test_paths(name, cpython_prefix), - "data": get_data_paths(name, cpython_prefix), + "lib": list(get_lib_paths(name, cpython_prefix)), + "test": list(get_test_paths(name, cpython_prefix)), + "data": list(get_data_paths(name, cpython_prefix)), "test_deps": [], } diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py new file mode 100644 index 00000000000..5cf7f90f68d --- /dev/null +++ b/scripts/update_lib/show_todo.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +""" +Show prioritized list of modules to update. + +Usage: + python scripts/update_lib todo + python scripts/update_lib todo --limit 20 +""" + +import argparse +import pathlib +import sys + +sys.path.insert(0, str(pathlib.Path(__file__).parent.parent)) + + +def compute_todo_list( + cpython_prefix: str = "cpython", + lib_prefix: str = "Lib", + include_done: bool = False, +) -> list[dict]: + """Compute prioritized list of modules to update. + + Scoring: + - Modules with no pylib dependencies: score = -1 + - Modules with pylib dependencies: score = count of NOT up-to-date deps + + Sorting (ascending by score): + 1. More reverse dependencies (modules depending on this) = higher priority + 2. Fewer native dependencies = higher priority + + Returns: + List of dicts with module info, sorted by priority + """ + from update_lib.deps import get_rust_deps, get_soft_deps, is_up_to_date + from update_lib.show_deps import get_all_modules + + all_modules = get_all_modules(cpython_prefix) + + # Build dependency data for all modules + module_data = {} + for name in all_modules: + soft_deps = get_soft_deps(name, cpython_prefix) + native_deps = get_rust_deps(name, cpython_prefix) + up_to_date = is_up_to_date(name, cpython_prefix, lib_prefix) + + module_data[name] = { + "name": name, + "soft_deps": soft_deps, + "native_deps": native_deps, + "up_to_date": up_to_date, + } + + # Build reverse dependency map: who depends on this module + reverse_deps: dict[str, set[str]] = {name: set() for name in all_modules} + for name, data in module_data.items(): + for dep in data["soft_deps"]: + if dep in reverse_deps: + reverse_deps[dep].add(name) + + # Compute scores and filter + result = [] + for name, data in module_data.items(): + # Skip already up-to-date modules (unless --done) + if data["up_to_date"] and not include_done: + continue + + soft_deps = data["soft_deps"] + if not soft_deps: + # No pylib dependencies + score = -1 + total_deps = 0 + else: + # Count NOT up-to-date dependencies + score = sum( + 1 + for dep in soft_deps + if dep in module_data and not module_data[dep]["up_to_date"] + ) + total_deps = len(soft_deps) + + result.append( + { + "name": name, + "score": score, + "total_deps": total_deps, + "reverse_deps": reverse_deps[name], + "reverse_deps_count": len(reverse_deps[name]), + "native_deps_count": len(data["native_deps"]), + "native_deps": data["native_deps"], + "soft_deps": soft_deps, + "up_to_date": data["up_to_date"], + } + ) + + # Sort by: + # 1. score (ascending) - fewer outstanding deps first + # 2. reverse_deps_count (descending) - more dependents first + # 3. native_deps_count (ascending) - fewer native deps first + result.sort( + key=lambda x: ( + x["score"], + -x["reverse_deps_count"], + x["native_deps_count"], + ) + ) + + return result + + +def format_todo_list( + todo_list: list[dict], + limit: int | None = None, + verbose: bool = False, +) -> list[str]: + """Format todo list for display. + + Args: + todo_list: List from compute_todo_list() + limit: Maximum number of items to show + verbose: Show detailed dependency information + + Returns: + List of formatted lines + """ + lines = [] + + if limit: + todo_list = todo_list[:limit] + + for item in todo_list: + name = item["name"] + score = item["score"] + total_deps = item["total_deps"] + rev_count = item["reverse_deps_count"] + + done_mark = "[x]" if item["up_to_date"] else "[ ]" + + if score == -1: + score_str = "no deps" + else: + score_str = f"{score}/{total_deps} deps" + + rev_str = f"{rev_count} dependents" if rev_count else "" + + parts = [done_mark, f"[{score_str}]", name] + if rev_str: + parts.append(f"({rev_str})") + + lines.append(" ".join(parts)) + + # Verbose mode: show detailed dependency info + if verbose: + if item["reverse_deps"]: + lines.append(f" dependents: {', '.join(sorted(item['reverse_deps']))}") + if item["soft_deps"]: + lines.append(f" python: {', '.join(sorted(item['soft_deps']))}") + if item["native_deps"]: + lines.append(f" native: {', '.join(sorted(item['native_deps']))}") + + return lines + + +def show_todo( + cpython_prefix: str = "cpython", + lib_prefix: str = "Lib", + limit: int | None = None, + include_done: bool = False, + verbose: bool = False, +) -> None: + """Show prioritized list of modules to update.""" + todo_list = compute_todo_list(cpython_prefix, lib_prefix, include_done) + for line in format_todo_list(todo_list, limit, verbose): + print(line) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--cpython", + default="cpython", + help="CPython directory prefix (default: cpython)", + ) + parser.add_argument( + "--lib", + default="Lib", + help="Local Lib directory prefix (default: Lib)", + ) + parser.add_argument( + "--limit", + type=int, + default=None, + help="Maximum number of items to show", + ) + parser.add_argument( + "--done", + action="store_true", + help="Include already up-to-date modules", + ) + parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="Show detailed dependency information", + ) + + args = parser.parse_args(argv) + + try: + show_todo(args.cpython, args.lib, args.limit, args.done, args.verbose) + return 0 + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/update_lib/tests/test_deps.py b/scripts/update_lib/tests/test_deps.py index 41a51990ad0..bc70925348b 100644 --- a/scripts/update_lib/tests/test_deps.py +++ b/scripts/update_lib/tests/test_deps.py @@ -82,7 +82,7 @@ def test_default_file(self): (lib_dir / "foo.py").write_text("# foo") paths = get_lib_paths("foo", str(tmpdir)) - self.assertEqual(paths, [tmpdir / "Lib" / "foo.py"]) + self.assertEqual(paths, (tmpdir / "Lib" / "foo.py",)) def test_default_directory(self): """Test default to directory when file doesn't exist.""" @@ -93,7 +93,7 @@ def test_default_directory(self): (lib_dir / "foo").mkdir() paths = get_lib_paths("foo", str(tmpdir)) - self.assertEqual(paths, [tmpdir / "Lib" / "foo"]) + self.assertEqual(paths, (tmpdir / "Lib" / "foo",)) class TestGetTestPaths(unittest.TestCase): @@ -114,7 +114,7 @@ def test_default_directory(self): (test_dir / "test_foo").mkdir() paths = get_test_paths("foo", str(tmpdir)) - self.assertEqual(paths, [tmpdir / "Lib" / "test" / "test_foo"]) + self.assertEqual(paths, (tmpdir / "Lib" / "test" / "test_foo",)) def test_default_file(self): """Test fallback to test_name.py file.""" @@ -125,7 +125,7 @@ def test_default_file(self): (test_dir / "test_foo.py").write_text("# test") paths = get_test_paths("foo", str(tmpdir)) - self.assertEqual(paths, [tmpdir / "Lib" / "test" / "test_foo.py"]) + self.assertEqual(paths, (tmpdir / "Lib" / "test" / "test_foo.py",)) class TestGetDataPaths(unittest.TestCase): @@ -140,7 +140,7 @@ def test_known_data(self): def test_no_data(self): """Test module without data paths.""" paths = get_data_paths("datetime", "cpython") - self.assertEqual(paths, []) + self.assertEqual(paths, ()) class TestGetTestDependencies(unittest.TestCase): From cc933292cbc6122218d980b218ddc8982741a4bf Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 21 Jan 2026 03:04:48 +0900 Subject: [PATCH 2/2] better CI comment --- .github/workflows/lib-deps-check.yaml | 7 ++++++- scripts/update_lib/show_deps.py | 12 ++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/lib-deps-check.yaml b/.github/workflows/lib-deps-check.yaml index 1903672045d..27b3dec3620 100644 --- a/.github/workflows/lib-deps-check.yaml +++ b/.github/workflows/lib-deps-check.yaml @@ -28,6 +28,11 @@ jobs: run: | git fetch origin ${{ github.event.pull_request.head.sha }} + - name: Checkout PR Lib files + run: | + # Checkout only Lib/ directory from PR head for accurate comparison + git checkout ${{ github.event.pull_request.head.sha }} -- Lib/ + - name: Checkout CPython run: | git clone --depth 1 --branch v3.14.2 https://github.com/python/cpython.git cpython @@ -104,7 +109,7 @@ jobs: **Legend:** - - `[+]` path exists, `[-]` path missing + - `[+]` path exists in CPython - `[x]` up-to-date, `[ ]` outdated - `native:` Rust/C extension modules diff --git a/scripts/update_lib/show_deps.py b/scripts/update_lib/show_deps.py index 1dcd3404898..b6beacacaab 100644 --- a/scripts/update_lib/show_deps.py +++ b/scripts/update_lib/show_deps.py @@ -169,17 +169,17 @@ def format_deps( lines = [] - # lib paths + # lib paths (only show existing) lib_paths = get_lib_paths(name, cpython_prefix) for p in lib_paths: - exists = "+" if p.exists() else "-" - lines.append(f"[{exists}] lib: {p}") + if p.exists(): + lines.append(f"[+] lib: {p}") - # test paths + # test paths (only show existing) test_paths = get_test_paths(name, cpython_prefix) for p in test_paths: - exists = "+" if p.exists() else "-" - lines.append(f"[{exists}] test: {p}") + if p.exists(): + lines.append(f"[+] test: {p}") # hard_deps (from DEPENDENCIES table) dep_info = DEPENDENCIES.get(name, {})