xworkmate-app/scripts/docs/extract_public_api_inventory.py
2026-04-16 13:54:33 +08:00

424 lines
13 KiB
Python

#!/usr/bin/env python3
"""Extract public API inventory for XWorkmate engineering docs.
This script intentionally stays lightweight:
- It only scans source files that define the public engineering surface.
- It extracts top-level public symbols and compact signatures.
- It does not attempt semantic explanation or method-level expansion.
"""
from __future__ import annotations
import json
import re
from collections import defaultdict
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from pathlib import Path
# Repository root. The script sits in a directory nested two levels below the
# root (`scripts/docs/`), so parents[2] of the resolved script path is the root.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory that receives the generated JSON and Markdown inventory files.
OUTPUT_DIR = REPO_ROOT / "docs" / "architecture" / "public-api" / "_generated"
# Repo-relative directories that are scanned recursively for source files.
TARGETS = (
    "lib/app",
    "lib/runtime",
    "lib/models",
    "lib/features",
    "lib/theme",
    "rust/src",
)
# Repo-relative path prefixes that each get one row (file count and symbol
# count) in the coverage scope summary of the inventory.
COVERAGE_PREFIXES = (
    "lib/app",
    "lib/runtime",
    "lib/models",
    "lib/features/assistant",
    "lib/features/settings",
    "lib/features/mobile",
    "lib/theme",
    "rust/src",
)
# Recursive glob patterns used to select Dart or Rust sources inside a target.
DART_GLOB = "*.dart"
RUST_GLOB = "*.rs"
@dataclass(frozen=True)
class SymbolRecord:
    """One extracted public symbol.

    Instances are immutable (frozen dataclass) and are serialized with
    ``dataclasses.asdict`` when the inventory is built.
    """

    # Source language tag: "dart" or "rust".
    language: str
    # File path relative to the repository root, using "/" separators.
    path: str
    # 1-based line number on which the declaration starts.
    line: int
    # Declaration category, e.g. "class", "enum", "top-level function".
    kind: str
    # Declared identifier captured by the matching pattern.
    name: str
    # Declaration text with whitespace runs collapsed to single spaces.
    signature: str
def repo_relative(path: Path) -> str:
    """Return *path* relative to ``REPO_ROOT`` as a ``/``-separated string.

    ``Path.relative_to`` raises ``ValueError`` when *path* is not located
    under the repository root.
    """
    inside_repo = path.relative_to(REPO_ROOT)
    return inside_repo.as_posix()
def normalize_signature(value: str) -> str:
    """Trim *value* and collapse every whitespace run into a single space."""
    trimmed = value.strip()
    return re.sub(r"\s+", " ", trimmed)
def iter_target_files() -> list[Path]:
    """Collect the source files found under every existing target directory.

    Targets are visited in ``TARGETS`` order; within one target the matches
    are sorted. A target whose last path component is ``src`` is searched
    with the Rust glob, every other target with the Dart glob. Targets that
    do not exist on disk are ignored.
    """
    collected: list[Path] = []
    for root in (REPO_ROOT / target for target in TARGETS):
        if root.exists():
            glob = RUST_GLOB if root.parts[-1] == "src" else DART_GLOB
            collected += sorted(root.rglob(glob))
    return collected
def extract_symbols(path: Path) -> list[SymbolRecord]:
    """Dispatch *path* to the extractor for its suffix.

    ``.dart`` files go to the Dart extractor and ``.rs`` files to the Rust
    extractor; any other suffix yields an empty list.
    """
    suffix = path.suffix
    if suffix not in (".dart", ".rs"):
        return []
    extractor = extract_dart_symbols if suffix == ".dart" else extract_rust_symbols
    return extractor(path)
def extract_dart_symbols(path: Path) -> list[SymbolRecord]:
    """Extract public top-level declarations from one Dart source file.

    Only lines that start in column 0 are considered, so class members and
    other nested code are ignored. Recognized declarations are classes
    (including Dart 3 modifier forms such as ``sealed class`` or
    ``final class``), mixins, enums, typedefs, named extensions, extension
    types and top-level functions.

    Every name pattern requires a leading letter, so library-private
    ``_name`` declarations never match. Anonymous ``extension on T`` blocks
    have no name to record and are skipped as well.

    Args:
        path: Dart file to scan; read as UTF-8.

    Returns:
        Records in source order, each with a 1-based line number and the
        whitespace-normalized declaration text.
    """
    lines = path.read_text(encoding="utf-8").splitlines()
    relative_path = repo_relative(path)
    symbols: list[SymbolRecord] = []

    identifier = r"([A-Za-z]\w*)\b"
    # Order matters: the first matching pattern decides the kind.
    type_patterns = (
        # The historical kind label for `abstract class` is kept unchanged.
        (
            "abstract interface",
            re.compile(
                r"^abstract\s+(?:(?:base|final|interface|mixin)\s+)*class\s+"
                + identifier
            ),
        ),
        (
            "class",
            re.compile(
                r"^(?:(?:base|final|interface|sealed|mixin)\s+)*class\s+"
                + identifier
            ),
        ),
        # `mixin class Foo` is a class declaration; never capture "class"
        # as the mixin's name.
        ("mixin", re.compile(r"^(?:base\s+)?mixin\s+(?!class\b)" + identifier)),
        ("enum", re.compile(r"^enum\s+" + identifier)),
        ("typedef", re.compile(r"^typedef\s+" + identifier)),
        (
            "extension type",
            re.compile(r"^extension\s+type\s+(?:const\s+)?" + identifier),
        ),
        # `extension on T` is anonymous and `extension type` is handled
        # above; neither "on" nor "type" is the extension's name.
        ("extension", re.compile(r"^extension\s+(?!(?:on|type)\b)" + identifier)),
    )
    function_pattern = re.compile(
        r"^(?:[A-Za-z_<>{}\[\]\?.,\s]+)\s+([A-Za-z]\w*)\s*\("
    )
    # Lines that are never declarations of interest.
    ignored_prefixes = ("//", "/*", "*", "@", "import ", "export ", "part ")
    # Variable declarations and statement keywords that must not be mistaken
    # for a function signature. This is checked after the type patterns so
    # that `final class Foo` is still recognized as a class.
    non_function_prefixes = (
        "const ",
        "final ",
        "var ",
        "if ",
        "for ",
        "while ",
        "switch ",
        "return ",
        "assert ",
        "throw ",
        "try ",
    )

    index = 0
    while index < len(lines):
        raw_line = lines[index]
        stripped = raw_line.strip()
        if not stripped or raw_line.startswith((" ", "\t")):
            index += 1
            continue
        if stripped.startswith(ignored_prefixes):
            index += 1
            continue

        kind = ""
        found = None
        for candidate_kind, pattern in type_patterns:
            found = pattern.match(stripped)
            if found:
                kind = candidate_kind
                break
        if found is None and not stripped.startswith(non_function_prefixes):
            found = function_pattern.match(stripped)
            kind = "top-level function"
        if found is None:
            index += 1
            continue

        signature, consumed = collect_signature(lines, index)
        symbols.append(
            SymbolRecord(
                language="dart",
                path=relative_path,
                line=index + 1,
                kind=kind,
                name=found.group(1),
                signature=signature,
            )
        )
        # The signature helper consumes at least the current line, so the
        # scan always advances.
        index += consumed
    return symbols
def collect_signature(lines: list[str], start: int) -> tuple[str, int]:
    """Gather the declaration text that begins at ``lines[start]``.

    Lines are accumulated (stripped) until the running parenthesis balance
    is zero or negative and the current line contains ``{`` or ``=>`` or
    ends with ``;``. If no such line exists, everything up to the end of
    *lines* is taken.

    Returns:
        A ``(signature, consumed)`` pair: the whitespace-normalized text of
        the gathered lines and how many lines were gathered.
    """
    pieces: list[str] = []
    depth = 0
    for position in range(start, len(lines)):
        text = lines[position].strip()
        pieces.append(text)
        depth += text.count("(") - text.count(")")
        ends_declaration = "{" in text or text.endswith(";") or "=>" in text
        if depth <= 0 and ends_declaration:
            break
    return normalize_signature(" ".join(pieces)), len(pieces)
def extract_rust_symbols(path: Path) -> list[SymbolRecord]:
    """Extract public top-level Rust items from one source file.

    Only lines that start in column 0 are considered. Recognized items are
    ``pub struct``, ``pub enum`` and ``extern "C"`` functions declared
    ``pub``. The ``unsafe`` qualifier on an FFI function is optional, so
    safe ``pub extern "C" fn`` exports are listed alongside
    ``pub unsafe extern "C" fn`` ones. Restricted visibility such as
    ``pub(crate)`` does not match.

    Args:
        path: Rust file to scan; read as UTF-8.

    Returns:
        Records in source order, each with a 1-based line number and the
        whitespace-normalized declaration text.
    """
    lines = path.read_text(encoding="utf-8").splitlines()
    relative_path = repo_relative(path)
    symbols: list[SymbolRecord] = []
    # The patterns are mutually exclusive, so their order does not affect
    # which kind is assigned.
    item_patterns = (
        ("struct", re.compile(r"^pub\s+struct\s+([A-Za-z]\w*)\b")),
        ("enum", re.compile(r"^pub\s+enum\s+([A-Za-z]\w*)\b")),
        (
            "FFI function",
            re.compile(
                r'^pub\s+(?:unsafe\s+)?extern\s+"C"\s+fn\s+([A-Za-z]\w*)\s*\('
            ),
        ),
    )

    index = 0
    while index < len(lines):
        raw_line = lines[index]
        stripped = raw_line.strip()

        kind = ""
        found = None
        # Indented lines belong to a nested scope and are not top-level items.
        if stripped and not raw_line.startswith((" ", "\t")):
            for candidate_kind, pattern in item_patterns:
                found = pattern.match(stripped)
                if found:
                    kind = candidate_kind
                    break
        if found is None:
            index += 1
            continue

        signature, consumed = collect_signature(lines, index)
        symbols.append(
            SymbolRecord(
                language="rust",
                path=relative_path,
                line=index + 1,
                kind=kind,
                name=found.group(1),
                signature=signature,
            )
        )
        # The signature helper consumes at least the current line, so the
        # scan always advances.
        index += consumed
    return symbols
def build_inventory(symbols: list[SymbolRecord]) -> dict:
    """Assemble the JSON-serializable inventory for *symbols*.

    The target directories are scanned again to obtain the full file list,
    so files without any extracted symbol still appear. Files are grouped by
    the first two components of their repo-relative path; each group lists
    its files with their symbols ordered by line. A per-prefix summary is
    produced for every entry of ``COVERAGE_PREFIXES``.

    Returns:
        A dict with the keys ``generatedAt`` (UTC ISO timestamp),
        ``targets``, ``coverageScopes``, ``totals`` and ``groups``.
    """
    scanned = iter_target_files()
    relative_paths = [repo_relative(item) for item in scanned]

    records_by_path: dict[str, list[SymbolRecord]] = defaultdict(list)
    for record in symbols:
        records_by_path[record.path].append(record)

    entries_by_group: dict[str, list[dict]] = defaultdict(list)
    for source, relative in zip(scanned, relative_paths):
        segments = relative.split("/")
        group_key = relative if len(segments) < 2 else "/".join(segments[:2])
        ordered = sorted(records_by_path[relative], key=lambda item: item.line)
        entries_by_group[group_key].append(
            {
                "path": relative,
                "language": "dart" if source.suffix == ".dart" else "rust",
                "symbolCount": len(ordered),
                "symbols": [asdict(item) for item in ordered],
            }
        )

    groups = [
        {
            "group": key,
            "fileCount": len(entries_by_group[key]),
            "symbolCount": sum(
                entry["symbolCount"] for entry in entries_by_group[key]
            ),
            "files": entries_by_group[key],
        }
        for key in sorted(entries_by_group)
    ]

    def within(candidate: str, prefix: str) -> bool:
        # A path is in scope when it equals the prefix or lies beneath it.
        return candidate == prefix or candidate.startswith(f"{prefix}/")

    coverage = [
        {
            "scope": prefix,
            "fileCount": sum(1 for item in relative_paths if within(item, prefix)),
            "symbolCount": sum(1 for item in symbols if within(item.path, prefix)),
        }
        for prefix in COVERAGE_PREFIXES
    ]

    return {
        "generatedAt": datetime.now(timezone.utc).isoformat(),
        "targets": list(TARGETS),
        "coverageScopes": coverage,
        "totals": {
            "fileCount": len(scanned),
            "symbolCount": len(symbols),
        },
        "groups": groups,
    }
def render_markdown(inventory: dict) -> str:
lines: list[str] = [
"# Public Symbol Inventory",
"",
"> Auto-generated by `scripts/docs/extract_public_api_inventory.py`.",
">",
"> Scope: `lib/app`, `lib/runtime`, `lib/models`, `lib/features/**`, `lib/theme`, `rust/src`.",
"> Excludes private `_` symbols and non-top-level Dart members.",
"",
f"- Generated at: `{inventory['generatedAt']}`",
f"- Files scanned: `{inventory['totals']['fileCount']}`",
f"- Public symbols extracted: `{inventory['totals']['symbolCount']}`",
"",
"## Group Summary",
"",
"| Group | Files | Public Symbols |",
"| --- | ---: | ---: |",
]
for group in inventory["groups"]:
lines.append(
f"| `{group['group']}` | {group['fileCount']} | {group['symbolCount']} |"
)
lines.extend(
[
"",
"## Coverage Scope Summary",
"",
"| Scope | Files | Public Symbols |",
"| --- | ---: | ---: |",
]
)
for scope in inventory["coverageScopes"]:
lines.append(
f"| `{scope['scope']}` | {scope['fileCount']} | {scope['symbolCount']} |"
)
for group in inventory["groups"]:
lines.extend(
[
"",
f"## {group['group']}",
"",
f"- Files: `{group['fileCount']}`",
f"- Public symbols: `{group['symbolCount']}`",
]
)
for file_entry in group["files"]:
lines.extend(
[
"",
f"### `{file_entry['path']}`",
"",
f"- Language: `{file_entry['language']}`",
f"- Public symbols: `{file_entry['symbolCount']}`",
]
)
if not file_entry["symbols"]:
lines.extend(["", "_No extracted public top-level symbols._"])
continue
lines.extend(
[
"",
"| Line | Kind | Name | Signature |",
"| ---: | --- | --- | --- |",
]
)
for symbol in file_entry["symbols"]:
signature = symbol["signature"].replace("|", "\\|")
lines.append(
f"| {symbol['line']} | `{symbol['kind']}` | `{symbol['name']}` | `{signature}` |"
)
lines.append("")
return "\n".join(lines)
def main() -> None:
    """Generate the inventory and write it as JSON and Markdown.

    Creates the output directory if needed, extracts symbols from every
    target file, writes both reports under ``OUTPUT_DIR`` and prints the
    written paths followed by a one-line summary.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    symbols: list[SymbolRecord] = [
        record
        for source in iter_target_files()
        for record in extract_symbols(source)
    ]
    inventory = build_inventory(symbols)

    # Both documents are rendered before anything is written to disk.
    outputs = (
        (
            OUTPUT_DIR / "public-symbol-inventory.json",
            json.dumps(inventory, ensure_ascii=False, indent=2) + "\n",
        ),
        (
            OUTPUT_DIR / "public-symbol-inventory.md",
            render_markdown(inventory),
        ),
    )
    for destination, content in outputs:
        destination.write_text(content, encoding="utf-8")
    for destination, _ in outputs:
        print(f"Wrote {repo_relative(destination)}")

    totals = inventory["totals"]
    print(
        f"Scanned {totals['fileCount']} files, extracted {totals['symbolCount']} public symbols."
    )
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()