316 lines
10 KiB
Python
Executable File
316 lines
10 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""Generate a documentation manifest for dl.svc.plus/docs.
|
|
|
|
The script scans the documentation tree (typically mounted at
|
|
``/data/update-server/docs``) and emits a single ``all.json`` file containing
|
|
metadata for every HTML/PDF pair that can be presented in the dashboard docs
|
|
portal.
|
|
|
|
It infers titles, categories, versions and presentation tags from the directory
|
|
structure and provides canonical URLs to the rendered assets. The resulting
|
|
JSON structure is a list of ``DocResource`` dictionaries that match the shape
|
|
consumed by the Next.js UI under ``dashboard/app/docs``.
|
|
|
|
Usage example::
|
|
|
|
python3 scripts/gen_docs_manifest.py \
|
|
--root /data/update-server/docs \
|
|
--base-url-prefix https://dl.svc.plus/docs
|
|
|
|
The command is idempotent and safe to rerun. Hidden files/directories (prefixed
|
|
with ``.``) are ignored. Only ``.pdf`` and ``.html`` assets are considered for
|
|
listing.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
|
|
|
SUPPORTED_EXTENSIONS = {".pdf", ".html"}
|
|
|
|
|
|
@dataclass
|
|
class DocEntry:
|
|
slug: str
|
|
title: str
|
|
category: Optional[str]
|
|
version: Optional[str]
|
|
version_dir: Optional[str]
|
|
version_slug: Optional[str]
|
|
collection_dir: Optional[str]
|
|
collection_slug: Optional[str]
|
|
collection_label: Optional[str]
|
|
language: Optional[str]
|
|
description: str
|
|
pdf_url: Optional[str]
|
|
html_url: Optional[str]
|
|
tags: Set[str]
|
|
updated_ts: float
|
|
path_segments: Sequence[str]
|
|
|
|
def to_payload(self) -> Dict[str, object]:
|
|
updated_at = iso8601(self.updated_ts) if self.updated_ts else None
|
|
tags = sorted({t for t in self.tags if t})
|
|
description = self.description
|
|
if not description:
|
|
description = build_default_description(
|
|
self.title,
|
|
self.category,
|
|
self.version,
|
|
self.version_dir,
|
|
[fmt for fmt in ["PDF" if self.pdf_url else None, "HTML" if self.html_url else None] if fmt],
|
|
)
|
|
payload: Dict[str, object] = {
|
|
"slug": self.slug,
|
|
"title": self.title,
|
|
"description": description,
|
|
}
|
|
if self.category:
|
|
payload["category"] = self.category
|
|
if self.version:
|
|
payload["version"] = self.version
|
|
if updated_at:
|
|
payload["updatedAt"] = updated_at
|
|
if self.collection_dir:
|
|
payload["collection"] = self.collection_dir
|
|
if self.collection_slug:
|
|
payload["collectionSlug"] = self.collection_slug
|
|
if self.collection_label:
|
|
payload["collectionLabel"] = self.collection_label
|
|
if self.pdf_url:
|
|
payload["pdfUrl"] = self.pdf_url
|
|
if self.html_url:
|
|
payload["htmlUrl"] = self.html_url
|
|
if self.language:
|
|
payload["language"] = self.language
|
|
if self.version_dir and (not self.version or self.version_dir != self.version.replace(" ", "-")):
|
|
payload["variant"] = self.version_dir
|
|
if self.version_slug:
|
|
payload["versionSlug"] = self.version_slug
|
|
if tags:
|
|
payload["tags"] = tags
|
|
if self.path_segments:
|
|
payload["pathSegments"] = list(self.path_segments)
|
|
return payload
|
|
|
|
|
|
def iso8601(ts: float) -> str:
|
|
return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
|
|
def humanize_segment(segment: str) -> str:
|
|
"""Convert a path fragment into a presentation friendly label."""
|
|
|
|
if not segment:
|
|
return ""
|
|
# Replace delimiters with spaces and split camelCase boundaries.
|
|
cleaned = segment.replace("_", " ")
|
|
cleaned = cleaned.replace("-", " ")
|
|
cleaned = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", cleaned)
|
|
cleaned = re.sub(r"\s+", " ", cleaned)
|
|
return cleaned.strip()
|
|
|
|
|
|
def slugify(parts: Iterable[str]) -> str:
|
|
tokens: List[str] = []
|
|
for part in parts:
|
|
cleaned = re.sub(r"[^A-Za-z0-9]+", "-", part)
|
|
cleaned = cleaned.strip("-").lower()
|
|
if cleaned:
|
|
tokens.append(cleaned)
|
|
return "-".join(tokens) or "doc"
|
|
|
|
|
|
def format_version_label(version_dir: Optional[str]) -> Optional[str]:
|
|
if not version_dir:
|
|
return None
|
|
tokens = re.split(r"[-_]+", version_dir)
|
|
if len(tokens) >= 2:
|
|
return " ".join(tokens[:2])
|
|
return humanize_segment(version_dir)
|
|
|
|
|
|
def build_version_slug(version_dir: Optional[str], version_label: Optional[str]) -> Optional[str]:
|
|
"""Generate a stable slug for the version route segment."""
|
|
|
|
candidate = version_dir or version_label
|
|
if not candidate:
|
|
return None
|
|
return slugify([candidate])
|
|
|
|
|
|
def detect_language(version_dir: Optional[str]) -> Optional[str]:
|
|
if not version_dir:
|
|
return None
|
|
match = re.match(r"^([A-Z]{2,3})(?:-|$)", version_dir)
|
|
if not match:
|
|
return None
|
|
token = match.group(1)
|
|
if len(token) <= 3:
|
|
return token
|
|
return None
|
|
|
|
|
|
def build_default_description(
|
|
title: str,
|
|
category: Optional[str],
|
|
version: Optional[str],
|
|
version_dir: Optional[str],
|
|
formats: Sequence[str],
|
|
) -> str:
|
|
lead = title or "Documentation"
|
|
if category:
|
|
lead = f"{lead} — {category}"
|
|
suffix_parts: List[str] = []
|
|
if version:
|
|
suffix_parts.append(f"edition {version}")
|
|
elif version_dir:
|
|
suffix_parts.append(f"edition {humanize_segment(version_dir)}")
|
|
if version_dir and version_dir != humanize_segment(version_dir):
|
|
suffix_parts.append(f"release {version_dir}")
|
|
if formats:
|
|
if len(formats) == 1:
|
|
suffix_parts.append(f"available as {formats[0]}")
|
|
else:
|
|
suffix_parts.append(f"available as {' and '.join(formats)}")
|
|
suffix = ", ".join(suffix_parts)
|
|
if suffix:
|
|
return f"{lead} ({suffix})."
|
|
return f"{lead}."
|
|
|
|
|
|
def should_skip(path: Path) -> bool:
|
|
return any(part.startswith(".") for part in path.parts)
|
|
|
|
|
|
def build_url(root: Path, file_path: Path, base_prefix: str) -> str:
|
|
rel = file_path.relative_to(root).as_posix()
|
|
# Ensure a single leading slash before appending to prefix.
|
|
rel = "/" + rel.lstrip("/")
|
|
prefix = base_prefix.rstrip("/")
|
|
if prefix:
|
|
return f"{prefix}{rel}"
|
|
return rel
|
|
|
|
|
|
def create_entry(parts: Tuple[str, ...]) -> DocEntry:
|
|
category = humanize_segment(parts[0]) if parts else None
|
|
version_dir = parts[1] if len(parts) > 1 else None
|
|
collection_dir = parts[0] if parts else None
|
|
collection_slug = slugify(parts[:1]) if parts else None
|
|
collection_label = humanize_segment(collection_dir or "") if collection_dir else None
|
|
version_label = format_version_label(version_dir)
|
|
version_slug = build_version_slug(version_dir, version_label)
|
|
title = humanize_segment(parts[-1]) if parts else ""
|
|
language = detect_language(version_dir)
|
|
|
|
tags: Set[str] = set()
|
|
if category:
|
|
tags.add(category)
|
|
if language:
|
|
tags.add(language)
|
|
if version_label:
|
|
tags.add(version_label)
|
|
|
|
return DocEntry(
|
|
slug=slugify(parts),
|
|
title=title or parts[-1],
|
|
category=category,
|
|
version=version_label,
|
|
version_dir=version_dir,
|
|
version_slug=version_slug,
|
|
collection_dir=collection_dir,
|
|
collection_slug=collection_slug,
|
|
collection_label=collection_label,
|
|
language=language,
|
|
description="",
|
|
pdf_url=None,
|
|
html_url=None,
|
|
tags=tags,
|
|
updated_ts=0.0,
|
|
path_segments=parts[:-1],
|
|
)
|
|
|
|
|
|
def collect_docs(root: Path, base_prefix: str) -> List[DocEntry]:
|
|
entries: Dict[Tuple[str, ...], DocEntry] = {}
|
|
|
|
for file_path in root.rglob("*"):
|
|
if not file_path.is_file():
|
|
continue
|
|
if file_path.suffix.lower() not in SUPPORTED_EXTENSIONS:
|
|
continue
|
|
rel = file_path.relative_to(root)
|
|
if should_skip(rel):
|
|
continue
|
|
|
|
parts = rel.parts[:-1] + (file_path.stem,)
|
|
if not parts:
|
|
continue
|
|
key = tuple(parts)
|
|
entry = entries.get(key)
|
|
if entry is None:
|
|
entry = create_entry(key)
|
|
entries[key] = entry
|
|
|
|
url = build_url(root, file_path, base_prefix)
|
|
if file_path.suffix.lower() == ".pdf":
|
|
entry.pdf_url = url
|
|
elif file_path.suffix.lower() == ".html":
|
|
entry.html_url = url
|
|
|
|
try:
|
|
mtime = file_path.stat().st_mtime
|
|
except FileNotFoundError:
|
|
mtime = 0.0
|
|
entry.updated_ts = max(entry.updated_ts, mtime)
|
|
|
|
return sorted(entries.values(), key=lambda e: (-e.updated_ts, e.slug))
|
|
|
|
|
|
def write_manifest(output_path: Path, entries: Sequence[DocEntry]) -> None:
|
|
payload = [entry.to_payload() for entry in entries]
|
|
tmp_path = output_path.with_suffix(output_path.suffix + ".tmp")
|
|
with tmp_path.open("w", encoding="utf-8") as fh:
|
|
json.dump(payload, fh, ensure_ascii=False, indent=2)
|
|
fh.write("\n")
|
|
tmp_path.replace(output_path)
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
|
|
parser = argparse.ArgumentParser(description="Generate documentation manifest (all.json)")
|
|
parser.add_argument("--root", required=True, help="Root directory of the docs tree (e.g. /data/update-server/docs)")
|
|
parser.add_argument("--base-url-prefix", default="/docs", help="URL prefix to prepend to asset paths")
|
|
parser.add_argument("--output", default="all.json", help="Output filename (default: all.json)")
|
|
parser.add_argument("--quiet", action="store_true", help="Suppress progress output")
|
|
return parser.parse_args()
|
|
|
|
|
|
def main() -> None:
|
|
args = parse_args()
|
|
root = Path(args.root).resolve()
|
|
if not root.exists() or not root.is_dir():
|
|
raise SystemExit(f"Root path does not exist or is not a directory: {root}")
|
|
|
|
entries = collect_docs(root, args.base_url_prefix)
|
|
|
|
if not args.quiet:
|
|
print(f"Discovered {len(entries)} documentation entries under {root}")
|
|
|
|
output_path = root / args.output
|
|
write_manifest(output_path, entries)
|
|
|
|
if not args.quiet:
|
|
print(f"Wrote {output_path}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|