accounts/scripts/gen_mirror_manifest.py
2025-09-16 10:46:45 +08:00

230 lines
7.2 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Generate manifest metadata for the ui/dl static download portal.
- Writes <root>/manifest.json aggregating directory listings (formerly all.json)
Paths in JSON use leading "/" (URL-style) and directory hrefs end with "/".
`sha256` for an item is set if a sibling "<file>.sha256sum" exists, or if the
directory contains "SHA256SUMS" or "sha256sum.txt" (then that file's path is
referenced).
Usage:
    python3 scripts/gen_mirror_manifest.py \
        --root /data/update-server \
        --base-url-prefix / \
        [--exclude docs --exclude xray-core]
This script is idempotent and safe to re-run.
"""
import argparse
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Set
# Entry names that is_hidden() treats as hidden, in addition to any name that
# starts with a dot.
HIDE_NAMES = {".git", ".github", ".DS_Store"}
# Metadata files generated by this script that should be excluded from listings.
# "manifest.json" is the current output; "all.json" is its former name.
# NOTE(review): "dir.json" is presumably another legacy output - confirm.
METADATA_FILES = {"manifest.json", "dir.json", "all.json"}
def iso8601(ts: float) -> str:
    """Format a POSIX timestamp as a UTC ISO 8601 string ending in "Z".

    The result has one-second resolution, e.g. "1970-01-01T00:00:00Z".
    """
    moment = datetime.fromtimestamp(ts, tz=timezone.utc)
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")
def is_hidden(name: str) -> bool:
    """Return True if *name* starts with a dot or is listed in HIDE_NAMES."""
    if name.startswith("."):
        return True
    return name in HIDE_NAMES
def latest_mtime(path: Path) -> float:
    """Return the newest modification time found at or below *path*.

    For a regular file this is the file's own mtime. Otherwise the result
    starts from the mtime of *path* itself and is raised by the mtime of any
    file found while walking the tree. Subdirectory mtimes are not consulted.
    Files that disappear during the walk are skipped.
    """
    if path.is_file():
        return path.stat().st_mtime

    newest = path.stat().st_mtime
    for dirpath, _dirnames, filenames in os.walk(path):
        parent = Path(dirpath)
        for filename in filenames:
            try:
                candidate = (parent / filename).stat().st_mtime
            except FileNotFoundError:
                # Removed (or a dangling symlink) while walking; ignore it.
                continue
            newest = max(newest, candidate)
    return newest
def rel_url(root: Path, path: Path, base_prefix: str) -> str:
    """Return the URL-style path of *path* relative to the mirror *root*.

    The result always starts with "/", uses "/" as separator, and ends with
    "/" when *path* is an existing directory. *base_prefix* (with any trailing
    "/" removed) is prepended; a prefix of "/" or "" adds nothing.

    Args:
        root: Filesystem root of the mirror.
        path: A path equal to or below *root*.
        base_prefix: URL prefix to prepend, e.g. "/" or "/dl".

    Raises:
        ValueError: If *path* is not located under *root* (from
            ``Path.relative_to``).
    """
    # Joining the relative parts (empty for the root itself) avoids emitting
    # "/./" when path == root and is independent of the OS path separator.
    rel = "/" + "/".join(path.relative_to(root).parts)
    if path.is_dir() and not rel.endswith("/"):
        rel += "/"
    # rel always begins with "/", so the prefix can be concatenated directly.
    return base_prefix.rstrip("/") + rel
def guess_sha256_path(dir_path: Path, file_path: Path, root: Path, base_prefix: str) -> Optional[str]:
    """Return the URL of a checksum file that applies to *file_path*, if any.

    Candidates are tried in this order and the first one that exists wins:
      1. a per-file "<file>.sha256sum" next to the file,
      2. "SHA256SUMS" in *dir_path*,
      3. "sha256sum.txt" in *dir_path*.
    Returns None when none of them exists.
    """
    candidates = (
        file_path.with_suffix(file_path.suffix + ".sha256sum"),
        dir_path / "SHA256SUMS",
        dir_path / "sha256sum.txt",
    )
    for candidate in candidates:
        if candidate.exists():
            return rel_url(root, candidate, base_prefix)
    return None
def should_exclude(path: Path, excluded: Set[Path]) -> bool:
    """Return True if *path* is an excluded path or lies beneath one.

    *path* is resolved (non-strictly) before comparison, so entries in
    *excluded* are expected to be resolved absolute paths as well.
    """
    if not excluded:
        return False
    target = path.resolve(strict=False)
    ancestors = target.parents
    return any(base == target or base in ancestors for base in excluded)
def normalize_excludes(raw_values: List[str], root: Path) -> Set[Path]:
    """Turn raw ``--exclude`` values into a set of resolved paths under *root*.

    Empty or whitespace-only values are dropped. Absolute values are resolved
    as given; relative values have surrounding "/" removed and are resolved
    against *root*. Any value that ends up outside *root* is reported on
    stderr and ignored.
    """
    result: Set[Path] = set()
    for raw in raw_values:
        text = raw.strip() if raw else ""
        if not text:
            continue

        if Path(text).is_absolute():
            target = Path(text).resolve(strict=False)
        else:
            trimmed = text.strip("/")
            target = (root / Path(trimmed or ".")).resolve(strict=False)

        try:
            target.relative_to(root)
        except ValueError:
            print(
                f"Warning: exclude path '{raw}' is outside mirror root {root}, ignoring",
                file=sys.stderr,
            )
        else:
            result.add(target)
    return result
def build_dir_listing(dir_path: Path, root: Path, base_prefix: str, excluded: Set[Path]) -> Dict:
    """Build a DirListing structure for the manifest.

    Args:
        dir_path: Directory to list; must be *root* or located below it.
        root: Filesystem root of the mirror.
        base_prefix: URL prefix passed through to ``rel_url``.
        excluded: Resolved paths whose entries (and descendants) are omitted.

    Returns:
        ``{"path": <dir path relative to root, "" for root, else ending in
        "/">, "entries": [...]}``. Entries are sorted by name. Directory
        entries carry name/href/type/lastModified; file entries additionally
        carry size and, when a checksum file is found, sha256. Hidden names,
        generated metadata files, excluded paths and anything that is neither
        a directory nor a regular file are left out.
    """
    rel = str(dir_path.relative_to(root)).replace(os.sep, "/")
    if rel == ".":
        rel = ""
    if rel and not rel.endswith("/"):
        rel += "/"

    try:
        children = sorted(
            (p for p in dir_path.iterdir() if not is_hidden(p.name)),
            key=lambda p: p.name,
        )
    except FileNotFoundError:
        # The directory vanished after the caller discovered it.
        children = []

    entries: List[Dict] = []
    for child in children:
        if child.name in METADATA_FILES or should_exclude(child, excluded):
            continue
        href = rel_url(root, child, base_prefix)
        try:
            if child.is_dir():
                entry = {
                    "name": child.name + "/",
                    "href": href,
                    "type": "dir",
                    "lastModified": iso8601(latest_mtime(child)),
                }
            elif child.is_file():
                # Stat once so size and mtime describe the same file state.
                info = child.stat()
                entry = {
                    "name": child.name,
                    "href": href,
                    "type": "file",
                    "size": info.st_size,
                    "lastModified": iso8601(info.st_mtime),
                }
                sha = guess_sha256_path(dir_path, child, root, base_prefix)
                if sha:
                    entry["sha256"] = sha
            else:
                continue
        except FileNotFoundError:
            # The entry was removed while being inspected; skip it rather than
            # abort the whole manifest, matching the iterdir() guard above.
            continue
        entries.append(entry)
    return {"path": rel, "entries": entries}
def write_json(path: Path, data: Dict):
    """Serialize *data* as indented UTF-8 JSON and move it into place at *path*.

    The content is first written to a sibling "<name>.tmp" file, which then
    replaces *path*, so readers never observe a partially written file.
    """
    staging = path.with_suffix(path.suffix + ".tmp")
    payload = json.dumps(data, ensure_ascii=False, indent=2) + "\n"
    with staging.open("w", encoding="utf-8") as handle:
        handle.write(payload)
    staging.replace(path)
def main():
    """Command-line entry point: index the mirror and write manifest.json.

    Walks ``--root``, skipping hidden and excluded directories, builds one
    listing per visited directory and writes them all to
    ``<root>/manifest.json``. Exits with status 2 if the root does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", required=True, help="Filesystem root of the mirror (e.g., /data/update-server)")
    parser.add_argument("--base-url-prefix", default="/", help="URL prefix (default '/')")
    parser.add_argument("--quiet", action="store_true")
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Relative paths (from root) to exclude from the manifest. Can be provided multiple times.",
    )
    opts = parser.parse_args()

    root = Path(opts.root).resolve()
    if not root.exists():
        print(f"Root does not exist: {root}", file=sys.stderr)
        sys.exit(2)

    excluded = normalize_excludes(opts.exclude, root)
    verbose = not opts.quiet
    manifest_path = root / "manifest.json"

    # One listing per directory that is neither hidden nor excluded.
    listings: List[Dict] = []
    for current_dir, subdirs, _files in os.walk(root):
        here = Path(current_dir)
        hidden_dir = here != root and is_hidden(here.name)
        if hidden_dir or should_exclude(here, excluded):
            # Clearing the list in place stops os.walk from descending.
            subdirs[:] = []
            continue

        # Prune and sort in place so the walk order is deterministic.
        subdirs[:] = sorted(
            name
            for name in subdirs
            if not (is_hidden(name) or should_exclude(here / name, excluded))
        )

        listings.append(build_dir_listing(here, root, opts.base_url_prefix, excluded))
        if verbose:
            shown = str(here.relative_to(root)) if here != root else "."
            print(f"Indexed {shown}")

    # manifest.json holds every directory listing (previously all.json).
    write_json(manifest_path, listings)
    if verbose:
        print(f"Wrote {manifest_path}")


if __name__ == "__main__":
    main()