accounts/scripts/gen_offline-package_manifest.py
Haitao Pan d647d02992 feat: merge quick wrapper into main manifest scripts
### Changes:

1. **gen_offline-package_manifest.py**
   - Merged gen_offline-package_manifest_quick.py functionality
   - Added auto-derivation of --root from --output path
   - Auto-discovers subdirectories when --include not specified
   - Outputs to offline-package-manifest.json
   - Removed quick wrapper script (merged into main)

2. **gen_docs_manifest.py**
   - Added auto-discovery of subdirectories
   - Scans all category/version/*.html and *.pdf when --include not specified

### Usage:

For gen_offline-package_manifest.py:
  # Auto-derive root from output
  python3 scripts/gen_offline-package_manifest.py \
    --output /data/update-server/dl-index

  # Or specify root explicitly
  python3 scripts/gen_offline-package_manifest.py \
    --root /data/update-server/offline-package \
    --output /data/update-server/dl-index

For gen_docs_manifest.py:
  # Auto-discover all subdirectories
  python3 scripts/gen_docs_manifest.py \
    --root /data/update-server/docs \
    --output /data/update-server/dl-index

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-11 18:58:37 +08:00

288 lines
9.7 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Generate manifest metadata for the ui/dl static download portal.

- Writes <output>/offline-package-manifest.json, a JSON array holding one
  directory listing per indexed directory (formerly all.json).

Paths in JSON use a leading "/" (URL-style), are prefixed with
--base-url-prefix, and directory hrefs end with "/".

`sha256` for an item is set to the URL of a sibling "<file>.sha256sum" if one
exists; otherwise to the directory's "SHA256SUMS" or "sha256sum.txt" file if
present.

Usage:
    python3 scripts/gen_offline-package_manifest.py \
        --root /data/update-server \
        --base-url-prefix https://dl.svc.plus/offline-package \
        --include offline-package \
        --output dl-index/ \
        [--exclude docs --exclude xray-core]

If --root is omitted it is derived from --output; if --include is omitted,
every non-hidden subdirectory of the root is indexed.

This script is idempotent and safe to re-run.
"""
import argparse
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Set
# Names that are never listed or descended into.
HIDE_NAMES = {".git", ".github", ".DS_Store"}
# Metadata file names that must not show up in directory listings.
# "offline-package-manifest.json" is the file this script writes; without it
# here, a manifest located under the indexed root would list itself on the
# next run. The remaining names are kept from the original list.
METADATA_FILES = {
    "manifest.json",
    "dir.json",
    "all.json",
    "offline-package-manifest.json",
}
def iso8601(ts: float) -> str:
    """Render a POSIX timestamp as a UTC ISO-8601 string with second precision.

    Example: ``iso8601(0)`` returns ``"1970-01-01T00:00:00Z"``.
    """
    moment = datetime.fromtimestamp(ts, tz=timezone.utc)
    layout = "%Y-%m-%dT%H:%M:%SZ"
    return moment.strftime(layout)
def is_hidden(name: str) -> bool:
    """Tell whether a file or directory name should be left out of the manifest.

    A name is hidden when it is a dotfile or is listed in ``HIDE_NAMES``.
    """
    if name.startswith("."):
        return True
    return name in HIDE_NAMES
def latest_mtime(path: Path) -> float:
    """Return the newest modification time found at or below *path*.

    For a regular file this is the file's own mtime. For a directory it is the
    maximum of the directory's own mtime and the mtime of every file found by
    walking it; files that disappear during the walk are skipped.
    """
    if path.is_file():
        return path.stat().st_mtime

    newest = path.stat().st_mtime
    for dirpath, _dirnames, filenames in os.walk(path):
        base = Path(dirpath)
        for filename in filenames:
            try:
                candidate = (base / filename).stat().st_mtime
            except FileNotFoundError:
                # Removed between the directory scan and the stat call.
                continue
            if candidate > newest:
                newest = candidate
    return newest
def rel_url(root: Path, path: Path, base_prefix: str) -> str:
    """Return the URL-style path of *path* relative to *root*.

    The result always starts with "/" (after the optional prefix), uses "/"
    as separator, and ends with "/" when *path* is an existing directory.

    Args:
        root: Mirror root that *path* must live under.
        path: File or directory to express as a URL path.
        base_prefix: Optional URL prefix; trailing slashes are ignored. An
            empty prefix yields a bare "/..." path.

    Raises:
        ValueError: If *path* is not located under *root* (from
            ``Path.relative_to``).
    """
    # Joining the parts (rather than str()) maps root itself to "/" instead of
    # "/./", because Path(".").parts is empty.
    rel = "/" + "/".join(path.relative_to(root).parts)
    if path.is_dir() and not rel.endswith("/"):
        rel += "/"
    # rel always starts with "/", so it can be appended to the prefix directly.
    return base_prefix.rstrip("/") + rel
def guess_sha256_path(dir_path: Path, file_path: Path, root: Path, base_prefix: str) -> Optional[str]:
    """Locate a checksum file that covers *file_path* and return its URL.

    Candidates are probed in priority order and the first one that exists wins:

    1. ``<file>.sha256sum`` next to the file,
    2. ``SHA256SUMS`` in the containing directory,
    3. ``sha256sum.txt`` in the containing directory.

    Returns ``None`` when none of them exists.
    """
    candidates = (
        file_path.with_suffix(file_path.suffix + ".sha256sum"),
        dir_path / "SHA256SUMS",
        dir_path / "sha256sum.txt",
    )
    for checksum_file in candidates:
        if checksum_file.exists():
            return rel_url(root, checksum_file, base_prefix)
    return None
def should_exclude(path: Path, excluded: Set[Path]) -> bool:
    """Tell whether *path* is one of the excluded paths or lies beneath one.

    *path* is resolved (without requiring it to exist) before the comparison.
    An empty exclusion set never excludes anything.
    """
    if not excluded:
        return False
    target = path.resolve(strict=False)
    ancestors = target.parents
    return any(base == target or base in ancestors for base in excluded)
def normalize_excludes(raw_values: List[str], root: Path) -> Set[Path]:
    """Turn raw ``--exclude`` values into a set of resolved paths under *root*.

    Blank values are skipped. Absolute values are resolved as given; relative
    values have surrounding slashes removed and are resolved against *root*.
    A value that resolves outside *root* is reported on stderr and dropped.
    """
    result: Set[Path] = set()
    for raw in raw_values:
        text = raw.strip() if raw else ""
        if not text:
            continue

        as_path = Path(text)
        if not as_path.is_absolute():
            trimmed = text.strip("/")
            as_path = root / (Path(trimmed) if trimmed else Path("."))
        resolved = as_path.resolve(strict=False)

        try:
            resolved.relative_to(root)
        except ValueError:
            print(
                f"Warning: exclude path '{raw}' is outside mirror root {root}, ignoring",
                file=sys.stderr,
            )
        else:
            result.add(resolved)
    return result
def build_dir_listing(dir_path: Path, root: Path, base_prefix: str, excluded: Set[Path]) -> Dict:
    """Build a DirListing structure for the manifest.

    Args:
        dir_path: Directory to list; must be *root* or live under it.
        root: Mirror root used to compute relative paths and hrefs.
        base_prefix: URL prefix handed to ``rel_url`` for every href.
        excluded: Resolved paths that must not appear in the listing.

    Returns:
        ``{"path": <dir relative to root, "" for root, else ending in "/">,
        "entries": [...]}``. Entries are sorted by name. Directory entries
        carry ``name`` (with trailing "/"), ``href``, ``type`` and
        ``lastModified``; file entries additionally carry ``size`` and, when a
        checksum file is found, ``sha256``. Hidden names, metadata files,
        excluded paths and anything that is neither a file nor a directory
        are left out.
    """
    rel = str(dir_path.relative_to(root)).replace(os.sep, "/")
    if rel == ".":
        rel = ""
    if rel and not rel.endswith("/"):
        rel += "/"

    try:
        children = sorted(
            (p for p in dir_path.iterdir() if not is_hidden(p.name)),
            key=lambda p: p.name,
        )
    except FileNotFoundError:
        # The directory vanished before it could be scanned.
        children = []

    entries: List[Dict] = []
    for child in children:
        if child.name in METADATA_FILES or should_exclude(child, excluded):
            continue
        try:
            if child.is_dir():
                entry = {
                    "name": child.name + "/",
                    "href": rel_url(root, child, base_prefix),
                    "type": "dir",
                    "lastModified": iso8601(latest_mtime(child)),
                }
            elif child.is_file():
                # One stat call so size and mtime describe the same snapshot.
                info = child.stat()
                entry = {
                    "name": child.name,
                    "href": rel_url(root, child, base_prefix),
                    "type": "file",
                    "size": info.st_size,
                    "lastModified": iso8601(info.st_mtime),
                }
                sha = guess_sha256_path(dir_path, child, root, base_prefix)
                if sha:
                    entry["sha256"] = sha
            else:
                # Neither file nor directory (e.g. a dangling symlink).
                continue
        except FileNotFoundError:
            # The child was removed while being inspected; skip it, the same
            # way a vanished directory yields an empty listing above.
            continue
        entries.append(entry)
    return {"path": rel, "entries": entries}
def write_json(path: Path, data: object) -> None:
    """Write *data* as pretty-printed UTF-8 JSON to *path* via a temp file.

    The content is first written to ``<path>.tmp`` in the same directory and
    then renamed over *path*, so readers never observe a half-written file.
    If serialization or the rename fails, the temp file is removed and the
    previous content of *path* is left untouched.

    Args:
        path: Destination file.
        data: Any JSON-serializable value (the manifest is a list of dicts).

    Raises:
        TypeError, ValueError: If *data* cannot be serialized by ``json.dump``.
        OSError: If the temp file cannot be written or renamed.
    """
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
            f.write("\n")
        tmp.replace(path)
    except BaseException:
        # Do not leave a partial "<name>.tmp" behind; it is re-raised below so
        # the caller still sees the original failure.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
def _derive_root_from_output(output: str) -> str:
    """Guess the mirror root from the output directory when --root is omitted."""
    output_path = Path(output).resolve()
    # For ".../dl-index", prefer the sibling "offline-package" directory.
    # Comparing the final path component (instead of a "/dl-index" string
    # suffix) keeps this independent of the platform's path separator.
    if output_path.name == "dl-index":
        candidate = output_path.parent / "offline-package"
        if candidate.is_dir():
            return str(candidate)
    return str(output_path.parent)


def main():
    """Command-line entry point: index the mirror and write the manifest.

    Walks the mirror root top-down, builds one listing per visited directory
    and writes them as a JSON array to
    ``<output>/offline-package-manifest.json``.

    Exits with status 2 when the root does not exist or is not a directory.
    """
    ap = argparse.ArgumentParser(description="Generate offline-package manifest")
    ap.add_argument("--root", help="Filesystem root of the mirror. If not specified, will be auto-derived from --output")
    ap.add_argument("--base-url-prefix", default="https://dl.svc.plus/offline-package", help="URL prefix (default: https://dl.svc.plus/offline-package)")
    ap.add_argument("--quiet", action="store_true")
    ap.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Relative paths (from root) to exclude from the manifest. Can be provided multiple times.",
    )
    ap.add_argument(
        "--include",
        action="append",
        help="Directory names to include in the manifest. Can be provided multiple times. If not specified, auto-discovers all subdirectories.",
    )
    ap.add_argument(
        "--output",
        default="dl-index/",
        help="Output directory for the manifest file (default: dl-index/)",
    )
    args = ap.parse_args()

    if not args.root:
        args.root = _derive_root_from_output(args.output)

    root = Path(args.root).resolve()
    if not root.exists():
        print(f"Root does not exist: {root}", file=sys.stderr)
        sys.exit(2)
    if not root.is_dir():
        # Without this check, root.iterdir()/os.walk would fail later with a
        # traceback (or silently index nothing).
        print(f"Root is not a directory: {root}", file=sys.stderr)
        sys.exit(2)

    excluded = normalize_excludes(args.exclude, root)

    if args.include:
        # Accept "name/" (typical of shell tab completion) and stray
        # whitespace, mirroring how --exclude values are cleaned up.
        include_set = {
            cleaned
            for cleaned in (raw.strip().strip("/") for raw in args.include)
            if cleaned
        }
        for name in sorted(include_set):
            # Only top-level directory names can match during the walk.
            if "/" in name or not (root / name).is_dir():
                print(
                    f"Warning: include '{name}' is not a top-level directory of {root}, ignoring",
                    file=sys.stderr,
                )
    else:
        # Sorted so the progress output does not depend on directory order.
        discovered = sorted(
            d.name for d in root.iterdir() if d.is_dir() and not is_hidden(d.name)
        )
        if not args.quiet:
            print(f"Auto-discovered directories: {', '.join(discovered)}")
        include_set = set(discovered)

    # NOTE(review): the root listing is built from every visible child of
    # root, so directories left out of --include still appear as entries there
    # even though they get no listing of their own. Confirm this is intended.
    listings: List[Dict] = []
    for current_dir, subdirs, _ in os.walk(root):
        dir_path = Path(current_dir)
        at_root = dir_path == root

        if (not at_root and is_hidden(dir_path.name)) or should_exclude(dir_path, excluded):
            subdirs[:] = []
            continue

        # Prune in place so os.walk only descends into wanted directories.
        # The include filter applies to the first level only: everything
        # reached below root already descends from an included directory.
        subdirs[:] = sorted(
            d
            for d in subdirs
            if (not at_root or d in include_set)
            and not is_hidden(d)
            and not should_exclude(dir_path / d, excluded)
        )

        listings.append(
            build_dir_listing(dir_path, root, args.base_url_prefix, excluded)
        )
        if not args.quiet:
            rel_display = "." if at_root else str(dir_path.relative_to(root))
            print(f"Indexed {rel_display}")

    output_path = Path(args.output)
    output_path.mkdir(parents=True, exist_ok=True)
    manifest_path = output_path / "offline-package-manifest.json"
    write_json(manifest_path, listings)
    if not args.quiet:
        print(f"Wrote {manifest_path}")


if __name__ == "__main__":
    main()