diff --git a/README.md b/README.md
index da2218b..2e401cf 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@
| AI 信息差快报 | 新闻检索、素材匹配、口播字幕、视频渲染 | `skills/ai-tech-news-video/SKILL.md` |
| IT 基础设施连续 PNG | 根据描述或参考图生成 1-N 张连续风格竖版 PNG 素材 | `skills/it-infra-continuous-png/SKILL.md` |
| IT 基础设施长图讲解视频 | 基于长图素材生成 HyperFrames 讲解视频、口播、字幕和 timeline | `skills/it-infra-evolution-video/SKILL.md` |
+| IT 基础设施长图讲解视频 v2 | 从 PNG manifest 强制生成配置、HTML、音频、验收和 MP4 | `skills/it-infra-evolution-video-v2/SKILL.md` |
| 产品介绍视频 | 官网信息提炼、叙事结构、成片节奏 | `skills/product-intro-video/SKILL.md` |
| 视频音效工作流 | 音效搜索、下载与合成、时间线接入 | `skills/sound-fx-for-video/SKILL.md` |
| 简笔画动画视频 | 线稿风 + 短画面字;**主动网络搜参考图临摹**;逼真非抽象;GSAP 主时间线 + 可选 Anime.js;抽检闭环 | `skills/sketch-animation-video/SKILL.md` |
@@ -50,6 +51,25 @@
2. 按文档准备素材、音频与脚本
3. 在项目中执行渲染与抽检流程
+### IT 基础设施 PNG -> 视频闭环
+
+当一个任务同时需要 `it-infra-continuous-png` 和 `it-infra-evolution-video` 时,优先使用 v2 链路:
+
+1. `it-infra-continuous-png` 先输出 `assets/images/*.png` 和 `assets/images/manifest.md`
+2. `it-infra-evolution-video-v2` 读取 manifest,并调用 `scripts/build_it_infra_video.py`
+3. 任务目录中必须留下 `video.config.json`、`index.html`、`renders/*.mp4`、`ffprobe.json`
+
+示例:
+
+```bash
+python3 scripts/build_it_infra_video.py \
+ --project-dir /path/to/task/service-mesh-video \
+ --title "云原生 Service Mesh 网络科普视频" \
+ --audio-mode edge-tts \
+ --run-acceptance \
+ --output-name service-mesh-video.mp4
+```
+
## 账号信息
- 名称:拓扑同学
diff --git a/scripts/build_it_infra_video.py b/scripts/build_it_infra_video.py
new file mode 100755
index 0000000..df833ea
--- /dev/null
+++ b/scripts/build_it_infra_video.py
@@ -0,0 +1,664 @@
+#!/usr/bin/env python3
+"""Build an IT infrastructure explainer video project from a PNG manifest.
+
+The runner is intentionally deterministic: it turns a manifest produced by
+it-infra-continuous-png into one HyperFrames project, validates clip timing, and
+optionally runs the HyperFrames/ffprobe acceptance chain.
+"""
+
+from __future__ import annotations
+
+import argparse
+import html
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
+
+
+# Columns that must appear in the manifest table header. Each name is also a
+# ManifestRow field, so read_manifest can build a row straight from them.
+REQUIRED_MANIFEST_COLUMNS = [
+ "chapter_id",
+ "title",
+ "file",
+ "source_type",
+ "video_usage",
+ "scan_mode",
+ "safe_focus",
+]
+
+# The 8 leading bytes read_manifest compares against every manifest image.
+PNG_MAGIC = b"\x89PNG\r\n\x1a\n"
+# hyperframes version pinned into the npx commands of the generated package.json.
+HYPERFRAMES_VERSION = "0.6.15"
+
+
+class BuildError(RuntimeError):
+    """Error raised through fail() when the build cannot continue."""
+
+
+@dataclass(frozen=True)
+class ManifestRow:
+ """One data row of the PNG manifest table; every required column is kept as a string."""
+ # Chapter identifier; build_sections slugifies it into the section id.
+ chapter_id: str
+ # Chapter heading; stripped and reused as the section title.
+ title: str
+ # Image path, joined onto the project directory by read_manifest.
+ file: str
+ # Origin label of the image; shown as a tag with underscores turned into spaces.
+ source_type: str
+ # Preferred subtitle text for the section.
+ video_usage: str
+ # Image fit mode; read_manifest only accepts "cover" or "contain".
+ scan_mode: str
+ # Subtitle fallback when video_usage is blank; also copied into the section.
+ safe_focus: str
+
+
+@dataclass(frozen=True)
+class Section:
+ """One timed scene of the video, built by build_sections and serialized by write_json_config."""
+ # Slug derived from the manifest chapter_id.
+ id: str
+ # Offset of the scene from the start of the video, in seconds.
+ start: float
+ # Length of the scene, in seconds.
+ duration: float
+ # "M:SS" rendering of start, produced by format_time.
+ time_label: str
+ # Short label: the first 8 characters of the title, or "Chapter N" when the title is empty.
+ timeline_label: str
+ title: str
+ subtitle: str
+ # Display tags: source type, the literal "long image", and the scan mode.
+ tags: list[str]
+ # Image path copied verbatim from the manifest `file` column.
+ image: str
+ # Copied from the manifest scan_mode ("cover" or "contain").
+ image_fit: str
+ # Project-relative path of this scene's voiceover audio file.
+ voiceover: str
+ # "title: subtitle", or just the title when there is no subtitle.
+ caption: str
+ source_type: str
+ safe_focus: str
+
+
+def fail(message: str) -> None:
+    """Abort the build by raising BuildError carrying ``message``.
+
+    Raises:
+        BuildError: always.
+    """
+    error = BuildError(message)
+    raise error
+
+
+def run(cmd: list[str], cwd: Path, *, capture: bool = False) -> subprocess.CompletedProcess[str]:
+    """Echo a command, then execute it with ``cwd`` as working directory.
+
+    Args:
+        cmd: Program and arguments, passed to subprocess without a shell.
+        cwd: Directory the child process runs in.
+        capture: When true, stdout is captured and stderr is merged into it;
+            otherwise both streams are inherited from this process.
+
+    Returns:
+        The completed process; ``stdout`` is text when ``capture`` is true.
+
+    Raises:
+        subprocess.CalledProcessError: if the command exits non-zero.
+    """
+    print("+ " + " ".join(cmd), flush=True)
+    if capture:
+        streams = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
+    else:
+        streams = {"stdout": None, "stderr": None}
+    return subprocess.run(cmd, cwd=str(cwd), check=True, text=True, **streams)
+
+
+def slugify(value: str, fallback: str) -> str:
+    """Turn ``value`` into a lowercase slug of ``a-z``, ``0-9``, ``_`` and ``-``.
+
+    Runs of any other character become a single dash, repeated dashes are
+    collapsed, and dashes at either end are dropped. ``fallback`` is returned
+    when nothing usable remains (for example, an all-CJK identifier).
+    """
+    lowered = value.strip().lower()
+    dashed = re.sub(r"[^a-z0-9_-]+", "-", lowered)
+    slug = re.sub(r"-{2,}", "-", dashed).strip("-")
+    if slug:
+        return slug
+    return fallback
+
+
+def _split_table_row(line: str) -> list[str]:
+    """Split one ``| a | b |`` table line into stripped cell strings.
+
+    A pipe preceded by a backslash is cell content, not a column boundary,
+    and is returned as a bare ``|``.
+    """
+    cells = re.split(r"(?<!\\)\|", line.strip("|"))
+    return [cell.replace("\\|", "|").strip() for cell in cells]
+
+
+def parse_markdown_table(path: Path) -> list[dict[str, str]]:
+    """Parse the manifest's markdown table into one dict per data row.
+
+    Every line that starts and ends with ``|`` is treated as part of the
+    table. The first such line is the header, the second is skipped as the
+    separator row, and the rest are data rows keyed by header name. Rows
+    whose required columns are all blank are ignored.
+
+    Args:
+        path: Manifest file to read (UTF-8).
+
+    Returns:
+        The non-blank data rows, in file order.
+
+    Raises:
+        BuildError: if the file is missing, has fewer than three table lines,
+            lacks a required column, has a row whose cell count differs from
+            the header, or has no usable data rows.
+    """
+    if not path.exists():
+        fail(f"Manifest not found: {path}")
+    lines = [line.strip() for line in path.read_text(encoding="utf-8").splitlines()]
+    table_lines = [line for line in lines if line.startswith("|") and line.endswith("|")]
+    if len(table_lines) < 3:
+        fail(f"Manifest must contain a markdown table with data rows: {path}")
+
+    headers = _split_table_row(table_lines[0])
+    missing = [column for column in REQUIRED_MANIFEST_COLUMNS if column not in headers]
+    if missing:
+        fail(f"Manifest missing required columns: {', '.join(missing)}")
+
+    rows: list[dict[str, str]] = []
+    # table_lines[1] is taken to be the header separator and is not parsed.
+    for line in table_lines[2:]:
+        cells = _split_table_row(line)
+        if len(cells) != len(headers):
+            fail(f"Manifest row has {len(cells)} cells but header has {len(headers)}: {line}")
+        row = dict(zip(headers, cells, strict=True))
+        if any(row[column] for column in REQUIRED_MANIFEST_COLUMNS):
+            rows.append(row)
+    if not rows:
+        fail("Manifest has no image rows")
+    return rows
+
+
+def read_manifest(path: Path, project_dir: Path) -> list[ManifestRow]:
+    """Load the manifest and validate every row against the files on disk.
+
+    Args:
+        path: Manifest markdown file.
+        project_dir: Directory that each row's ``file`` column is joined onto.
+
+    Returns:
+        One ManifestRow per data row, in manifest order.
+
+    Raises:
+        BuildError: if a row has an empty chapter_id, a scan_mode other than
+            ``cover``/``contain``, an image path that is not a regular file,
+            or an image that does not start with the PNG signature. Errors
+            from parse_markdown_table propagate unchanged.
+    """
+    rows: list[ManifestRow] = []
+    for index, raw in enumerate(parse_markdown_table(path), start=1):
+        row = ManifestRow(**{column: raw[column] for column in REQUIRED_MANIFEST_COLUMNS})
+        if not row.chapter_id:
+            fail(f"Manifest row {index} has an empty chapter_id")
+        if row.scan_mode not in {"cover", "contain"}:
+            fail(f"Manifest row {index} scan_mode must be cover or contain: {row.scan_mode}")
+        image_path = project_dir / row.file
+        # is_file() rather than exists(): an empty `file` cell resolves to the
+        # project directory itself, which must be rejected, not opened.
+        if not image_path.is_file():
+            fail(f"Manifest row {index} image file not found: {row.file}")
+        # Only the signature is needed, so do not load the whole image.
+        with image_path.open("rb") as handle:
+            signature = handle.read(len(PNG_MAGIC))
+        if signature != PNG_MAGIC:
+            fail(f"Manifest row {index} image is not a real PNG: {row.file}")
+        rows.append(row)
+    return rows
+
+
+def format_time(seconds: float) -> str:
+    """Format a duration in seconds as ``M:SS``.
+
+    The value is rounded to the nearest whole second and negative values are
+    clamped to zero. Minutes are not capped at 59.
+    """
+    whole_seconds = int(round(seconds))
+    if whole_seconds < 0:
+        whole_seconds = 0
+    minutes, remainder = divmod(whole_seconds, 60)
+    return f"{minutes}:{remainder:02d}"
+
+
+def build_sections(rows: list[ManifestRow], section_duration: float) -> list[Section]:
+    """Lay the manifest rows out back to back as equal-length sections.
+
+    Row ``i`` (zero-based) starts at ``i * section_duration`` rounded to
+    milliseconds. The section id is the slugified chapter_id, falling back to
+    ``chapter-N``. The subtitle is video_usage, or safe_focus when that is
+    blank.
+
+    Args:
+        rows: Validated manifest rows, in playback order.
+        section_duration: Length of every section in seconds.
+
+    Returns:
+        One Section per row, in the same order.
+
+    Raises:
+        BuildError: from validate_non_overlapping, e.g. when
+            ``section_duration`` is not positive.
+    """
+
+    def to_section(position: int, row: ManifestRow) -> Section:
+        ordinal = position + 1
+        offset = round(position * section_duration, 3)
+        slug = slugify(row.chapter_id, f"chapter-{ordinal}")
+        heading = row.title.strip()
+        blurb = row.video_usage.strip() or row.safe_focus.strip()
+        return Section(
+            id=slug,
+            start=offset,
+            duration=section_duration,
+            time_label=format_time(offset),
+            timeline_label=heading[:8] or f"Chapter {ordinal}",
+            title=heading,
+            subtitle=blurb,
+            tags=[row.source_type.replace("_", " "), "long image", row.scan_mode],
+            image=row.file,
+            image_fit=row.scan_mode,
+            voiceover=f"assets/audio/vo-{ordinal:02d}-{slug}.mp3",
+            caption=f"{heading}: {blurb}" if blurb else heading,
+            source_type=row.source_type,
+            safe_focus=row.safe_focus,
+        )
+
+    sections = [to_section(position, row) for position, row in enumerate(rows)]
+    timing = [(section.start, section.duration, section.id) for section in sections]
+    validate_non_overlapping("section", timing)
+    return sections
+
+
+def validate_non_overlapping(name: str, clips: Iterable[tuple[float, float, str]]) -> None:
+    """Check that clips have positive length and do not overlap in time.
+
+    Clips are examined in sorted ``(start, duration, id)`` order. A clip may
+    begin up to one millisecond before the previous one ends, which absorbs
+    rounding noise.
+
+    Args:
+        name: Clip family name used as the prefix of error messages.
+        clips: ``(start, duration, clip_id)`` triples in any order.
+
+    Raises:
+        BuildError: on a non-positive duration or an overlap.
+    """
+    tolerance = 0.001
+    last_end = -1.0
+    last_id = ""
+    for clip in sorted(clips):
+        start, duration, clip_id = clip
+        if duration <= 0:
+            fail(f"{name} clip has non-positive duration: {clip_id}")
+        if start < last_end - tolerance:
+            fail(f"{name} clips overlap: {last_id} and {clip_id}")
+        last_end, last_id = start + duration, clip_id
+
+
+def write_json_config(project_dir: Path, title: str, sections: list[Section]) -> dict:
+    """Write ``video.config.json`` into the project and return its content.
+
+    The config records the total duration (latest section end, rounded to
+    milliseconds), a fixed 1920x1080 canvas, the style preset, every section
+    in camelCase form, and one inspect time at the midpoint of each section.
+
+    Args:
+        project_dir: Directory that receives ``video.config.json``.
+        title: Video title stored in the config.
+        sections: Sections to serialize; must not be empty.
+
+    Returns:
+        The dict that was serialized to disk.
+    """
+    # (JSON key, Section attribute) pairs; the order fixes the key order on disk.
+    field_map = (
+        ("id", "id"),
+        ("start", "start"),
+        ("duration", "duration"),
+        ("timeLabel", "time_label"),
+        ("timelineLabel", "timeline_label"),
+        ("title", "title"),
+        ("subtitle", "subtitle"),
+        ("tags", "tags"),
+        ("image", "image"),
+        ("imageFit", "image_fit"),
+        ("voiceover", "voiceover"),
+        ("caption", "caption"),
+        ("sourceType", "source_type"),
+        ("safeFocus", "safe_focus"),
+    )
+    section_entries = [
+        {key: getattr(section, attribute) for key, attribute in field_map}
+        for section in sections
+    ]
+    midpoints = [round(section.start + section.duration / 2, 3) for section in sections]
+    total = round(max(section.start + section.duration for section in sections), 3)
+
+    config = {
+        "duration": total,
+        "timelineColumns": len(sections),
+        "canvas": {"width": 1920, "height": 1080},
+        "stylePreset": "it-infra-v2-blue-white-two-column-scan",
+        "title": title,
+        "sections": section_entries,
+        "inspectTimes": midpoints,
+    }
+    serialized = json.dumps(config, ensure_ascii=False, indent=2) + "\n"
+    (project_dir / "video.config.json").write_text(serialized, encoding="utf-8")
+    return config
+
+
+def ensure_project_scaffold(project_dir: Path) -> None:
+    """Create the project folders and default config files if they are missing.
+
+    The asset, render and snapshot folders are always ensured.
+    ``package.json`` and ``hyperframes.json`` are written only when absent, so
+    existing files are never overwritten.
+    """
+
+    def write_json_if_missing(target: Path, payload: dict) -> None:
+        if target.exists():
+            return
+        target.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+
+    for folder in ("assets/audio", "assets/images", "renders", "snapshots"):
+        (project_dir / folder).mkdir(parents=True, exist_ok=True)
+
+    # Every npm script is the pinned hyperframes CLI followed by its own name.
+    hyperframes_cli = f"npx --yes hyperframes@{HYPERFRAMES_VERSION}"
+    write_json_if_missing(
+        project_dir / "package.json",
+        {
+            "name": "it-infra-evolution-video-v2-project",
+            "private": True,
+            "type": "module",
+            "scripts": {
+                command: f"{hyperframes_cli} {command}"
+                for command in ("lint", "inspect", "snapshot", "render")
+            },
+        },
+    )
+    write_json_if_missing(
+        project_dir / "hyperframes.json",
+        {
+            "$schema": "https://hyperframes.heygen.com/schema/hyperframes.json",
+            "registry": "https://raw.githubusercontent.com/heygen-com/hyperframes/main/registry",
+            "paths": {
+                "blocks": "compositions",
+                "components": "compositions/components",
+                "assets": "assets",
+            },
+        },
+    )
+
+
+def generate_tone_audio(project_dir: Path, sections: list[Section]) -> None:
+    """Synthesize placeholder sine-tone voiceovers and BGM with ffmpeg.
+
+    Each section gets a tone at ``360 + 60 * n`` Hz (``n`` is its 1-based
+    position) lasting the section duration minus 0.4 s, but never less than
+    0.4 s. The BGM is produced by generate_tone_bgm and spans the whole
+    timeline.
+
+    Args:
+        project_dir: Project root; also the working directory of ffmpeg.
+        sections: Sections whose ``voiceover`` paths are written.
+
+    Raises:
+        BuildError: if ffmpeg is not on PATH.
+        subprocess.CalledProcessError: if an ffmpeg invocation fails.
+    """
+    if not shutil.which("ffmpeg"):
+        fail("ffmpeg is required for --audio-mode tone")
+    for index, section in enumerate(sections, start=1):
+        # run() executes ffmpeg with cwd=project_dir, so a relative
+        # project_dir would be applied twice unless the path is absolute.
+        out = (project_dir / section.voiceover).absolute()
+        out.parent.mkdir(parents=True, exist_ok=True)
+        frequency = 360 + index * 60
+        duration = max(0.4, section.duration - 0.4)
+        run(
+            [
+                "ffmpeg",
+                "-y",
+                "-f",
+                "lavfi",
+                "-i",
+                f"sine=frequency={frequency}:duration={duration}",
+                "-q:a",
+                "9",
+                str(out),
+            ],
+            project_dir,
+            capture=True,
+        )
+    # Same 120 Hz bed as before; the helper writes it relative to cwd.
+    generate_tone_bgm(project_dir, max(s.start + s.duration for s in sections))
+
+
+def generate_edge_tts_audio(project_dir: Path, sections: list[Section]) -> None:
+    """Generate one voiceover per section with the edge-tts CLI.
+
+    Each section's caption is spoken with the ``zh-CN-YunxiNeural`` voice at
+    ``+20%`` rate and written to the section's ``voiceover`` path. A tone BGM
+    is synthesized only when ``assets/audio/bgm.wav`` does not exist yet.
+
+    Args:
+        project_dir: Project root; also the working directory of edge-tts.
+        sections: Sections to voice.
+
+    Raises:
+        BuildError: if edge-tts is not on PATH, or ffmpeg is missing when the
+            fallback BGM is needed.
+        subprocess.CalledProcessError: if a tool invocation fails.
+    """
+    if not shutil.which("edge-tts"):
+        fail("edge-tts is required for production voiceover generation")
+    for section in sections:
+        # run() executes edge-tts with cwd=project_dir, so a relative
+        # project_dir would be applied twice unless the path is absolute.
+        out = (project_dir / section.voiceover).absolute()
+        out.parent.mkdir(parents=True, exist_ok=True)
+        run(
+            [
+                "edge-tts",
+                "--voice",
+                "zh-CN-YunxiNeural",
+                "--rate",
+                "+20%",
+                "--text",
+                section.caption,
+                "--write-media",
+                str(out),
+            ],
+            project_dir,
+        )
+    bgm = project_dir / "assets/audio/bgm.wav"
+    if not bgm.exists():
+        generate_tone_bgm(project_dir, max(s.start + s.duration for s in sections))
+
+
+def generate_tone_bgm(project_dir: Path, duration: float) -> None:
+    """Synthesize a quiet 120 Hz sine tone as ``assets/audio/bgm.wav``.
+
+    The output path is relative and resolves against ``project_dir``, which
+    is the working directory of the ffmpeg process.
+
+    Args:
+        project_dir: Project root that receives the BGM file.
+        duration: Length of the tone in seconds.
+
+    Raises:
+        BuildError: if ffmpeg is not on PATH.
+        subprocess.CalledProcessError: if ffmpeg fails.
+    """
+    ffmpeg_available = shutil.which("ffmpeg")
+    if not ffmpeg_available:
+        fail("ffmpeg is required to synthesize fallback BGM")
+    tone_source = f"sine=frequency=120:duration={duration}"
+    command = ["ffmpeg", "-y", "-f", "lavfi", "-i", tone_source]
+    command += ["-filter:a", "volume=0.08", "assets/audio/bgm.wav"]
+    run(command, project_dir, capture=True)
+
+
+def css() -> str:
+ """Return the static stylesheet text for the 1920x1080 composition.
+
+ The text styles the scene layout, image panel, copy column, caption bar
+ and chapter timeline. `--timeline-columns` defaults to 1 here.
+ NOTE(review): presumably write_html overrides that variable with the real
+ section count; confirm against the caller.
+ """
+ return """
+ :root { --timeline-columns: 1; }
+ * { margin: 0; padding: 0; box-sizing: border-box; }
+ html, body {
+ width: 1920px;
+ height: 1080px;
+ overflow: hidden;
+ background: #f3faff;
+ font-family: Inter, "Noto Sans JP", Arial, sans-serif;
+ color: #07194f;
+ }
+ #root {
+ position: relative;
+ width: 1920px;
+ height: 1080px;
+ overflow: hidden;
+ background:
+ radial-gradient(circle at 82% 16%, rgba(73,217,255,0.26), transparent 30%),
+ radial-gradient(circle at 10% 92%, rgba(21,91,255,0.15), transparent 28%),
+ linear-gradient(135deg, #ffffff 0%, #f3faff 46%, #dceeff 100%);
+ }
+ .clip { position: absolute; overflow: hidden; }
+ .scene { inset: 0; opacity: 0; }
+ .topbar {
+ position: absolute; z-index: 40; top: 42px; left: 72px; right: 72px;
+ display: flex; justify-content: space-between; align-items: center;
+ font-size: 26px; font-weight: 850; color: rgba(7,25,79,0.82);
+ }
+ .brand-pill {
+ display: inline-flex; gap: 14px; align-items: center; padding: 14px 24px;
+ border-radius: 999px; color: #fff; background: linear-gradient(135deg, #155bff, #18bfa6);
+ box-shadow: 0 16px 40px rgba(21,91,255,0.22);
+ }
+ .brand-dot { width: 14px; height: 14px; border-radius: 50%; background: #fff; box-shadow: 0 0 24px #49d9ff; }
+ .scene-content {
+ width: 100%; height: 100%; padding: 92px 104px 214px;
+ display: grid; grid-template-columns: 0.96fr 1.04fr; gap: 58px; align-items: center;
+ }
+ .image-panel {
+ position: relative; width: 100%; height: 760px; border-radius: 34px; overflow: hidden;
+ background: #fff; border: 1px solid rgba(21,91,255,0.16);
+ box-shadow: 0 34px 90px rgba(17,60,128,0.18);
+ }
+ .image-panel img {
+ position: absolute; inset: 0; width: 100%; height: 100%; object-fit: cover;
+ object-position: center top; filter: saturate(1.06) contrast(1.03);
+ }
+ .image-panel.contain img { object-fit: contain; padding: 20px; background: #fff; }
+ .copy { position: relative; z-index: 2; display: flex; flex-direction: column; gap: 26px; }
+ .kicker {
+ width: max-content; max-width: 100%; color: #155bff; background: rgba(21,91,255,0.08);
+ border: 1px solid rgba(21,91,255,0.22); border-radius: 999px; padding: 12px 22px;
+ font-size: 27px; font-weight: 950;
+ }
+ h1, h2 { font-size: 64px; line-height: 1.08; font-weight: 950; letter-spacing: 0; }
+ .lead { font-size: 32px; line-height: 1.55; font-weight: 760; color: rgba(7,25,79,0.78); }
+ .tag-row { display: flex; gap: 14px; flex-wrap: wrap; }
+ .tag {
+ padding: 12px 16px; border-radius: 16px; color: #07194f; background: rgba(255,255,255,0.76);
+ border: 1px solid rgba(73,217,255,0.34); box-shadow: 0 14px 30px rgba(17,60,128,0.08);
+ font-size: 24px; font-weight: 900;
+ }
+ .caption {
+ position: absolute; z-index: 55; left: 320px; right: 320px; bottom: 132px; min-height: 78px;
+ display: grid; place-items: center; padding: 14px 32px; border-radius: 26px;
+ background: rgba(7,25,79,0.88); color: #fff; font-size: 32px; line-height: 1.34;
+ font-weight: 850; text-align: center; box-shadow: 0 20px 55px rgba(7,25,79,0.26); opacity: 0;
+ }
+ .timeline {
+ position: absolute; z-index: 52; left: 70px; right: 70px; bottom: 28px; height: 88px;
+ padding: 12px 14px 18px; display: grid; grid-template-columns: repeat(var(--timeline-columns), minmax(0, 1fr));
+ gap: 10px; align-items: center; overflow: visible; border-radius: 30px;
+ background: rgba(255,255,255,0.76); border: 1px solid rgba(21,91,255,0.15);
+ box-shadow: 0 20px 60px rgba(17,60,128,0.14); backdrop-filter: blur(12px);
+ }
+ .timeline-fill { position: absolute; left: 22px; right: 22px; bottom: 8px; height: 8px; border-radius: 999px; background: rgba(7,25,79,0.12); overflow: hidden; }
+ .timeline-progress { display: block; width: 0%; height: 100%; border-radius: inherit; background: linear-gradient(90deg, #155bff, #49d9ff, #18bfa6); }
+ .chapter-tag {
+ position: relative; z-index: 2; min-width: 0; height: 50px; display: flex; align-items: center; justify-content: center;
+ gap: 8px; padding: 0 10px; border-radius: 18px; background: rgba(255,255,255,0.72);
+ border: 1px solid rgba(21,91,255,0.18); color: rgba(7,25,79,0.74); font-size: 20px;
+ line-height: 1; font-weight: 900; white-space: nowrap; box-shadow: 0 12px 26px rgba(17,60,128,0.08);
+ }
+ .chapter-time { color: #155bff; font-variant-numeric: tabular-nums; }
+ .chapter-title { overflow: hidden; text-overflow: ellipsis; }
+ .chapter-tag.active { color: #fff; background: linear-gradient(135deg, #155bff, #18bfa6); border-color: rgba(255,255,255,0.62); box-shadow: 0 20px 42px rgba(21,91,255,0.28); }
+ .chapter-tag.active .chapter-time { color: #fff; }
+ .glow-line { position: absolute; width: 560px; height: 560px; border-radius: 50%; border: 2px solid rgba(73,217,255,0.34); right: -160px; top: -150px; }
+ """
+
+
+def js_array(values: list[str | float]) -> str:
+    """Serialize ``values`` as a JSON array literal, leaving non-ASCII text unescaped."""
+    encoded = json.dumps(values, ensure_ascii=False)
+    return encoded
+
+
+def write_html(project_dir: Path, title: str, config: dict) -> None:
+ sections = config["sections"]
+ duration = config["duration"]
+ timeline_columns = config["timelineColumns"]
+
+ scene_html = []
+ caption_html = []
+ chapter_html = []
+ audio_html = [
+ f''
+ ]
+
+ for index, section in enumerate(sections):
+ scene_id = f"scene-{section['id']}"
+ cap_id = f"cap-{index + 1:02d}-{section['id']}"
+ vo_id = f"vo-{index + 1:02d}-{section['id']}"
+ image_panel_class = "image-panel contain" if section["imageFit"] == "contain" else "image-panel"
+ tags = "".join(f'{html.escape(tag)}' for tag in section["tags"])
+ heading_tag = "h1" if index == 0 else "h2"
+ caption_start = round(section["start"] + 0.2, 3)
+ caption_duration = round(max(0.4, section["duration"] - 0.4), 3)
+ scene_html.append(
+ f"""
+ {html.escape(section['subtitle'])}