diff --git a/README.md b/README.md index da2218b..2e401cf 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ | AI 信息差快报 | 新闻检索、素材匹配、口播字幕、视频渲染 | `skills/ai-tech-news-video/SKILL.md` | | IT 基础设施连续 PNG | 根据描述或参考图生成 1-N 张连续风格竖版 PNG 素材 | `skills/it-infra-continuous-png/SKILL.md` | | IT 基础设施长图讲解视频 | 基于长图素材生成 HyperFrames 讲解视频、口播、字幕和 timeline | `skills/it-infra-evolution-video/SKILL.md` | +| IT 基础设施长图讲解视频 v2 | 从 PNG manifest 强制生成配置、HTML、音频、验收和 MP4 | `skills/it-infra-evolution-video-v2/SKILL.md` | | 产品介绍视频 | 官网信息提炼、叙事结构、成片节奏 | `skills/product-intro-video/SKILL.md` | | 视频音效工作流 | 音效搜索、下载与合成、时间线接入 | `skills/sound-fx-for-video/SKILL.md` | | 简笔画动画视频 | 线稿风 + 短画面字;**主动网络搜参考图临摹**;逼真非抽象;GSAP 主时间线 + 可选 Anime.js;抽检闭环 | `skills/sketch-animation-video/SKILL.md` | @@ -50,6 +51,25 @@ 2. 按文档准备素材、音频与脚本 3. 在项目中执行渲染与抽检流程 +### IT 基础设施 PNG -> 视频闭环 + +当一个任务同时需要 `it-infra-continuous-png` 和 `it-infra-evolution-video` 时,优先使用 v2 链路: + +1. `it-infra-continuous-png` 先输出 `assets/images/*.png` 和 `assets/images/manifest.md` +2. `it-infra-evolution-video-v2` 读取 manifest,并调用 `scripts/build_it_infra_video.py` +3. 任务目录中必须留下 `video.config.json`、`index.html`、`renders/*.mp4`、`ffprobe.json` + +示例: + +```bash +python3 scripts/build_it_infra_video.py \ + --project-dir /path/to/task/service-mesh-video \ + --title "云原生 Service Mesh 网络科普视频" \ + --audio-mode edge-tts \ + --run-acceptance \ + --output-name service-mesh-video.mp4 +``` + ## 账号信息 - 名称:拓扑同学 diff --git a/scripts/build_it_infra_video.py b/scripts/build_it_infra_video.py new file mode 100755 index 0000000..df833ea --- /dev/null +++ b/scripts/build_it_infra_video.py @@ -0,0 +1,664 @@ +#!/usr/bin/env python3 +"""Build an IT infrastructure explainer video project from a PNG manifest. + +The runner is intentionally deterministic: it turns a manifest produced by +it-infra-continuous-png into one HyperFrames project, validates clip timing, and +optionally runs the HyperFrames/ffprobe acceptance chain. 
+""" + +from __future__ import annotations + +import argparse +import html +import json +import os +import re +import shutil +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable + + +REQUIRED_MANIFEST_COLUMNS = [ + "chapter_id", + "title", + "file", + "source_type", + "video_usage", + "scan_mode", + "safe_focus", +] + +PNG_MAGIC = b"\x89PNG\r\n\x1a\n" +HYPERFRAMES_VERSION = "0.6.15" + + +class BuildError(RuntimeError): + pass + + +@dataclass(frozen=True) +class ManifestRow: + chapter_id: str + title: str + file: str + source_type: str + video_usage: str + scan_mode: str + safe_focus: str + + +@dataclass(frozen=True) +class Section: + id: str + start: float + duration: float + time_label: str + timeline_label: str + title: str + subtitle: str + tags: list[str] + image: str + image_fit: str + voiceover: str + caption: str + source_type: str + safe_focus: str + + +def fail(message: str) -> None: + raise BuildError(message) + + +def run(cmd: list[str], cwd: Path, *, capture: bool = False) -> subprocess.CompletedProcess[str]: + print("+ " + " ".join(cmd), flush=True) + return subprocess.run( + cmd, + cwd=str(cwd), + check=True, + text=True, + stdout=subprocess.PIPE if capture else None, + stderr=subprocess.STDOUT if capture else None, + ) + + +def slugify(value: str, fallback: str) -> str: + value = value.strip().lower() + value = re.sub(r"[^a-z0-9_-]+", "-", value) + value = re.sub(r"-{2,}", "-", value).strip("-") + return value or fallback + + +def parse_markdown_table(path: Path) -> list[dict[str, str]]: + if not path.exists(): + fail(f"Manifest not found: {path}") + lines = [line.strip() for line in path.read_text(encoding="utf-8").splitlines()] + table_lines = [line for line in lines if line.startswith("|") and line.endswith("|")] + if len(table_lines) < 3: + fail(f"Manifest must contain a markdown table with data rows: {path}") + + headers = [cell.strip() for cell in 
table_lines[0].strip("|").split("|")] + missing = [column for column in REQUIRED_MANIFEST_COLUMNS if column not in headers] + if missing: + fail(f"Manifest missing required columns: {', '.join(missing)}") + + rows: list[dict[str, str]] = [] + for line in table_lines[2:]: + cells = [cell.strip() for cell in line.strip("|").split("|")] + if len(cells) != len(headers): + fail(f"Manifest row has {len(cells)} cells but header has {len(headers)}: {line}") + row = dict(zip(headers, cells, strict=True)) + if any(row[column] for column in REQUIRED_MANIFEST_COLUMNS): + rows.append(row) + if not rows: + fail("Manifest has no image rows") + return rows + + +def read_manifest(path: Path, project_dir: Path) -> list[ManifestRow]: + rows = [] + for index, raw in enumerate(parse_markdown_table(path), start=1): + row = ManifestRow(**{column: raw[column] for column in REQUIRED_MANIFEST_COLUMNS}) + if not row.chapter_id: + fail(f"Manifest row {index} has an empty chapter_id") + if row.scan_mode not in {"cover", "contain"}: + fail(f"Manifest row {index} scan_mode must be cover or contain: {row.scan_mode}") + image_path = project_dir / row.file + if not image_path.exists(): + fail(f"Manifest row {index} image file not found: {row.file}") + if image_path.read_bytes()[:8] != PNG_MAGIC: + fail(f"Manifest row {index} image is not a real PNG: {row.file}") + rows.append(row) + return rows + + +def format_time(seconds: float) -> str: + total = max(0, int(round(seconds))) + return f"{total // 60}:{total % 60:02d}" + + +def build_sections(rows: list[ManifestRow], section_duration: float) -> list[Section]: + sections: list[Section] = [] + for index, row in enumerate(rows): + start = round(index * section_duration, 3) + chapter_id = slugify(row.chapter_id, f"chapter-{index + 1}") + title = row.title.strip() + subtitle = row.video_usage.strip() or row.safe_focus.strip() + caption = f"{title}: {subtitle}" if subtitle else title + tags = [ + row.source_type.replace("_", " "), + "long image", + 
row.scan_mode, + ] + sections.append( + Section( + id=chapter_id, + start=start, + duration=section_duration, + time_label=format_time(start), + timeline_label=title[:8] or f"Chapter {index + 1}", + title=title, + subtitle=subtitle, + tags=tags, + image=row.file, + image_fit=row.scan_mode, + voiceover=f"assets/audio/vo-{index + 1:02d}-{chapter_id}.mp3", + caption=caption, + source_type=row.source_type, + safe_focus=row.safe_focus, + ) + ) + validate_non_overlapping("section", ((s.start, s.duration, s.id) for s in sections)) + return sections + + +def validate_non_overlapping(name: str, clips: Iterable[tuple[float, float, str]]) -> None: + previous_end = -1.0 + previous_id = "" + for start, duration, clip_id in sorted(clips): + if duration <= 0: + fail(f"{name} clip has non-positive duration: {clip_id}") + if start < previous_end - 0.001: + fail(f"{name} clips overlap: {previous_id} and {clip_id}") + previous_end = start + duration + previous_id = clip_id + + +def write_json_config(project_dir: Path, title: str, sections: list[Section]) -> dict: + duration = round(max(s.start + s.duration for s in sections), 3) + config = { + "duration": duration, + "timelineColumns": len(sections), + "canvas": {"width": 1920, "height": 1080}, + "stylePreset": "it-infra-v2-blue-white-two-column-scan", + "title": title, + "sections": [ + { + "id": section.id, + "start": section.start, + "duration": section.duration, + "timeLabel": section.time_label, + "timelineLabel": section.timeline_label, + "title": section.title, + "subtitle": section.subtitle, + "tags": section.tags, + "image": section.image, + "imageFit": section.image_fit, + "voiceover": section.voiceover, + "caption": section.caption, + "sourceType": section.source_type, + "safeFocus": section.safe_focus, + } + for section in sections + ], + "inspectTimes": [round(section.start + section.duration / 2, 3) for section in sections], + } + (project_dir / "video.config.json").write_text( + json.dumps(config, ensure_ascii=False, 
indent=2) + "\n", + encoding="utf-8", + ) + return config + + +def ensure_project_scaffold(project_dir: Path) -> None: + for relative in ["assets/audio", "assets/images", "renders", "snapshots"]: + (project_dir / relative).mkdir(parents=True, exist_ok=True) + package_json = project_dir / "package.json" + if not package_json.exists(): + package_json.write_text( + json.dumps( + { + "name": "it-infra-evolution-video-v2-project", + "private": True, + "type": "module", + "scripts": { + "lint": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} lint", + "inspect": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} inspect", + "snapshot": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} snapshot", + "render": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} render", + }, + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) + hyperframes_json = project_dir / "hyperframes.json" + if not hyperframes_json.exists(): + hyperframes_json.write_text( + json.dumps( + { + "$schema": "https://hyperframes.heygen.com/schema/hyperframes.json", + "registry": "https://raw.githubusercontent.com/heygen-com/hyperframes/main/registry", + "paths": { + "blocks": "compositions", + "components": "compositions/components", + "assets": "assets", + }, + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) + + +def generate_tone_audio(project_dir: Path, sections: list[Section]) -> None: + if not shutil.which("ffmpeg"): + fail("ffmpeg is required for --audio-mode tone") + for index, section in enumerate(sections, start=1): + out = project_dir / section.voiceover + out.parent.mkdir(parents=True, exist_ok=True) + frequency = str(360 + index * 60) + duration = str(max(0.4, section.duration - 0.4)) + run( + [ + "ffmpeg", + "-y", + "-f", + "lavfi", + "-i", + f"sine=frequency={frequency}:duration={duration}", + "-q:a", + "9", + str(out), + ], + project_dir, + capture=True, + ) + bgm = project_dir / "assets/audio/bgm.wav" + total_duration = str(max(s.start + s.duration for s in sections)) + run( + [ + "ffmpeg", + "-y", + 
"-f", + "lavfi", + "-i", + f"sine=frequency=120:duration={total_duration}", + "-filter:a", + "volume=0.08", + str(bgm), + ], + project_dir, + capture=True, + ) + + +def generate_edge_tts_audio(project_dir: Path, sections: list[Section]) -> None: + if not shutil.which("edge-tts"): + fail("edge-tts is required for production voiceover generation") + for section in sections: + out = project_dir / section.voiceover + out.parent.mkdir(parents=True, exist_ok=True) + run( + [ + "edge-tts", + "--voice", + "zh-CN-YunxiNeural", + "--rate", + "+20%", + "--text", + section.caption, + "--write-media", + str(out), + ], + project_dir, + ) + bgm = project_dir / "assets/audio/bgm.wav" + if not bgm.exists(): + generate_tone_bgm(project_dir, max(s.start + s.duration for s in sections)) + + +def generate_tone_bgm(project_dir: Path, duration: float) -> None: + if not shutil.which("ffmpeg"): + fail("ffmpeg is required to synthesize fallback BGM") + run( + [ + "ffmpeg", + "-y", + "-f", + "lavfi", + "-i", + f"sine=frequency=120:duration={duration}", + "-filter:a", + "volume=0.08", + "assets/audio/bgm.wav", + ], + project_dir, + capture=True, + ) + + +def css() -> str: + return """ + :root { --timeline-columns: 1; } + * { margin: 0; padding: 0; box-sizing: border-box; } + html, body { + width: 1920px; + height: 1080px; + overflow: hidden; + background: #f3faff; + font-family: Inter, "Noto Sans JP", Arial, sans-serif; + color: #07194f; + } + #root { + position: relative; + width: 1920px; + height: 1080px; + overflow: hidden; + background: + radial-gradient(circle at 82% 16%, rgba(73,217,255,0.26), transparent 30%), + radial-gradient(circle at 10% 92%, rgba(21,91,255,0.15), transparent 28%), + linear-gradient(135deg, #ffffff 0%, #f3faff 46%, #dceeff 100%); + } + .clip { position: absolute; overflow: hidden; } + .scene { inset: 0; opacity: 0; } + .topbar { + position: absolute; z-index: 40; top: 42px; left: 72px; right: 72px; + display: flex; justify-content: space-between; align-items: 
center; + font-size: 26px; font-weight: 850; color: rgba(7,25,79,0.82); + } + .brand-pill { + display: inline-flex; gap: 14px; align-items: center; padding: 14px 24px; + border-radius: 999px; color: #fff; background: linear-gradient(135deg, #155bff, #18bfa6); + box-shadow: 0 16px 40px rgba(21,91,255,0.22); + } + .brand-dot { width: 14px; height: 14px; border-radius: 50%; background: #fff; box-shadow: 0 0 24px #49d9ff; } + .scene-content { + width: 100%; height: 100%; padding: 92px 104px 214px; + display: grid; grid-template-columns: 0.96fr 1.04fr; gap: 58px; align-items: center; + } + .image-panel { + position: relative; width: 100%; height: 760px; border-radius: 34px; overflow: hidden; + background: #fff; border: 1px solid rgba(21,91,255,0.16); + box-shadow: 0 34px 90px rgba(17,60,128,0.18); + } + .image-panel img { + position: absolute; inset: 0; width: 100%; height: 100%; object-fit: cover; + object-position: center top; filter: saturate(1.06) contrast(1.03); + } + .image-panel.contain img { object-fit: contain; padding: 20px; background: #fff; } + .copy { position: relative; z-index: 2; display: flex; flex-direction: column; gap: 26px; } + .kicker { + width: max-content; max-width: 100%; color: #155bff; background: rgba(21,91,255,0.08); + border: 1px solid rgba(21,91,255,0.22); border-radius: 999px; padding: 12px 22px; + font-size: 27px; font-weight: 950; + } + h1, h2 { font-size: 64px; line-height: 1.08; font-weight: 950; letter-spacing: 0; } + .lead { font-size: 32px; line-height: 1.55; font-weight: 760; color: rgba(7,25,79,0.78); } + .tag-row { display: flex; gap: 14px; flex-wrap: wrap; } + .tag { + padding: 12px 16px; border-radius: 16px; color: #07194f; background: rgba(255,255,255,0.76); + border: 1px solid rgba(73,217,255,0.34); box-shadow: 0 14px 30px rgba(17,60,128,0.08); + font-size: 24px; font-weight: 900; + } + .caption { + position: absolute; z-index: 55; left: 320px; right: 320px; bottom: 132px; min-height: 78px; + display: grid; place-items: 
center; padding: 14px 32px; border-radius: 26px; + background: rgba(7,25,79,0.88); color: #fff; font-size: 32px; line-height: 1.34; + font-weight: 850; text-align: center; box-shadow: 0 20px 55px rgba(7,25,79,0.26); opacity: 0; + } + .timeline { + position: absolute; z-index: 52; left: 70px; right: 70px; bottom: 28px; height: 88px; + padding: 12px 14px 18px; display: grid; grid-template-columns: repeat(var(--timeline-columns), minmax(0, 1fr)); + gap: 10px; align-items: center; overflow: visible; border-radius: 30px; + background: rgba(255,255,255,0.76); border: 1px solid rgba(21,91,255,0.15); + box-shadow: 0 20px 60px rgba(17,60,128,0.14); backdrop-filter: blur(12px); + } + .timeline-fill { position: absolute; left: 22px; right: 22px; bottom: 8px; height: 8px; border-radius: 999px; background: rgba(7,25,79,0.12); overflow: hidden; } + .timeline-progress { display: block; width: 0%; height: 100%; border-radius: inherit; background: linear-gradient(90deg, #155bff, #49d9ff, #18bfa6); } + .chapter-tag { + position: relative; z-index: 2; min-width: 0; height: 50px; display: flex; align-items: center; justify-content: center; + gap: 8px; padding: 0 10px; border-radius: 18px; background: rgba(255,255,255,0.72); + border: 1px solid rgba(21,91,255,0.18); color: rgba(7,25,79,0.74); font-size: 20px; + line-height: 1; font-weight: 900; white-space: nowrap; box-shadow: 0 12px 26px rgba(17,60,128,0.08); + } + .chapter-time { color: #155bff; font-variant-numeric: tabular-nums; } + .chapter-title { overflow: hidden; text-overflow: ellipsis; } + .chapter-tag.active { color: #fff; background: linear-gradient(135deg, #155bff, #18bfa6); border-color: rgba(255,255,255,0.62); box-shadow: 0 20px 42px rgba(21,91,255,0.28); } + .chapter-tag.active .chapter-time { color: #fff; } + .glow-line { position: absolute; width: 560px; height: 560px; border-radius: 50%; border: 2px solid rgba(73,217,255,0.34); right: -160px; top: -150px; } + """ + + +def js_array(values: list[str | float]) -> str: + 
return json.dumps(values, ensure_ascii=False) + + +def write_html(project_dir: Path, title: str, config: dict) -> None: + sections = config["sections"] + duration = config["duration"] + timeline_columns = config["timelineColumns"] + + scene_html = [] + caption_html = [] + chapter_html = [] + audio_html = [ + f'' + ] + + for index, section in enumerate(sections): + scene_id = f"scene-{section['id']}" + cap_id = f"cap-{index + 1:02d}-{section['id']}" + vo_id = f"vo-{index + 1:02d}-{section['id']}" + image_panel_class = "image-panel contain" if section["imageFit"] == "contain" else "image-panel" + tags = "".join(f'{html.escape(tag)}' for tag in section["tags"]) + heading_tag = "h1" if index == 0 else "h2" + caption_start = round(section["start"] + 0.2, 3) + caption_duration = round(max(0.4, section["duration"] - 0.4), 3) + scene_html.append( + f""" +
+
+
{html.escape(section['title'])}
+
+
{html.escape(section['timeLabel'])} / {html.escape(section['timelineLabel'])}
+ <{heading_tag}>{html.escape(section['title'])} +

{html.escape(section['subtitle'])}

+
{tags}
+
+
+
""" + ) + caption_html.append( + f'
{html.escape(section["caption"])}
' + ) + chapter_html.append( + f'
{html.escape(section["timeLabel"])}{html.escape(section["timelineLabel"])}
' + ) + audio_html.append( + f'' + ) + + starts = [section["start"] for section in sections] + durations = [section["duration"] for section in sections] + scenes = [f"#scene-{section['id']}" for section in sections] + captions = [f"#cap-{index + 1:02d}-{section['id']}" for index, section in enumerate(sections)] + chapters = [f"#chapter-{index}" for index in range(len(sections))] + + index_html = f""" + + + + + + + + +
+
+
+
{html.escape(title)}
+
PNG manifest -> HyperFrames -> MP4
+
+ {''.join(scene_html)} + {''.join(caption_html)} +
+ {''.join(chapter_html)} +
+
+ {''.join(audio_html)} +
+ + + +""" + (project_dir / "index.html").write_text(index_html, encoding="utf-8") + + +def doctor(audio_mode: str, run_acceptance: bool) -> None: + required = ["ffmpeg", "ffprobe", "npx"] + if audio_mode == "edge-tts": + required.append("edge-tts") + missing = [tool for tool in required if not shutil.which(tool)] + if missing: + fail(f"Missing required tool(s): {', '.join(missing)}") + if run_acceptance and not shutil.which("npx"): + fail("npx is required to run HyperFrames acceptance") + + +def run_acceptance(project_dir: Path, config: dict, output_name: str) -> None: + inspect_at = ",".join(str(value) for value in config["inspectTimes"]) + output_path = f"renders/{output_name}" + run(["npx", "--yes", f"hyperframes@{HYPERFRAMES_VERSION}", "lint"], project_dir) + run(["npx", "--yes", f"hyperframes@{HYPERFRAMES_VERSION}", "inspect", "--at", inspect_at], project_dir) + run(["npx", "--yes", f"hyperframes@{HYPERFRAMES_VERSION}", "snapshot", "--at", inspect_at], project_dir) + run(["npx", "--yes", f"hyperframes@{HYPERFRAMES_VERSION}", "render", "--output", output_path, "--quality", "standard"], project_dir) + probe = run( + [ + "ffprobe", + "-v", + "quiet", + "-show_entries", + "format=duration,size:stream=codec_type,width,height,r_frame_rate", + "-of", + "json", + output_path, + ], + project_dir, + capture=True, + ) + probe_json = json.loads(probe.stdout or "{}") + streams = probe_json.get("streams", []) + has_video = any(stream.get("codec_type") == "video" for stream in streams) + has_audio = any(stream.get("codec_type") == "audio" for stream in streams) + video_stream = next((stream for stream in streams if stream.get("codec_type") == "video"), {}) + if not has_video or not has_audio: + fail("ffprobe acceptance failed: rendered MP4 must contain video and audio streams") + if video_stream.get("width") != 1920 or video_stream.get("height") != 1080: + fail(f"ffprobe acceptance failed: expected 1920x1080, got {video_stream.get('width')}x{video_stream.get('height')}") + 
actual_duration = float(probe_json.get("format", {}).get("duration", 0) or 0) + expected_duration = float(config["duration"]) + tolerance = max(3.0, expected_duration * 0.15) + if abs(actual_duration - expected_duration) > tolerance: + fail( + "ffprobe acceptance failed: " + f"expected duration near {expected_duration:.3f}s, got {actual_duration:.3f}s" + ) + (project_dir / "ffprobe.json").write_text(json.dumps(probe_json, indent=2) + "\n", encoding="utf-8") + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--project-dir", type=Path, default=Path.cwd(), help="HyperFrames project directory") + parser.add_argument("--manifest", type=Path, default=None, help="PNG manifest path") + parser.add_argument("--title", default="IT 基础设施长图讲解视频", help="Video title") + parser.add_argument("--section-duration", type=float, default=8.0, help="Seconds per manifest row") + parser.add_argument("--audio-mode", choices=["edge-tts", "tone", "none"], default="edge-tts") + parser.add_argument("--run-acceptance", action="store_true", help="Run lint/inspect/snapshot/render/ffprobe") + parser.add_argument("--output-name", default="it-infra-evolution.mp4", help="Rendered MP4 file name") + args = parser.parse_args(argv) + + try: + project_dir = args.project_dir.resolve() + manifest = (args.manifest or project_dir / "assets/images/manifest.md").resolve() + ensure_project_scaffold(project_dir) + doctor(args.audio_mode, args.run_acceptance) + rows = read_manifest(manifest, project_dir) + sections = build_sections(rows, args.section_duration) + config = write_json_config(project_dir, args.title, sections) + if args.audio_mode == "edge-tts": + generate_edge_tts_audio(project_dir, sections) + elif args.audio_mode == "tone": + generate_tone_audio(project_dir, sections) + elif not (project_dir / "assets/audio/bgm.wav").exists(): + fail("--audio-mode none requires existing assets/audio/bgm.wav") + write_html(project_dir, 
args.title, config) + if args.run_acceptance: + run_acceptance(project_dir, config, args.output_name) + print("Build complete. Required task artifacts: index.html, video.config.json, assets/images/manifest.md, assets/audio/, renders/ or run with --run-acceptance.") + return 0 + except (BuildError, subprocess.CalledProcessError, json.JSONDecodeError) as exc: + print(f"Build failed: {exc}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/skills/it-infra-continuous-png/SKILL.md b/skills/it-infra-continuous-png/SKILL.md index 0dfb727..89ddda7 100644 --- a/skills/it-infra-continuous-png/SKILL.md +++ b/skills/it-infra-continuous-png/SKILL.md @@ -81,6 +81,23 @@ description: "生成 IT 基础设施系列连续风格 PNG 图片。适用于一 `it-infra-evolution-video` 不应重新发明这些图片的风格,只读取 manifest 并作为真实长图素材使用。 +当任务还选择了 `it-infra-evolution-video-v2` 时,本 skill 完成后必须停在清晰的交接点: + +1. 确认 `assets/images/*.png` 的数量与 manifest 数据行数量一致。 +2. 确认每个 `file` 指向真实 PNG 文件,而不是 SVG、空文件或占位路径。 +3. 将下一步命令写给视频 skill: + +```bash +python3 /path/to/ai-video-skills/scripts/build_it_infra_video.py \ + --project-dir . 
\ + --title "<用户主题>" \ + --audio-mode edge-tts \ + --run-acceptance \ + --output-name "<topic-slug>.mp4" +``` + +不要在本 skill 中生成 `index.html`、`video.config.json` 或 MP4;这些是视频 skill 的职责。 + ## 参考文件 - 风格规范:`references/style-spec.md` diff --git a/skills/it-infra-evolution-video-v2/SKILL.md b/skills/it-infra-evolution-video-v2/SKILL.md new file mode 100644 index 0000000..0fba663 --- /dev/null +++ b/skills/it-infra-evolution-video-v2/SKILL.md @@ -0,0 +1,73 @@ +--- +name: it-infra-evolution-video-v2 +version: "v2" +description: "从 it-infra-continuous-png 的真实 PNG manifest 生成 IT 基础设施长图讲解视频。强制执行 manifest -> video.config.json -> index.html -> audio -> HyperFrames acceptance -> MP4 -> ffprobe 的闭环。" +--- + +# IT 基础设施长图讲解视频 v2 + +本 skill 是 `it-infra-evolution-video` 的可执行 v2 路径。v1 模板保持 frozen;v2 的主路径必须通过仓库 runner 完成,不再让 Agent 临时手写 `generate_index.py` 或自由拼接模板片段。 + +## 调用前置条件 + +必须先完成 `it-infra-continuous-png`: + +- `assets/images/*.png` 存在,且每个文件是真实 PNG。 +- `assets/images/manifest.md` 存在。 +- manifest 每一行都包含 `chapter_id`、`title`、`file`、`source_type`、`video_usage`、`scan_mode`、`safe_focus`。 + +缺少这些输入时,不要继续生成视频,不要用 CSS 卡片、假截图或 SVG 冒充 PNG。 + +## 标准调用 + +在当前任务工作目录或视频项目目录执行: + +```bash +python3 /path/to/ai-video-skills/scripts/build_it_infra_video.py \ + --project-dir . \ + --title "云原生 Service Mesh 网络科普视频" \ + --audio-mode edge-tts \ + --run-acceptance \ + --output-name service-mesh-video.mp4 +``` + +OpenClaw 任务中如果同时选择了 `it-infra-continuous-png` 和 `it-infra-evolution-video-v2`,必须按以下顺序执行: + +1. 先用 `it-infra-continuous-png` 生成多张 PNG 和 manifest。 +2. 再用本 skill 的 runner 读取 manifest。 +3. 
最后把 `renders/service-mesh-video.mp4`、`video.config.json`、`assets/images/manifest.md`、`ffprobe.json` 留在当前 task workspace。 + +## Runner 合同 + +runner 负责: + +- 解析并校验 manifest。 +- 拒绝缺失图片、伪 PNG、缺失列、非法 `scan_mode`。 +- 生成唯一 ID 的 `index.html`。 +- 保证 scene、caption、voiceover 在各自 track 上不重叠。 +- 只保留一个全局 BGM 音轨。 +- 生成 `video.config.json` 和 `inspectTimes`。 +- 执行 `lint -> inspect -> snapshot -> render -> ffprobe`。 + +生产模式默认 `--audio-mode edge-tts`。本地测试或无网络 dry-run 可以使用 `--audio-mode tone`,但不能把 tone 输出当作正式口播成片。 + +## 验收标准 + +只有以下文件都存在,才能在 XWorkmate/OpenClaw 中报告完成: + +- `index.html` +- `video.config.json` +- `assets/images/manifest.md` +- `assets/audio/*.mp3` +- `assets/audio/bgm.wav` +- `renders/<topic-slug>.mp4` +- `ffprobe.json` + +`ffprobe.json` 必须显示: + +- 分辨率为 `1920x1080` +- 有 video stream +- 有 audio stream +- 时长接近 `video.config.json` 的 `duration` + +如果 HyperFrames 或 ffprobe 任一阶段失败,只输出失败阶段和原因,不输出“完成”。 diff --git a/tests/fixtures/it-infra-chain/assets/images/manifest.md b/tests/fixtures/it-infra-chain/assets/images/manifest.md new file mode 100644 index 0000000..981931d --- /dev/null +++ b/tests/fixtures/it-infra-chain/assets/images/manifest.md @@ -0,0 +1,7 @@ +# Fixture Image Manifest + +| chapter_id | title | file | source_type | video_usage | scan_mode | safe_focus | +| --- | --- | --- | --- | --- | --- | --- | +| service-mesh-control-plane | 控制平面 | assets/images/001-control-plane.png | generated_from_description | 解释 Service Mesh 如何下发流量治理策略 | contain | center diagram and top title | +| service-mesh-data-plane | 数据平面 | assets/images/002-data-plane.png | generated_from_description | 解释 Sidecar 如何接管东西向流量 | cover | middle service nodes | +| service-mesh-observability | 可观测性 | assets/images/003-observability.png | generated_from_description | 解释指标、日志和链路追踪如何汇总 | contain | bottom telemetry cards | diff --git a/tests/test_build_it_infra_video.py b/tests/test_build_it_infra_video.py new file mode 100644 index 0000000..f79d8f1 --- /dev/null +++ b/tests/test_build_it_infra_video.py @@ -0,0 +1,112 
@@ +import importlib.util +import json +import shutil +import struct +import sys +import tempfile +import unittest +import zlib +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SCRIPT = ROOT / "scripts/build_it_infra_video.py" +FIXTURE = ROOT / "tests/fixtures/it-infra-chain" + + +spec = importlib.util.spec_from_file_location("build_it_infra_video", SCRIPT) +runner = importlib.util.module_from_spec(spec) +assert spec.loader is not None +sys.modules[spec.name] = runner +spec.loader.exec_module(runner) + + +def write_png(path: Path, rgb: tuple[int, int, int]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + width = height = 16 + raw = b"".join(b"\x00" + bytes(rgb) * width for _ in range(height)) + + def chunk(kind: bytes, data: bytes) -> bytes: + return ( + struct.pack(">I", len(data)) + + kind + + data + + struct.pack(">I", zlib.crc32(kind + data) & 0xFFFFFFFF) + ) + + path.write_bytes( + b"\x89PNG\r\n\x1a\n" + + chunk(b"IHDR", struct.pack(">IIBBBBB", width, height, 8, 2, 0, 0, 0)) + + chunk(b"IDAT", zlib.compress(raw)) + + chunk(b"IEND", b"") + ) + + +def copy_fixture(tmp_path: Path) -> Path: + project = tmp_path / "project" + shutil.copytree(FIXTURE, project) + write_png(project / "assets/images/001-control-plane.png", (21, 91, 255)) + write_png(project / "assets/images/002-data-plane.png", (24, 191, 166)) + write_png(project / "assets/images/003-observability.png", (73, 217, 255)) + return project + + +class BuildItInfraVideoTest(unittest.TestCase): + def test_manifest_drives_config_and_html_without_duplicate_ids(self): + with tempfile.TemporaryDirectory() as tmp: + project = copy_fixture(Path(tmp)) + + code = runner.main( + [ + "--project-dir", + str(project), + "--title", + "Service Mesh fixture", + "--audio-mode", + "none", + ] + ) + + self.assertEqual(code, 1) + self.assertFalse((project / "index.html").exists()) + + code = runner.main( + [ + "--project-dir", + str(project), + "--title", + "Service Mesh fixture", + 
"--audio-mode", + "tone", + "--section-duration", + "1.2", + ] + ) + + self.assertEqual(code, 0) + config = json.loads((project / "video.config.json").read_text(encoding="utf-8")) + html = (project / "index.html").read_text(encoding="utf-8") + + self.assertEqual(config["timelineColumns"], 3) + self.assertEqual(len(config["sections"]), 3) + self.assertEqual(config["sections"][0]["image"], "assets/images/001-control-plane.png") + self.assertEqual(config["sections"][1]["start"], 1.2) + self.assertEqual(html.count('id="bgm"'), 1) + self.assertEqual(html.count('id="scene-service-mesh-control-plane"'), 1) + self.assertEqual(html.count('data-track-index="1"'), 3) + self.assertEqual(html.count('data-track-index="5"'), 3) + + def test_rejects_manifest_image_that_is_not_real_png(self): + with tempfile.TemporaryDirectory() as tmp: + project = copy_fixture(Path(tmp)) + (project / "assets/images/002-data-plane.png").write_text("", encoding="utf-8") + + rows = runner.parse_markdown_table(project / "assets/images/manifest.md") + self.assertEqual(len(rows), 3) + + with self.assertRaisesRegex(runner.BuildError, "not a real PNG"): + runner.read_manifest(project / "assets/images/manifest.md", project) + + +if __name__ == "__main__": + unittest.main()