Add deterministic IT infra video skill chain
This commit is contained in:
parent
03645d5bfd
commit
dc3719fee2
20
README.md
20
README.md
@ -16,6 +16,7 @@
|
||||
| AI 信息差快报 | 新闻检索、素材匹配、口播字幕、视频渲染 | `skills/ai-tech-news-video/SKILL.md` |
|
||||
| IT 基础设施连续 PNG | 根据描述或参考图生成 1-N 张连续风格竖版 PNG 素材 | `skills/it-infra-continuous-png/SKILL.md` |
|
||||
| IT 基础设施长图讲解视频 | 基于长图素材生成 HyperFrames 讲解视频、口播、字幕和 timeline | `skills/it-infra-evolution-video/SKILL.md` |
|
||||
| IT 基础设施长图讲解视频 v2 | 从 PNG manifest 强制生成配置、HTML、音频、验收和 MP4 | `skills/it-infra-evolution-video-v2/SKILL.md` |
|
||||
| 产品介绍视频 | 官网信息提炼、叙事结构、成片节奏 | `skills/product-intro-video/SKILL.md` |
|
||||
| 视频音效工作流 | 音效搜索、下载与合成、时间线接入 | `skills/sound-fx-for-video/SKILL.md` |
|
||||
| 简笔画动画视频 | 线稿风 + 短画面字;**主动网络搜参考图临摹**;逼真非抽象;GSAP 主时间线 + 可选 Anime.js;抽检闭环 | `skills/sketch-animation-video/SKILL.md` |
|
||||
@ -50,6 +51,25 @@
|
||||
2. 按文档准备素材、音频与脚本
|
||||
3. 在项目中执行渲染与抽检流程
|
||||
|
||||
### IT 基础设施 PNG -> 视频闭环
|
||||
|
||||
当一个任务同时需要 `it-infra-continuous-png` 和 `it-infra-evolution-video` 时,优先使用 v2 链路:
|
||||
|
||||
1. `it-infra-continuous-png` 先输出 `assets/images/*.png` 和 `assets/images/manifest.md`
|
||||
2. `it-infra-evolution-video-v2` 读取 manifest,并调用 `scripts/build_it_infra_video.py`
|
||||
3. 任务目录中必须留下 `video.config.json`、`index.html`、`renders/*.mp4`、`ffprobe.json`
|
||||
|
||||
示例:
|
||||
|
||||
```bash
|
||||
python3 scripts/build_it_infra_video.py \
|
||||
--project-dir /path/to/task/service-mesh-video \
|
||||
--title "云原生 Service Mesh 网络科普视频" \
|
||||
--audio-mode edge-tts \
|
||||
--run-acceptance \
|
||||
--output-name service-mesh-video.mp4
|
||||
```
|
||||
|
||||
## 账号信息
|
||||
|
||||
- 名称:拓扑同学
|
||||
|
||||
664
scripts/build_it_infra_video.py
Executable file
664
scripts/build_it_infra_video.py
Executable file
@ -0,0 +1,664 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build an IT infrastructure explainer video project from a PNG manifest.
|
||||
|
||||
The runner is intentionally deterministic: it turns a manifest produced by
|
||||
it-infra-continuous-png into one HyperFrames project, validates clip timing, and
|
||||
optionally runs the HyperFrames/ffprobe acceptance chain.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
# Columns every manifest table must declare. The same names are used as the
# ManifestRow field names, so each column is copied verbatim into the dataclass.
REQUIRED_MANIFEST_COLUMNS = [
    "chapter_id",
    "title",
    "file",
    "source_type",
    "video_usage",
    "scan_mode",
    "safe_focus",
]

# Eight-byte signature compared against the start of every listed image file.
PNG_MAGIC = b"\x89PNG\r\n\x1a\n"
# HyperFrames CLI version pinned in every `npx hyperframes@...` command.
HYPERFRAMES_VERSION = "0.6.15"
|
||||
|
||||
|
||||
class BuildError(RuntimeError):
    """Build problem that main() reports as "Build failed: ..." with exit code 1."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ManifestRow:
    """One validated data row of the PNG manifest table.

    Field names mirror REQUIRED_MANIFEST_COLUMNS; every value is the stripped
    cell text exactly as it appeared in the markdown table.
    """

    # Must be non-empty; later slugified into the section id.
    chapter_id: str
    # Becomes the section title (after stripping whitespace).
    title: str
    # Image path joined onto the project directory; must point at a real PNG.
    file: str
    # Shown as a tag with underscores replaced by spaces.
    source_type: str
    # Preferred subtitle text; safe_focus is used when this is blank.
    video_usage: str
    # Either "cover" or "contain"; copied into the section's image fit.
    scan_mode: str
    # Fallback subtitle text, also exported to the config as safeFocus.
    safe_focus: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Section:
    """One timed chapter of the video, derived from a single manifest row."""

    # Slugified chapter id; used to build HTML element ids and audio file names.
    id: str
    # Start offset in seconds (row index multiplied by the section duration).
    start: float
    # Length of the section in seconds.
    duration: float
    # Start offset rendered as "M:SS".
    time_label: str
    # Short label for the timeline strip (first eight characters of the title).
    timeline_label: str
    # Heading text shown in the scene.
    title: str
    # Lead paragraph text shown under the heading.
    subtitle: str
    # Tag chips rendered under the lead paragraph.
    tags: list[str]
    # Image path exactly as written in the manifest `file` column.
    image: str
    # "cover" or "contain"; selects the image panel CSS class.
    image_fit: str
    # Project-relative path of the voiceover audio file for this section.
    voiceover: str
    # Caption text; also the text sent to edge-tts for the voiceover.
    caption: str
    # Manifest source_type value, exported unchanged.
    source_type: str
    # Manifest safe_focus value, exported unchanged.
    safe_focus: str
|
||||
|
||||
|
||||
def fail(message: str) -> None:
    """Abort the build by raising BuildError carrying *message*."""
    error = BuildError(message)
    raise error
|
||||
|
||||
|
||||
def run(cmd: list[str], cwd: Path, *, capture: bool = False) -> subprocess.CompletedProcess[str]:
    """Echo and execute *cmd* inside *cwd*, raising on a non-zero exit.

    Args:
        cmd: Program and arguments, passed to subprocess without a shell.
        cwd: Working directory for the child process.
        capture: When true, stdout and stderr are merged and returned on the
            result's ``stdout``; otherwise the child inherits both streams.

    Returns:
        The completed process (text mode).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero code.
    """
    print("+ " + " ".join(cmd), flush=True)
    if capture:
        out_target, err_target = subprocess.PIPE, subprocess.STDOUT
    else:
        out_target, err_target = None, None
    return subprocess.run(
        cmd,
        cwd=str(cwd),
        check=True,
        text=True,
        stdout=out_target,
        stderr=err_target,
    )
|
||||
|
||||
|
||||
def slugify(value: str, fallback: str) -> str:
    """Turn *value* into a lowercase ASCII slug, or return *fallback* if nothing is left.

    Runs of characters outside ``a-z``, ``0-9``, ``_`` and ``-`` become a single
    hyphen, repeated hyphens are collapsed, and hyphens at either end are removed.
    """
    lowered = value.strip().lower()
    hyphenated = re.sub(r"[^a-z0-9_-]+", "-", lowered)
    collapsed = re.sub(r"-{2,}", "-", hyphenated)
    slug = collapsed.strip("-")
    if slug:
        return slug
    return fallback
|
||||
|
||||
|
||||
def parse_markdown_table(path: Path) -> list[dict[str, str]]:
    """Read the manifest's markdown table and return one dict per data row.

    Only lines that both start and end with ``|`` are treated as table lines.
    The first is the header, the second must be the ``---`` separator, and the
    rest are data rows. Rows whose required columns are all empty are skipped.

    Args:
        path: Location of the manifest markdown file.

    Returns:
        A non-empty list mapping each header name to the stripped cell text.

    Raises:
        BuildError: If the file is missing, has no usable table, lacks a
            required column or the separator row, or a row's cell count
            differs from the header's.
    """

    def split_row(line: str) -> list[str]:
        return [cell.strip() for cell in line.strip("|").split("|")]

    if not path.exists():
        fail(f"Manifest not found: {path}")
    lines = [line.strip() for line in path.read_text(encoding="utf-8").splitlines()]
    table_lines = [line for line in lines if line.startswith("|") and line.endswith("|")]
    if len(table_lines) < 3:
        fail(f"Manifest must contain a markdown table with data rows: {path}")

    headers = split_row(table_lines[0])
    missing = [column for column in REQUIRED_MANIFEST_COLUMNS if column not in headers]
    if missing:
        fail(f"Manifest missing required columns: {', '.join(missing)}")

    # The second table line is discarded below, so make sure it really is the
    # header separator; otherwise the first data row would vanish silently.
    if not all(re.fullmatch(r":?-+:?", cell) for cell in split_row(table_lines[1])):
        fail(f"Manifest table is missing the header separator row: {path}")

    rows: list[dict[str, str]] = []
    for line in table_lines[2:]:
        cells = split_row(line)
        if len(cells) != len(headers):
            fail(f"Manifest row has {len(cells)} cells but header has {len(headers)}: {line}")
        # Lengths were just checked, so a plain zip is enough (and avoids the
        # Python 3.10-only ``strict`` keyword).
        row = dict(zip(headers, cells))
        if any(row[column] for column in REQUIRED_MANIFEST_COLUMNS):
            rows.append(row)
    if not rows:
        fail("Manifest has no image rows")
    return rows
|
||||
|
||||
|
||||
def read_manifest(path: Path, project_dir: Path) -> list[ManifestRow]:
    """Parse the manifest and validate every row against the files on disk.

    Args:
        path: Manifest markdown file.
        project_dir: Directory that each row's ``file`` value is relative to.

    Returns:
        The manifest rows in table order.

    Raises:
        BuildError: If a row has an empty ``chapter_id``, a ``scan_mode`` other
            than ``cover``/``contain``, or a ``file`` that is not an existing
            regular file beginning with the PNG signature.
    """
    rows: list[ManifestRow] = []
    for index, raw in enumerate(parse_markdown_table(path), start=1):
        row = ManifestRow(**{column: raw[column] for column in REQUIRED_MANIFEST_COLUMNS})
        if not row.chapter_id:
            fail(f"Manifest row {index} has an empty chapter_id")
        if row.scan_mode not in {"cover", "contain"}:
            fail(f"Manifest row {index} scan_mode must be cover or contain: {row.scan_mode}")
        image_path = project_dir / row.file
        # is_file() rejects directories too; an empty `file` cell resolves to the
        # project directory itself, which exists() would have accepted.
        if not image_path.is_file():
            fail(f"Manifest row {index} image file not found: {row.file}")
        # Only the signature is needed, so avoid loading the whole image.
        with image_path.open("rb") as handle:
            signature = handle.read(len(PNG_MAGIC))
        if signature != PNG_MAGIC:
            fail(f"Manifest row {index} image is not a real PNG: {row.file}")
        rows.append(row)
    return rows
|
||||
|
||||
|
||||
def format_time(seconds: float) -> str:
    """Format *seconds* as ``M:SS``, rounding to whole seconds and clamping at zero."""
    whole_seconds = int(round(seconds))
    if whole_seconds < 0:
        whole_seconds = 0
    minutes, remainder = divmod(whole_seconds, 60)
    return f"{minutes}:{remainder:02d}"
|
||||
|
||||
|
||||
def build_sections(rows: list[ManifestRow], section_duration: float) -> list[Section]:
    """Lay the manifest rows out back to back as equally long sections.

    Args:
        rows: Validated manifest rows, in playback order.
        section_duration: Seconds allotted to every row.

    Returns:
        One Section per row, each with a unique id.

    Raises:
        BuildError: If the resulting sections overlap or have a non-positive
            duration (checked by validate_non_overlapping).
    """
    sections: list[Section] = []
    used_ids: set[str] = set()
    for index, row in enumerate(rows):
        start = round(index * section_duration, 3)
        base_id = slugify(row.chapter_id, f"chapter-{index + 1}")
        # Different chapter_ids can slugify to the same text (or repeat outright).
        # The id becomes an HTML element id, so disambiguate with a numeric suffix.
        chapter_id = base_id
        suffix = index + 1
        while chapter_id in used_ids:
            chapter_id = f"{base_id}-{suffix}"
            suffix += 1
        used_ids.add(chapter_id)
        title = row.title.strip()
        subtitle = row.video_usage.strip() or row.safe_focus.strip()
        caption = f"{title}: {subtitle}" if subtitle else title
        tags = [
            row.source_type.replace("_", " "),
            "long image",
            row.scan_mode,
        ]
        sections.append(
            Section(
                id=chapter_id,
                start=start,
                duration=section_duration,
                time_label=format_time(start),
                timeline_label=title[:8] or f"Chapter {index + 1}",
                title=title,
                subtitle=subtitle,
                tags=tags,
                image=row.file,
                image_fit=row.scan_mode,
                voiceover=f"assets/audio/vo-{index + 1:02d}-{chapter_id}.mp3",
                caption=caption,
                source_type=row.source_type,
                safe_focus=row.safe_focus,
            )
        )
    validate_non_overlapping("section", ((s.start, s.duration, s.id) for s in sections))
    return sections
|
||||
|
||||
|
||||
def validate_non_overlapping(name: str, clips: Iterable[tuple[float, float, str]]) -> None:
    """Check that the clips, ordered by start time, never overlap.

    Args:
        name: Label used as the prefix of error messages.
        clips: ``(start, duration, clip_id)`` tuples in any order.

    Raises:
        BuildError: If a clip's duration is not positive, or a clip starts more
            than one millisecond before the previous clip ends.
    """
    ordered = sorted(clips)
    last_end = -1.0
    last_id = ""
    for begin, length, clip_id in ordered:
        if length <= 0:
            fail(f"{name} clip has non-positive duration: {clip_id}")
        if begin < last_end - 0.001:
            fail(f"{name} clips overlap: {last_id} and {clip_id}")
        last_end, last_id = begin + length, clip_id
|
||||
|
||||
|
||||
def write_json_config(project_dir: Path, title: str, sections: list[Section]) -> dict:
    """Build the video configuration, save it as ``video.config.json`` and return it.

    Args:
        project_dir: Directory that receives ``video.config.json``.
        title: Video title stored in the config.
        sections: Timed sections to export.

    Returns:
        The configuration dictionary that was written to disk.
    """

    def export(section) -> dict:
        # Key order is kept stable because it is the order written to the file.
        return {
            "id": section.id,
            "start": section.start,
            "duration": section.duration,
            "timeLabel": section.time_label,
            "timelineLabel": section.timeline_label,
            "title": section.title,
            "subtitle": section.subtitle,
            "tags": section.tags,
            "image": section.image,
            "imageFit": section.image_fit,
            "voiceover": section.voiceover,
            "caption": section.caption,
            "sourceType": section.source_type,
            "safeFocus": section.safe_focus,
        }

    # The video ends when the last section ends.
    total_duration = round(max(s.start + s.duration for s in sections), 3)
    # One inspection timestamp at the midpoint of every section.
    midpoints = [round(section.start + section.duration / 2, 3) for section in sections]

    config = {
        "duration": total_duration,
        "timelineColumns": len(sections),
        "canvas": {"width": 1920, "height": 1080},
        "stylePreset": "it-infra-v2-blue-white-two-column-scan",
        "title": title,
        "sections": [export(section) for section in sections],
        "inspectTimes": midpoints,
    }

    serialized = json.dumps(config, ensure_ascii=False, indent=2)
    target = project_dir / "video.config.json"
    target.write_text(serialized + "\n", encoding="utf-8")
    return config
|
||||
|
||||
|
||||
def ensure_project_scaffold(project_dir: Path) -> None:
    """Create the project directories and default JSON files that are missing.

    Existing ``package.json`` and ``hyperframes.json`` files are left untouched.
    """
    for relative in ("assets/audio", "assets/images", "renders", "snapshots"):
        (project_dir / relative).mkdir(parents=True, exist_ok=True)

    package_payload = {
        "name": "it-infra-evolution-video-v2-project",
        "private": True,
        "type": "module",
        "scripts": {
            "lint": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} lint",
            "inspect": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} inspect",
            "snapshot": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} snapshot",
            "render": f"npx --yes hyperframes@{HYPERFRAMES_VERSION} render",
        },
    }
    hyperframes_payload = {
        "$schema": "https://hyperframes.heygen.com/schema/hyperframes.json",
        "registry": "https://raw.githubusercontent.com/heygen-com/hyperframes/main/registry",
        "paths": {
            "blocks": "compositions",
            "components": "compositions/components",
            "assets": "assets",
        },
    }

    # Same order as before: package.json first, then hyperframes.json.
    defaults = (
        ("package.json", package_payload),
        ("hyperframes.json", hyperframes_payload),
    )
    for file_name, payload in defaults:
        target = project_dir / file_name
        if target.exists():
            continue
        target.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def generate_tone_audio(project_dir: Path, sections: list[Section]) -> None:
    """Synthesize placeholder sine-tone voiceovers and the background track.

    Each section gets a tone at a different frequency, written to the section's
    voiceover path. The background track is produced by generate_tone_bgm so
    both audio modes share one BGM command.

    Args:
        project_dir: Project root; also the working directory for ffmpeg.
        sections: Sections whose voiceover files should be created.

    Raises:
        BuildError: If ffmpeg is not available on PATH.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    if not shutil.which("ffmpeg"):
        fail("ffmpeg is required for --audio-mode tone")
    for index, section in enumerate(sections, start=1):
        out = project_dir / section.voiceover
        out.parent.mkdir(parents=True, exist_ok=True)
        # Step the pitch per section so neighbouring clips are distinguishable.
        frequency = str(360 + index * 60)
        # Leave 0.4 s of silence per section, but never go below 0.4 s of tone.
        duration = str(max(0.4, section.duration - 0.4))
        run(
            [
                "ffmpeg",
                "-y",
                "-f",
                "lavfi",
                "-i",
                f"sine=frequency={frequency}:duration={duration}",
                "-q:a",
                "9",
                str(out),
            ],
            project_dir,
            capture=True,
        )
    # Reuse the shared helper instead of repeating its ffmpeg command here.
    generate_tone_bgm(project_dir, max(s.start + s.duration for s in sections))
|
||||
|
||||
|
||||
def generate_edge_tts_audio(project_dir: Path, sections: list[Section]) -> None:
    """Generate one edge-tts voiceover per section, plus a fallback BGM if none exists.

    Args:
        project_dir: Project root; also the working directory for edge-tts.
        sections: Sections whose caption text is spoken into their voiceover file.

    Raises:
        BuildError: If the edge-tts executable is not available on PATH.
        subprocess.CalledProcessError: If an edge-tts invocation fails.
    """
    if shutil.which("edge-tts") is None:
        fail("edge-tts is required for production voiceover generation")

    for section in sections:
        target = project_dir / section.voiceover
        target.parent.mkdir(parents=True, exist_ok=True)
        command = [
            "edge-tts",
            "--voice",
            "zh-CN-YunxiNeural",
            "--rate",
            "+20%",
            "--text",
            section.caption,
            "--write-media",
            str(target),
        ]
        run(command, project_dir)

    # An existing background track is kept; a tone is synthesized only when absent.
    background = project_dir / "assets/audio/bgm.wav"
    if background.exists():
        return
    generate_tone_bgm(project_dir, max(s.start + s.duration for s in sections))
|
||||
|
||||
|
||||
def generate_tone_bgm(project_dir: Path, duration: float) -> None:
    """Write a quiet 120 Hz sine tone to ``assets/audio/bgm.wav`` inside the project.

    Args:
        project_dir: Project root; ffmpeg runs here, so the relative output
            path resolves inside it.
        duration: Length of the tone in seconds.

    Raises:
        BuildError: If ffmpeg is not available on PATH.
        subprocess.CalledProcessError: If ffmpeg fails.
    """
    if shutil.which("ffmpeg") is None:
        fail("ffmpeg is required to synthesize fallback BGM")
    tone_source = f"sine=frequency=120:duration={duration}"
    command = [
        "ffmpeg",
        "-y",
        "-f",
        "lavfi",
        "-i",
        tone_source,
        "-filter:a",
        "volume=0.08",
        "assets/audio/bgm.wav",
    ]
    run(command, project_dir, capture=True)
|
||||
|
||||
|
||||
def css() -> str:
    """Return the stylesheet that write_html() inlines into ``index.html``.

    The sheet targets a fixed 1920x1080 canvas. ``--timeline-columns`` defaults
    to 1 here and is overridden per project through the ``#root`` style attribute.
    """
    return """
:root { --timeline-columns: 1; }
* { margin: 0; padding: 0; box-sizing: border-box; }
html, body {
width: 1920px;
height: 1080px;
overflow: hidden;
background: #f3faff;
font-family: Inter, "Noto Sans JP", Arial, sans-serif;
color: #07194f;
}
#root {
position: relative;
width: 1920px;
height: 1080px;
overflow: hidden;
background:
radial-gradient(circle at 82% 16%, rgba(73,217,255,0.26), transparent 30%),
radial-gradient(circle at 10% 92%, rgba(21,91,255,0.15), transparent 28%),
linear-gradient(135deg, #ffffff 0%, #f3faff 46%, #dceeff 100%);
}
.clip { position: absolute; overflow: hidden; }
.scene { inset: 0; opacity: 0; }
.topbar {
position: absolute; z-index: 40; top: 42px; left: 72px; right: 72px;
display: flex; justify-content: space-between; align-items: center;
font-size: 26px; font-weight: 850; color: rgba(7,25,79,0.82);
}
.brand-pill {
display: inline-flex; gap: 14px; align-items: center; padding: 14px 24px;
border-radius: 999px; color: #fff; background: linear-gradient(135deg, #155bff, #18bfa6);
box-shadow: 0 16px 40px rgba(21,91,255,0.22);
}
.brand-dot { width: 14px; height: 14px; border-radius: 50%; background: #fff; box-shadow: 0 0 24px #49d9ff; }
.scene-content {
width: 100%; height: 100%; padding: 92px 104px 214px;
display: grid; grid-template-columns: 0.96fr 1.04fr; gap: 58px; align-items: center;
}
.image-panel {
position: relative; width: 100%; height: 760px; border-radius: 34px; overflow: hidden;
background: #fff; border: 1px solid rgba(21,91,255,0.16);
box-shadow: 0 34px 90px rgba(17,60,128,0.18);
}
.image-panel img {
position: absolute; inset: 0; width: 100%; height: 100%; object-fit: cover;
object-position: center top; filter: saturate(1.06) contrast(1.03);
}
.image-panel.contain img { object-fit: contain; padding: 20px; background: #fff; }
.copy { position: relative; z-index: 2; display: flex; flex-direction: column; gap: 26px; }
.kicker {
width: max-content; max-width: 100%; color: #155bff; background: rgba(21,91,255,0.08);
border: 1px solid rgba(21,91,255,0.22); border-radius: 999px; padding: 12px 22px;
font-size: 27px; font-weight: 950;
}
h1, h2 { font-size: 64px; line-height: 1.08; font-weight: 950; letter-spacing: 0; }
.lead { font-size: 32px; line-height: 1.55; font-weight: 760; color: rgba(7,25,79,0.78); }
.tag-row { display: flex; gap: 14px; flex-wrap: wrap; }
.tag {
padding: 12px 16px; border-radius: 16px; color: #07194f; background: rgba(255,255,255,0.76);
border: 1px solid rgba(73,217,255,0.34); box-shadow: 0 14px 30px rgba(17,60,128,0.08);
font-size: 24px; font-weight: 900;
}
.caption {
position: absolute; z-index: 55; left: 320px; right: 320px; bottom: 132px; min-height: 78px;
display: grid; place-items: center; padding: 14px 32px; border-radius: 26px;
background: rgba(7,25,79,0.88); color: #fff; font-size: 32px; line-height: 1.34;
font-weight: 850; text-align: center; box-shadow: 0 20px 55px rgba(7,25,79,0.26); opacity: 0;
}
.timeline {
position: absolute; z-index: 52; left: 70px; right: 70px; bottom: 28px; height: 88px;
padding: 12px 14px 18px; display: grid; grid-template-columns: repeat(var(--timeline-columns), minmax(0, 1fr));
gap: 10px; align-items: center; overflow: visible; border-radius: 30px;
background: rgba(255,255,255,0.76); border: 1px solid rgba(21,91,255,0.15);
box-shadow: 0 20px 60px rgba(17,60,128,0.14); backdrop-filter: blur(12px);
}
.timeline-fill { position: absolute; left: 22px; right: 22px; bottom: 8px; height: 8px; border-radius: 999px; background: rgba(7,25,79,0.12); overflow: hidden; }
.timeline-progress { display: block; width: 0%; height: 100%; border-radius: inherit; background: linear-gradient(90deg, #155bff, #49d9ff, #18bfa6); }
.chapter-tag {
position: relative; z-index: 2; min-width: 0; height: 50px; display: flex; align-items: center; justify-content: center;
gap: 8px; padding: 0 10px; border-radius: 18px; background: rgba(255,255,255,0.72);
border: 1px solid rgba(21,91,255,0.18); color: rgba(7,25,79,0.74); font-size: 20px;
line-height: 1; font-weight: 900; white-space: nowrap; box-shadow: 0 12px 26px rgba(17,60,128,0.08);
}
.chapter-time { color: #155bff; font-variant-numeric: tabular-nums; }
.chapter-title { overflow: hidden; text-overflow: ellipsis; }
.chapter-tag.active { color: #fff; background: linear-gradient(135deg, #155bff, #18bfa6); border-color: rgba(255,255,255,0.62); box-shadow: 0 20px 42px rgba(21,91,255,0.28); }
.chapter-tag.active .chapter-time { color: #fff; }
.glow-line { position: absolute; width: 560px; height: 560px; border-radius: 50%; border: 2px solid rgba(73,217,255,0.34); right: -160px; top: -150px; }
"""
|
||||
|
||||
|
||||
def js_array(values: list[str | float]) -> str:
    """Serialize *values* as a JSON array literal, leaving non-ASCII text unescaped."""
    literal = json.dumps(values, ensure_ascii=False)
    return literal
|
||||
|
||||
|
||||
def write_html(project_dir: Path, title: str, config: dict) -> None:
    """Render the HyperFrames composition and write it to ``index.html``.

    Args:
        project_dir: Directory that receives ``index.html``.
        title: Text shown in the top-bar brand pill (HTML-escaped).
        config: Dictionary produced by write_json_config; ``sections``,
            ``duration`` and ``timelineColumns`` are read from it.
    """
    sections = config["sections"]
    duration = config["duration"]
    timeline_columns = config["timelineColumns"]

    scene_html = []
    caption_html = []
    chapter_html = []
    # The single background-music clip spans the whole composition.
    audio_html = [
        f'<audio id="bgm" class="clip" data-start="0" data-duration="{duration}" data-track-index="20" data-volume="0.10" src="assets/audio/bgm.wav"></audio>'
    ]

    for index, section in enumerate(sections):
        scene_id = f"scene-{section['id']}"
        cap_id = f"cap-{index + 1:02d}-{section['id']}"
        vo_id = f"vo-{index + 1:02d}-{section['id']}"
        image_panel_class = "image-panel contain" if section["imageFit"] == "contain" else "image-panel"
        tags = "".join(f'<span class="tag">{html.escape(tag)}</span>' for tag in section["tags"])
        # Only the first scene gets an <h1>; every later scene uses <h2>.
        heading_tag = "h1" if index == 0 else "h2"
        # Captions start 0.2 s into the section and run 0.4 s shorter than it
        # (never less than 0.4 s); the inline script below uses the same offsets.
        caption_start = round(section["start"] + 0.2, 3)
        caption_duration = round(max(0.4, section["duration"] - 0.4), 3)
        scene_html.append(
            f"""
<section id="{scene_id}" class="clip scene" data-start="{section['start']}" data-duration="{section['duration']}" data-track-index="1">
<div class="scene-content">
<div class="{image_panel_class}"><img data-layout-allow-overflow src="{html.escape(section['image'])}" alt="{html.escape(section['title'])}" /></div>
<div class="copy">
<div class="kicker">{html.escape(section['timeLabel'])} / {html.escape(section['timelineLabel'])}</div>
<{heading_tag}>{html.escape(section['title'])}</{heading_tag}>
<p class="lead">{html.escape(section['subtitle'])}</p>
<div class="tag-row">{tags}</div>
</div>
</div>
</section>"""
        )
        caption_html.append(
            f'<div class="caption clip" id="{cap_id}" data-start="{caption_start}" data-duration="{caption_duration}" data-track-index="10">{html.escape(section["caption"])}</div>'
        )
        chapter_html.append(
            f'<div class="chapter-tag" id="chapter-{index}"><span class="chapter-time">{html.escape(section["timeLabel"])}</span><span class="chapter-title">{html.escape(section["timelineLabel"])}</span></div>'
        )
        # The voiceover clip starts with the section but reuses the caption's duration.
        audio_html.append(
            f'<audio id="{vo_id}" class="clip" data-start="{section["start"]}" data-duration="{caption_duration}" data-track-index="5" data-volume="0.92" src="{html.escape(section["voiceover"])}"></audio>'
        )

    # Parallel arrays handed to the inline script; index i describes section i.
    starts = [section["start"] for section in sections]
    durations = [section["duration"] for section in sections]
    scenes = [f"#scene-{section['id']}" for section in sections]
    captions = [f"#cap-{index + 1:02d}-{section['id']}" for index, section in enumerate(sections)]
    chapters = [f"#chapter-{index}" for index in range(len(sections))]

    # Doubled braces in the template are literal braces in the emitted HTML/JS.
    index_html = f"""<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=1920, height=1080" />
<script src="https://cdn.jsdelivr.net/npm/gsap@3.14.2/dist/gsap.min.js"></script>
<style>{css()}</style>
</head>
<body>
<div id="root" data-composition-id="main" data-start="0" data-duration="{duration}" data-width="1920" data-height="1080" style="--timeline-columns: {timeline_columns}">
<div class="glow-line" data-layout-ignore></div>
<div class="topbar" data-layout-ignore>
<div class="brand-pill"><span class="brand-dot"></span><span>{html.escape(title)}</span></div>
<div>PNG manifest -> HyperFrames -> MP4</div>
</div>
{''.join(scene_html)}
{''.join(caption_html)}
<div class="timeline" data-layout-ignore>
{''.join(chapter_html)}
<div class="timeline-fill"><div class="timeline-progress"></div></div>
</div>
{''.join(audio_html)}
</div>
<script>
window.__timelines = window.__timelines || {{}};
const rootDuration = Number(document.querySelector("#root").dataset.duration || {duration});
const tl = gsap.timeline({{ paused: true }});
const scenes = {js_array(scenes)};
const starts = {js_array(starts)};
const durations = {js_array(durations)};
const captions = {js_array(captions)};
const chapters = {js_array(chapters)};

scenes.forEach((scene, index) => {{
const start = starts[index];
const duration = durations[index];
tl.set(scene, {{ opacity: 1 }}, start);
tl.to(scene, {{ opacity: 0, duration: 0.28, ease: "power1.in" }}, start + duration - 0.28);
tl.from(`${{scene}} .image-panel`, {{ x: -78, opacity: 0, scale: 0.96, duration: 0.72, ease: "power3.out" }}, start + 0.08);
tl.from(`${{scene}} .kicker`, {{ y: 28, opacity: 0, duration: 0.42, ease: "power2.out" }}, start + 0.18);
tl.from(`${{scene}} h1, ${{scene}} h2`, {{ y: 46, opacity: 0, duration: 0.62, ease: "power3.out" }}, start + 0.3);
tl.from(`${{scene}} .lead`, {{ y: 36, opacity: 0, duration: 0.54, ease: "power2.out" }}, start + 0.58);
tl.from(`${{scene}} .tag`, {{ y: 24, opacity: 0, scale: 0.94, duration: 0.42, stagger: 0.06, ease: "power2.out" }}, start + 0.82);
tl.to(`${{scene}} .image-panel img`, {{ y: -70, scale: 1.1, duration: Math.max(4, duration - 1), ease: "none" }}, start + 0.4);
}});

captions.forEach((caption, index) => {{
const start = starts[index] + 0.2;
const duration = Math.max(0.4, durations[index] - 0.4);
tl.to(caption, {{ opacity: 1, y: 0, duration: 0.16, ease: "power1.out" }}, start);
tl.to(caption, {{ opacity: 0, y: 14, duration: 0.16, ease: "power1.in" }}, start + duration - 0.16);
}});

chapters.forEach((chapter, index) => {{
const start = starts[index];
const duration = durations[index];
tl.to(chapter, {{ y: -8, scale: 1.04, duration: 0.18, ease: "power1.out" }}, start);
tl.set(chapter, {{ className: "chapter-tag active" }}, start);
tl.set(chapter, {{ className: "chapter-tag" }}, start + duration - 0.1);
tl.to(chapter, {{ y: 0, scale: 1, duration: 0.18, ease: "power1.in" }}, start + duration - 0.28);
}});

tl.to(".timeline-progress", {{ width: "100%", duration: rootDuration, ease: "none" }}, 0);
tl.from(".topbar", {{ y: -28, opacity: 0, duration: 0.5, ease: "power2.out" }}, 0.1);
tl.to(".glow-line", {{ scale: 1.18, rotation: 20, duration: rootDuration, ease: "none" }}, 0);
tl.to("#root", {{ opacity: 0, duration: 0.65, ease: "power2.in" }}, Math.max(0, rootDuration - 0.75));
window.__timelines["main"] = tl;
</script>
</body>
</html>
"""
    (project_dir / "index.html").write_text(index_html, encoding="utf-8")
|
||||
|
||||
|
||||
def doctor(audio_mode: str, run_acceptance: bool) -> None:
    """Verify that the external tools this run will actually invoke are on PATH.

    Tools are required only for the steps that use them, so a run with
    ``--audio-mode none`` and no acceptance chain needs no external tool.

    Args:
        audio_mode: ``"edge-tts"``, ``"tone"`` or ``"none"``.
        run_acceptance: Whether the HyperFrames/ffprobe acceptance chain will run.

    Raises:
        BuildError: Listing every required tool that could not be found.
    """
    required: list[str] = []
    # ffmpeg synthesizes tone audio and the fallback BGM used by edge-tts mode;
    # it stays required for acceptance runs, as it was before.
    if audio_mode in {"edge-tts", "tone"} or run_acceptance:
        required.append("ffmpeg")
    # ffprobe and npx are only invoked by the acceptance chain.
    if run_acceptance:
        required.extend(["ffprobe", "npx"])
    if audio_mode == "edge-tts":
        required.append("edge-tts")
    missing = [tool for tool in required if not shutil.which(tool)]
    if missing:
        fail(f"Missing required tool(s): {', '.join(missing)}")
|
||||
|
||||
|
||||
def run_acceptance(project_dir: Path, config: dict, output_name: str) -> None:
    """Run lint, inspect, snapshot and render, then validate the MP4 with ffprobe.

    Args:
        project_dir: Project root; every command runs from here.
        config: Video configuration; ``inspectTimes`` and ``duration`` are read.
        output_name: File name of the rendered video inside ``renders/``.

    Raises:
        BuildError: If the MP4 lacks a video or audio stream, is not 1920x1080,
            or its duration is outside the allowed tolerance.
        subprocess.CalledProcessError: If any command exits with a non-zero code.
    """
    inspect_at = ",".join(str(value) for value in config["inspectTimes"])
    output_path = f"renders/{output_name}"

    hyperframes = ["npx", "--yes", f"hyperframes@{HYPERFRAMES_VERSION}"]
    pipeline = (
        ["lint"],
        ["inspect", "--at", inspect_at],
        ["snapshot", "--at", inspect_at],
        ["render", "--output", output_path, "--quality", "standard"],
    )
    for step in pipeline:
        run(hyperframes + step, project_dir)

    ffprobe_command = [
        "ffprobe",
        "-v",
        "quiet",
        "-show_entries",
        "format=duration,size:stream=codec_type,width,height,r_frame_rate",
        "-of",
        "json",
        output_path,
    ]
    probe = run(ffprobe_command, project_dir, capture=True)
    probe_json = json.loads(probe.stdout or "{}")
    streams = probe_json.get("streams", [])

    video_streams = [stream for stream in streams if stream.get("codec_type") == "video"]
    audio_streams = [stream for stream in streams if stream.get("codec_type") == "audio"]
    if not (video_streams and audio_streams):
        fail("ffprobe acceptance failed: rendered MP4 must contain video and audio streams")

    # Dimensions are taken from the first video stream.
    video_stream = video_streams[0] if video_streams else {}
    if video_stream.get("width") != 1920 or video_stream.get("height") != 1080:
        fail(f"ffprobe acceptance failed: expected 1920x1080, got {video_stream.get('width')}x{video_stream.get('height')}")

    actual_duration = float(probe_json.get("format", {}).get("duration", 0) or 0)
    expected_duration = float(config["duration"])
    # Allow 15% drift, with a floor of three seconds.
    tolerance = max(3.0, expected_duration * 0.15)
    if abs(actual_duration - expected_duration) > tolerance:
        fail(
            "ffprobe acceptance failed: "
            f"expected duration near {expected_duration:.3f}s, got {actual_duration:.3f}s"
        )

    # The probe report is written only after every check has passed.
    report = json.dumps(probe_json, indent=2) + "\n"
    (project_dir / "ffprobe.json").write_text(report, encoding="utf-8")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: build the project and optionally run acceptance.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 on success, 1 when the build fails (the reason is printed to stderr).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--project-dir", type=Path, default=Path.cwd(), help="HyperFrames project directory")
    parser.add_argument("--manifest", type=Path, default=None, help="PNG manifest path")
    parser.add_argument("--title", default="IT 基础设施长图讲解视频", help="Video title")
    parser.add_argument("--section-duration", type=float, default=8.0, help="Seconds per manifest row")
    parser.add_argument("--audio-mode", choices=["edge-tts", "tone", "none"], default="edge-tts")
    parser.add_argument("--run-acceptance", action="store_true", help="Run lint/inspect/snapshot/render/ffprobe")
    parser.add_argument("--output-name", default="it-infra-evolution.mp4", help="Rendered MP4 file name")
    args = parser.parse_args(argv)

    try:
        project_dir = args.project_dir.resolve()
        # Default manifest location is the one it-infra-continuous-png writes to.
        manifest = (args.manifest or project_dir / "assets/images/manifest.md").resolve()
        ensure_project_scaffold(project_dir)
        doctor(args.audio_mode, args.run_acceptance)
        rows = read_manifest(manifest, project_dir)
        sections = build_sections(rows, args.section_duration)
        config = write_json_config(project_dir, args.title, sections)
        if args.audio_mode == "edge-tts":
            generate_edge_tts_audio(project_dir, sections)
        elif args.audio_mode == "tone":
            generate_tone_audio(project_dir, sections)
        elif not (project_dir / "assets/audio/bgm.wav").exists():
            # "none" generates no audio, so the BGM the HTML references must already exist.
            fail("--audio-mode none requires existing assets/audio/bgm.wav")
        write_html(project_dir, args.title, config)
        if args.run_acceptance:
            run_acceptance(project_dir, config, args.output_name)
        print("Build complete. Required task artifacts: index.html, video.config.json, assets/images/manifest.md, assets/audio/, renders/ or run with --run-acceptance.")
        return 0
    except (
        BuildError,
        subprocess.CalledProcessError,
        json.JSONDecodeError,
        # File-system and decoding failures (unwritable project dir, unreadable
        # or non-UTF-8 manifest) are reported like other build errors instead
        # of escaping as a traceback.
        OSError,
        UnicodeDecodeError,
    ) as exc:
        print(f"Build failed: {exc}", file=sys.stderr)
        return 1
|
||||
|
||||
|
||||
# Script entry point: exit the process with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@ -81,6 +81,23 @@ description: "生成 IT 基础设施系列连续风格 PNG 图片。适用于一
|
||||
|
||||
`it-infra-evolution-video` 不应重新发明这些图片的风格,只读取 manifest 并作为真实长图素材使用。
|
||||
|
||||
当任务还选择了 `it-infra-evolution-video-v2` 时,本 skill 完成后必须停在清晰的交接点:
|
||||
|
||||
1. 确认 `assets/images/*.png` 的数量与 manifest 数据行数量一致。
|
||||
2. 确认每个 `file` 指向真实 PNG 文件,而不是 SVG、空文件或占位路径。
|
||||
3. 将下一步命令写给视频 skill:
|
||||
|
||||
```bash
|
||||
python3 /path/to/ai-video-skills/scripts/build_it_infra_video.py \
|
||||
--project-dir . \
|
||||
--title "<用户主题>" \
|
||||
--audio-mode edge-tts \
|
||||
--run-acceptance \
|
||||
--output-name "<topic-slug>.mp4"
|
||||
```
|
||||
|
||||
不要在本 skill 中生成 `index.html`、`video.config.json` 或 MP4;这些是视频 skill 的职责。
|
||||
|
||||
## 参考文件
|
||||
|
||||
- 风格规范:`references/style-spec.md`
|
||||
|
||||
73
skills/it-infra-evolution-video-v2/SKILL.md
Normal file
73
skills/it-infra-evolution-video-v2/SKILL.md
Normal file
@ -0,0 +1,73 @@
|
||||
---
|
||||
name: it-infra-evolution-video-v2
|
||||
version: "v2"
|
||||
description: "从 it-infra-continuous-png 的真实 PNG manifest 生成 IT 基础设施长图讲解视频。强制执行 manifest -> video.config.json -> index.html -> audio -> HyperFrames acceptance -> MP4 -> ffprobe 的闭环。"
|
||||
---
|
||||
|
||||
# IT 基础设施长图讲解视频 v2
|
||||
|
||||
本 skill 是 `it-infra-evolution-video` 的可执行 v2 路径。v1 模板保持 frozen;v2 的主路径必须通过仓库 runner 完成,不再让 Agent 临时手写 `generate_index.py` 或自由拼接模板片段。
|
||||
|
||||
## 调用前置条件
|
||||
|
||||
必须先完成 `it-infra-continuous-png`:
|
||||
|
||||
- `assets/images/*.png` 存在,且每个文件是真实 PNG。
|
||||
- `assets/images/manifest.md` 存在。
|
||||
- manifest 的每一数据行都包含 `chapter_id`、`title`、`file`、`source_type`、`video_usage`、`scan_mode`、`safe_focus`。
|
||||
|
||||
缺少这些输入时,不要继续生成视频,不要用 CSS 卡片、假截图或 SVG 冒充 PNG。
|
||||
|
||||
## 标准调用
|
||||
|
||||
在当前任务工作目录或视频项目目录执行:
|
||||
|
||||
```bash
|
||||
python3 /path/to/ai-video-skills/scripts/build_it_infra_video.py \
|
||||
--project-dir . \
|
||||
--title "云原生 Service Mesh 网络科普视频" \
|
||||
--audio-mode edge-tts \
|
||||
--run-acceptance \
|
||||
--output-name service-mesh-video.mp4
|
||||
```
|
||||
|
||||
OpenClaw 任务中如果同时选择了 `it-infra-continuous-png` 和 `it-infra-evolution-video-v2`,必须按以下顺序执行:
|
||||
|
||||
1. 先用 `it-infra-continuous-png` 生成多张 PNG 和 manifest。
|
||||
2. 再用本 skill 的 runner 读取 manifest。
|
||||
3. 最后把 `renders/<output-name>.mp4`(上例为 `renders/service-mesh-video.mp4`)、`index.html`、`video.config.json`、`assets/images/manifest.md`、`ffprobe.json` 留在当前 task workspace。
|
||||
|
||||
## Runner 合同
|
||||
|
||||
runner 负责:
|
||||
|
||||
- 解析并校验 manifest。
|
||||
- 拒绝缺失图片、伪 PNG、缺失列、非法 `scan_mode`。
|
||||
- 生成唯一 ID 的 `index.html`。
|
||||
- 保证 scene、caption、voiceover 在各自 track 上不重叠。
|
||||
- 只保留一个全局 BGM 音轨。
|
||||
- 生成 `video.config.json` 和 `inspectTimes`。
|
||||
- 执行 `lint -> inspect -> snapshot -> render -> ffprobe`。
|
||||
|
||||
生产模式默认 `--audio-mode edge-tts`。本地测试或无网络 dry-run 可以使用 `--audio-mode tone`,但不能把 tone 输出当作正式口播成片。
|
||||
|
||||
## 验收标准
|
||||
|
||||
只有以下文件都存在,才能在 XWorkmate/OpenClaw 中报告完成:
|
||||
|
||||
- `index.html`
|
||||
- `video.config.json`
|
||||
- `assets/images/manifest.md`
|
||||
- `assets/audio/*.mp3`
|
||||
- `assets/audio/bgm.wav`
|
||||
- `renders/<output-name>.mp4`
|
||||
- `ffprobe.json`
|
||||
|
||||
`ffprobe.json` 必须显示:
|
||||
|
||||
- 分辨率为 `1920x1080`
|
||||
- 有 video stream
|
||||
- 有 audio stream
|
||||
- 时长接近 `video.config.json` 的 `duration`
|
||||
|
||||
如果 HyperFrames 或 ffprobe 任一阶段失败,只输出失败阶段和原因,不输出“完成”。
|
||||
7
tests/fixtures/it-infra-chain/assets/images/manifest.md
vendored
Normal file
7
tests/fixtures/it-infra-chain/assets/images/manifest.md
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
# Fixture Image Manifest
|
||||
|
||||
| chapter_id | title | file | source_type | video_usage | scan_mode | safe_focus |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| service-mesh-control-plane | 控制平面 | assets/images/001-control-plane.png | generated_from_description | 解释 Service Mesh 如何下发流量治理策略 | contain | center diagram and top title |
|
||||
| service-mesh-data-plane | 数据平面 | assets/images/002-data-plane.png | generated_from_description | 解释 Sidecar 如何接管东西向流量 | cover | middle service nodes |
|
||||
| service-mesh-observability | 可观测性 | assets/images/003-observability.png | generated_from_description | 解释指标、日志和链路追踪如何汇总 | contain | bottom telemetry cards |
|
||||
112
tests/test_build_it_infra_video.py
Normal file
112
tests/test_build_it_infra_video.py
Normal file
@ -0,0 +1,112 @@
|
||||
import importlib.util
|
||||
import json
|
||||
import shutil
|
||||
import struct
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
import zlib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Repository root: this file lives in <root>/tests/, so go one level above it.
ROOT = Path(__file__).resolve().parents[1]
SCRIPT = ROOT / "scripts/build_it_infra_video.py"
FIXTURE = ROOT / "tests/fixtures/it-infra-chain"

# Load the runner directly from its file path instead of via a regular import.
spec = importlib.util.spec_from_file_location("build_it_infra_video", SCRIPT)
# Validate the spec *before* using it: spec_from_file_location may return None,
# and an `assert` would be stripped under `python -O`.
if spec is None or spec.loader is None:
    raise ImportError(f"Cannot load runner module from {SCRIPT}")
runner = importlib.util.module_from_spec(spec)
# Register the module before executing it so that code which looks the module
# up by name in sys.modules during execution can find it.
sys.modules[spec.name] = runner
spec.loader.exec_module(runner)
|
||||
|
||||
|
||||
def write_png(path: Path, rgb: tuple[int, int, int]) -> None:
    """Write a solid-colour 16x16 RGB PNG to *path*.

    Missing parent directories are created.  The file consists of the PNG
    signature followed by exactly three chunks: IHDR, a single IDAT and IEND.

    Args:
        path: Destination file; overwritten if it already exists.
        rgb: Red, green and blue components, each in ``0..255``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    side = 16
    # One scanline = filter-type byte 0 followed by `side` RGB pixels.
    scanline = b"\x00" + bytes(rgb) * side
    pixels = scanline * side

    def chunk(kind: bytes, data: bytes) -> bytes:
        # Chunk layout: big-endian length, type, payload, CRC-32 of type + payload.
        length = struct.pack(">I", len(data))
        checksum = struct.pack(">I", zlib.crc32(kind + data) & 0xFFFFFFFF)
        return length + kind + data + checksum

    # IHDR payload: width, height, bit depth 8, colour type 2 (RGB), then
    # compression, filter and interlace methods all set to 0.
    header = struct.pack(">IIBBBBB", side, side, 8, 2, 0, 0, 0)

    png_bytes = b"".join(
        (
            b"\x89PNG\r\n\x1a\n",
            chunk(b"IHDR", header),
            chunk(b"IDAT", zlib.compress(pixels)),
            chunk(b"IEND", b""),
        )
    )
    path.write_bytes(png_bytes)
|
||||
|
||||
|
||||
def copy_fixture(tmp_path: Path) -> Path:
    """Copy the fixture tree into ``tmp_path / "project"`` and add its PNGs.

    After copying, one solid-colour PNG is written for each of the three image
    paths referenced by the fixture manifest.

    Args:
        tmp_path: Existing scratch directory that will receive the copy.

    Returns:
        Path of the newly created project directory.
    """
    project = tmp_path / "project"
    shutil.copytree(FIXTURE, project)

    # Relative image path -> fill colour, one entry per manifest row.
    swatches = (
        ("assets/images/001-control-plane.png", (21, 91, 255)),
        ("assets/images/002-data-plane.png", (24, 191, 166)),
        ("assets/images/003-observability.png", (73, 217, 255)),
    )
    for relative_path, colour in swatches:
        write_png(project / relative_path, colour)

    return project
|
||||
|
||||
|
||||
class BuildItInfraVideoTest(unittest.TestCase):
    """Tests for the manifest-driven IT infra video build runner."""

    def test_manifest_drives_config_and_html_without_duplicate_ids(self):
        """Build from the fixture manifest and check the generated config and HTML."""
        with tempfile.TemporaryDirectory() as tmp:
            project = copy_fixture(Path(tmp))
            common_args = ["--project-dir", str(project), "--title", "Service Mesh fixture"]

            # The fixture ships no assets/audio/bgm.wav, so audio mode "none"
            # must be rejected before any HTML is written.
            status = runner.main([*common_args, "--audio-mode", "none"])
            self.assertEqual(status, 1)
            self.assertFalse((project / "index.html").exists())

            # Tone mode generates audio locally, so the build should succeed.
            status = runner.main(
                [*common_args, "--audio-mode", "tone", "--section-duration", "1.2"]
            )
            self.assertEqual(status, 0)

            config_path = project / "video.config.json"
            html_path = project / "index.html"
            config = json.loads(config_path.read_text(encoding="utf-8"))
            html = html_path.read_text(encoding="utf-8")

            # One section per manifest row, laid out back to back.
            sections = config["sections"]
            self.assertEqual(config["timelineColumns"], 3)
            self.assertEqual(len(sections), 3)
            self.assertEqual(sections[0]["image"], "assets/images/001-control-plane.png")
            self.assertEqual(sections[1]["start"], 1.2)

            # IDs must be unique; tracks 1 and 5 each carry one element per row.
            # NOTE(review): what tracks 1 and 5 represent is defined by the
            # runner's HTML writer - confirm there.
            expected_counts = (
                ('id="bgm"', 1),
                ('id="scene-service-mesh-control-plane"', 1),
                ('data-track-index="1"', 3),
                ('data-track-index="5"', 3),
            )
            for needle, expected in expected_counts:
                self.assertEqual(html.count(needle), expected)

    def test_rejects_manifest_image_that_is_not_real_png(self):
        """A .png path whose content is not PNG data must fail manifest validation."""
        with tempfile.TemporaryDirectory() as tmp:
            project = copy_fixture(Path(tmp))
            manifest_path = project / "assets/images/manifest.md"
            impostor = project / "assets/images/002-data-plane.png"
            impostor.write_text("<svg></svg>", encoding="utf-8")

            # Table parsing alone still yields all three rows ...
            rows = runner.parse_markdown_table(manifest_path)
            self.assertEqual(len(rows), 3)

            # ... but full manifest validation must reject the fake image.
            with self.assertRaisesRegex(runner.BuildError, "not a real PNG"):
                runner.read_manifest(manifest_path, project)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Reference in New Issue
Block a user