Add git history secret remediation skill

This commit is contained in:
Haitao Pan 2026-03-17 12:05:16 +08:00
parent ce53e1cc3b
commit fb9ded514a
9 changed files with 564 additions and 0 deletions

60
scripts/skills/package_skill.py Executable file
View File

@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""Package a skill folder into a distributable .skill archive."""
from __future__ import annotations
import sys
import zipfile
from pathlib import Path
from validate_skill import validate_skill
def should_include(file_path: Path) -> bool:
    """Decide whether *file_path* belongs in the packaged archive.

    Anything located under a ``__pycache__`` directory and any file with a
    ``.pyc`` suffix is left out; every other path is kept.
    """
    inside_cache_dir = "__pycache__" in file_path.parts
    is_bytecode = file_path.suffix == ".pyc"
    return not (inside_cache_dir or is_bytecode)
def package_skill(skill_path: str | Path, output_dir: str | Path | None = None) -> Path:
    """Validate a skill folder and zip it into ``<folder-name>.skill``.

    Args:
        skill_path: Path to the skill folder to package.
        output_dir: Directory that receives the archive. It is created when
            missing. Defaults to the current working directory.

    Returns:
        Path of the archive that was written.

    Raises:
        FileNotFoundError: *skill_path* does not exist.
        NotADirectoryError: *skill_path* is not a directory.
        ValueError: ``validate_skill`` rejected the folder; the message is the
            validator's message.
    """
    skill_dir = Path(skill_path).resolve()
    if not skill_dir.exists():
        raise FileNotFoundError(f"Skill folder not found: {skill_dir}")
    if not skill_dir.is_dir():
        raise NotADirectoryError(f"Path is not a directory: {skill_dir}")

    valid, message = validate_skill(skill_dir)
    if not valid:
        raise ValueError(message)

    destination = Path(output_dir).resolve() if output_dir else Path.cwd()
    destination.mkdir(parents=True, exist_ok=True)
    output_path = destination / f"{skill_dir.name}.skill"
    # Resolved form used only for comparison: the walk below yields paths under
    # the resolved skill_dir, while the cwd default may contain symlinks.
    output_resolved = output_path.resolve()

    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as archive:
        # Sorted so the entry order does not depend on filesystem order.
        for file_path in sorted(skill_dir.rglob("*")):
            # The archive already exists on disk at this point; when the
            # destination lies inside the skill folder it must not be added
            # to itself.
            if file_path == output_resolved:
                continue
            if file_path.is_file() and should_include(file_path):
                # Entries are stored relative to the parent so the archive
                # unpacks into a single top-level "<skill-name>/" folder.
                archive.write(file_path, file_path.relative_to(skill_dir.parent))
    return output_path
def main() -> int:
    """Command-line entry point: package the skill named on the command line.

    Expects one or two arguments (skill folder, optional output directory).
    Prints the archive path and returns 0 on success; prints a usage or error
    message and returns 1 otherwise.
    """
    arg_count = len(sys.argv)
    if not 2 <= arg_count <= 3:
        print("Usage: package_skill.py <path/to/skill-folder> [output-directory]")
        return 1

    skill_folder = sys.argv[1]
    target_dir = sys.argv[2] if arg_count == 3 else None
    try:
        output_path = package_skill(skill_folder, target_dir)
    except Exception as exc:  # pragma: no cover - command-line wrapper
        print(f"Error: {exc}")
        return 1

    print(output_path)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""Minimal ClawHub-style skill validation."""
from __future__ import annotations
import re
import sys
from pathlib import Path
import yaml
# Top-level frontmatter keys that validate_skill accepts.
ALLOWED_PROPERTIES = {"name", "description", "license", "allowed-tools", "metadata"}


def validate_skill(skill_path: str | Path) -> tuple[bool, str]:
    """Check the ``SKILL.md`` frontmatter of a skill folder.

    Args:
        skill_path: Directory expected to contain ``SKILL.md``.

    Returns:
        ``(True, "Skill is valid!")`` when every check passes, otherwise
        ``(False, <reason>)`` for the first check that fails.
    """
    skill_md = Path(skill_path) / "SKILL.md"
    # is_file() rather than exists(): a directory named SKILL.md would
    # otherwise blow up in read_text() instead of producing a clean result.
    if not skill_md.is_file():
        return False, "SKILL.md not found"

    content = skill_md.read_text(encoding="utf-8")
    if not content.startswith("---"):
        return False, "No YAML frontmatter found"

    # Accept both LF and CRLF line endings around the frontmatter fences so a
    # file checked out with Windows line endings is not reported as malformed.
    match = re.match(r"^---\r?\n(.*?)\r?\n---", content, re.DOTALL)
    if not match:
        return False, "Invalid frontmatter format"

    try:
        frontmatter = yaml.safe_load(match.group(1))
    except yaml.YAMLError as exc:
        return False, f"Invalid YAML in frontmatter: {exc}"
    if not isinstance(frontmatter, dict):
        return False, "Frontmatter must be a YAML dictionary"

    unexpected = set(frontmatter.keys()) - ALLOWED_PROPERTIES
    if unexpected:
        return (
            False,
            "Unexpected key(s) in SKILL.md frontmatter: "
            + ", ".join(sorted(unexpected))
            + ". Allowed properties are: "
            + ", ".join(sorted(ALLOWED_PROPERTIES)),
        )

    for key in ("name", "description"):
        if key not in frontmatter:
            return False, f"Missing '{key}' in frontmatter"
    for key in ("name", "description"):
        value = frontmatter[key]
        # Without this check str() would turn a YAML null into the text
        # "None" (which passes as a description) and a list into its repr.
        if not isinstance(value, str):
            return False, f"'{key}' must be a string, got {type(value).__name__}"

    name = frontmatter["name"].strip()
    if not re.fullmatch(r"[a-z0-9-]+", name) or name.startswith("-") or name.endswith("-") or "--" in name:
        return False, f"Name '{name}' should be hyphen-case (lowercase letters, digits, and hyphens only)"
    if len(name) > 64:
        return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."

    description = frontmatter["description"].strip()
    if "<" in description or ">" in description:
        return False, "Description cannot contain angle brackets (< or >)"
    if len(description) > 1024:
        return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."

    return True, "Skill is valid!"
def main() -> int:
    """Command-line entry point: validate the skill directory given as argv[1].

    Prints the validator's message. Returns 0 when the skill is valid and 1
    when it is invalid or the argument count is wrong.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: validate_skill.py <skill-directory>")
        return 1

    is_valid, report = validate_skill(arguments[0])
    print(report)
    if is_valid:
        return 0
    return 1


if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -0,0 +1,190 @@
---
name: git-history-secret-remediation
description: Use when a user asks to detect secrets in git commit history, clean tracked sensitive data, rewrite history with git-filter-repo, or verify cleanup with gitleaks. Covers gitleaks detect -v, replacement mapping, path removal, ref inventory, history rewrites, force-push planning, and post-cleanup coordination.
license: Internal use only
metadata:
  owner: cloud-neutral-toolkit
  distribution: clawhub-compatible
  package-format: .skill
---
# Git History Secret Remediation
Use this skill when secrets have already been committed and the task is to inspect, scrub, verify, and coordinate git history cleanup.
Core tools:
- `gitleaks detect -v`
- `git filter-repo`
Bundled scripts:
- `scripts/list_git_refs.sh`
- `scripts/run_gitleaks_history_scan.sh`
- `scripts/backup_git_remotes.py`
- `scripts/restore_git_remotes.py`
- `scripts/run_filter_repo_redaction.sh`
- `scripts/run_history_remediation.sh`
## When To Use
Trigger this skill when the user asks to:
- scan commit history for secrets
- run `gitleaks detect -v`
- remove passwords, API keys, tokens, or private keys from git history
- run `git filter-repo`
- clean up old commits after a leak
- rewrite history and force-push the cleaned repository
## Safety Rules
1. Clean current `HEAD` first, then rewrite history.
2. Rotate real leaked credentials out-of-band. History cleanup is not secret rotation.
3. Prefer empty values or angle-bracket placeholders in tracked samples.
4. Do not use fake secret-looking placeholders (for example, a dummy API key or a sample private-key block) when scanners still match them.
5. Treat history rewrite as destructive:
   - inventory refs first
   - expect force-push
   - warn that teammates must reclone or fully scrub old clones
6. Back up `git remote -v` before rewrite and restore it after rewrite or force-push preparation.
## Workflow
### 1. Inventory refs
At repo root:
```bash
bash skills/git-history-secret-remediation/scripts/list_git_refs.sh /path/to/repo
```
This tells you which branches and tags may need to be force-pushed after rewriting.
### 2. Run the history scan
Use the bundled wrapper:
```bash
bash skills/git-history-secret-remediation/scripts/run_gitleaks_history_scan.sh /path/to/repo
```
Behavior:
- uses the gitleaks config passed as the optional second argument when one is given
- otherwise auto-detects `config/gitleaks.toml` when present
- otherwise runs `gitleaks detect -v` with tool defaults
Classify findings into:
- current-file leaks still present in `HEAD`
- history-only leaks from deleted or renamed files
### 3. Sanitize current HEAD
Before rewriting history:
- replace real secrets in tracked sample/config files
- prefer:
  - `""`
  - empty env values
  - `<OPENSSH_PRIVATE_KEY_CONTENT>`
- keep real values only in local `.env` or a secret manager
### 4. Build a replace-text file
Create a temporary mapping file, for example:
```text
real-secret-1==>
real-secret-2==>
OPENSSH_PRIVATE_KEY_BEGIN_LINE==><OPENSSH_PRIVATE_KEY_BEGIN_LINE>
OPENSSH_PRIVATE_KEY_END_LINE==><OPENSSH_PRIVATE_KEY_END_LINE>
```
Notes:
- default replacement can be empty
- use explicit placeholders only when file syntax requires visible text
- if an old placeholder also triggers scanners, run a second rewrite replacing it with an empty string
### 5. Remove history-only artifact files when appropriate
If a file exists only as a leak artifact, prefer removing it from history entirely.
Examples:
- `leaks_github.json`
- obsolete docs that embed private-key examples
- scratch backup files that contain real credentials
### 6. Rewrite history
Use the bundled wrapper:
```bash
bash skills/git-history-secret-remediation/scripts/run_filter_repo_redaction.sh \
/path/to/repo \
/tmp/replace-text.txt \
[path-to-remove...]
```
Behavior:
- backs up `git remote -v` metadata before rewriting
- restores remotes after rewriting if needed
- runs `git filter-repo --force --sensitive-data-removal --no-fetch`
- clears `.git/filter-repo/already_ran` when present
- optionally removes listed paths from history with `--invert-paths`
### 6b. Single-command remediation
If you already know the replacement mapping and the paths to purge, use the orchestrator:
```bash
bash skills/git-history-secret-remediation/scripts/run_history_remediation.sh \
/path/to/repo \
/tmp/replace-text.txt \
[path-to-remove...]
```
Behavior:
- inventories refs
- runs a pre-scan
- rewrites history
- restores remotes
- re-runs `gitleaks`
- exits non-zero until the repo scans clean
### 7. Re-run gitleaks
Repeat until:
- real secrets are gone from all commits
- remaining findings, if any, are only deliberate placeholders you explicitly accept
### 8. Push rewritten refs
For normal repos with all relevant local branches:
```bash
git push --force origin --all
git push --force origin --tags
```
If the remote has important branches not present locally:
- create local tracking branches first
- or do the rewrite in a fresh mirror clone and push from there
Do not assume a normal non-bare clone can safely use `git push --mirror`.
### 9. Post-cleanup coordination
Always tell the user to:
- rotate leaked credentials
- purge or invalidate old access where relevant
- have other clones recloned or scrubbed
- notify repo admins if server-side cache or object cleanup is needed
- use the remote backup JSON when reconstructing remotes after force-push in a fresh clone

View File

@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""Back up git remote fetch/push URLs to JSON."""
from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
def run(repo_path: str, *args: str) -> str:
    """Run ``git -C <repo_path> <args...>`` and return its stripped stdout.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
    """
    command = ["git", "-C", repo_path]
    command.extend(args)
    completed = subprocess.run(command, stdout=subprocess.PIPE, text=True, check=True)
    return completed.stdout.strip()
def main() -> int:
    """Write every remote's fetch and push URLs of a repository to a JSON file.

    Command line: ``backup_git_remotes.py <repo-path> <output-json>``.
    The JSON maps each remote name to ``{"fetch": [...], "push": [...]}``.
    When the push URLs cannot be queried, the fetch URLs are recorded as the
    push URLs. Parent directories of the output file are created as needed.

    Returns:
        0 after the file is written, 1 when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print("Usage: backup_git_remotes.py <repo-path> <output-json>", file=sys.stderr)
        return 1
    _, repo_path, output_json = sys.argv

    payload: dict[str, dict[str, list[str]]] = {}
    for line in run(repo_path, "remote").splitlines():
        name = line.strip()
        if not name:
            continue
        fetch_urls = run(repo_path, "remote", "get-url", "--all", name).splitlines()
        try:
            push_urls = run(repo_path, "remote", "get-url", "--push", "--all", name).splitlines()
        except subprocess.CalledProcessError:
            # Fall back to the fetch URLs when git cannot report push URLs.
            push_urls = fetch_urls
        payload[name] = {
            "fetch": list(filter(None, fetch_urls)),
            "push": list(filter(None, push_urls)),
        }

    target = Path(output_json)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True) + "\n"
    target.write_text(serialized, encoding="utf-8")
    print(target)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# List the full names of all local branches, tags, and origin
# remote-tracking refs of a repository, one per line.
#
# Usage: list_git_refs.sh <repo-path>
set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <repo-path>" >&2
  exit 1
fi

repo_path=$1

# Let git decide whether this is a repository. A plain "-d .git" test rejects
# bare/mirror clones (no .git directory) and linked worktrees or submodules
# (where .git is a file).
if ! git -C "$repo_path" rev-parse --git-dir >/dev/null 2>&1; then
  echo "Error: not a git repository: $repo_path" >&2
  exit 1
fi

git -C "$repo_path" for-each-ref --format='%(refname)' refs/heads refs/tags refs/remotes/origin

View File

@ -0,0 +1,53 @@
#!/usr/bin/env python3
"""Restore git remote fetch/push URLs from JSON."""
from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
def git(repo_path: str, *args: str) -> None:
    """Run ``git -C <repo_path> <args...>``; raise CalledProcessError on failure."""
    subprocess.check_call(["git", "-C", repo_path, *args])


def _git_status(repo_path: str, *args: str) -> int:
    """Run a git command with its output discarded and return the exit status."""
    completed = subprocess.run(
        ["git", "-C", repo_path, *args],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return completed.returncode


def _restore_remote(repo_path: str, remote: str, fetch_urls: list[str], push_urls: list[str]) -> None:
    """Make *remote* carry exactly the saved fetch and push URLs."""
    if _git_status(repo_path, "remote", "get-url", remote) == 0:
        # "remote set-url" only rewrites the first URL, so a remote that
        # already has several URLs would collect duplicates from the --add
        # calls below. Collapse the list to the first saved URL instead.
        git(repo_path, "config", "--replace-all", f"remote.{remote}.url", fetch_urls[0])
    else:
        git(repo_path, "remote", "add", remote, fetch_urls[0])
    for url in fetch_urls[1:]:
        git(repo_path, "remote", "set-url", "--add", remote, url)

    # Start from a clean slate; a non-zero status just means no pushurl was set.
    _git_status(repo_path, "config", "--unset-all", f"remote.{remote}.pushurl")
    # Push URLs equal to the fetch URLs are git's default. Writing them out as
    # an explicit pushurl would pin pushes to the old address even after a
    # later "git remote set-url", so only real overrides are restored.
    if push_urls and push_urls != fetch_urls:
        for url in push_urls:
            git(repo_path, "remote", "set-url", "--push", "--add", remote, url)


def main() -> int:
    """Recreate git remotes from a JSON backup.

    Command line: ``restore_git_remotes.py <repo-path> <input-json>``.
    The JSON maps each remote name to ``{"fetch": [...], "push": [...]}``.
    Entries without fetch URLs are skipped. Running the restore repeatedly
    leaves the same configuration.

    Returns:
        0 after all remotes are restored, 1 when the argument count is wrong.

    Raises:
        subprocess.CalledProcessError: a git command that must succeed failed.
    """
    if len(sys.argv) != 3:
        print("Usage: restore_git_remotes.py <repo-path> <input-json>", file=sys.stderr)
        return 1
    repo_path, input_json = sys.argv[1], sys.argv[2]

    data = json.loads(Path(input_json).read_text(encoding="utf-8"))
    for remote, urls in data.items():
        fetch_urls = urls.get("fetch") or []
        if not fetch_urls:
            continue
        _restore_remote(repo_path, remote, fetch_urls, urls.get("push") or [])

    print(input_json)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Rewrite a repository's history with git filter-repo: apply a --replace-text
# mapping and optionally drop whole paths. Remote URLs are backed up to
# <git-dir>/filter-repo/remotes.backup.json before the rewrite and restored
# from that file afterwards.
#
# Usage: run_filter_repo_redaction.sh <repo-path> <replace-text-file> [path-to-remove...]
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <repo-path> <replace-text-file> [path-to-remove...]" >&2
  exit 1
fi

repo_path=$1
replace_text_file=$2
shift 2
remove_paths=("$@")
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Let git decide whether this is a repository so bare/mirror clones and
# linked worktrees are accepted as well.
if ! git -C "$repo_path" rev-parse --git-dir >/dev/null 2>&1; then
  echo "Error: not a git repository: $repo_path" >&2
  exit 1
fi

if [[ ! -f "$replace_text_file" ]]; then
  echo "Error: replace-text file not found: $replace_text_file" >&2
  exit 1
fi
# filter-repo runs under "git -C", i.e. with the repository as its working
# directory, so a path relative to the caller's directory must be made absolute.
replace_text_file="$(cd "$(dirname "$replace_text_file")" && pwd)/$(basename "$replace_text_file")"

# git-filter-repo may be on PATH or only in git's exec path; probe both.
if ! command -v git-filter-repo >/dev/null 2>&1 && ! git filter-repo --version >/dev/null 2>&1; then
  echo "Error: git-filter-repo is not installed." >&2
  exit 1
fi

git_dir="$(git -C "$repo_path" rev-parse --absolute-git-dir)"
backup_dir="$git_dir/filter-repo"
remote_backup_json="$backup_dir/remotes.backup.json"

# Drop the marker a previous filter-repo run leaves behind.
rm -f "$backup_dir/already_ran"

python3 "$script_dir/backup_git_remotes.py" "$repo_path" "$remote_backup_json" >/dev/null

cmd=(
  git
  -C "$repo_path"
  filter-repo
  --force
  --sensitive-data-removal
  --no-fetch
  --replace-text "$replace_text_file"
)

if [[ ${#remove_paths[@]} -gt 0 ]]; then
  for path in "${remove_paths[@]}"; do
    cmd+=(--path "$path")
  done
  # With --invert-paths the listed paths are the ones removed from history.
  cmd+=(--invert-paths)
fi

"${cmd[@]}"

python3 "$script_dir/restore_git_remotes.py" "$repo_path" "$remote_backup_json" >/dev/null

View File

@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Run "gitleaks detect -v" from inside a repository.
#
# Config selection, first match wins:
#   1. the explicit [gitleaks-config-path] argument
#   2. <repo-path>/config/gitleaks.toml when that file exists
#   3. no --config flag (gitleaks defaults)
#
# Usage: run_gitleaks_history_scan.sh <repo-path> [gitleaks-config-path]
set -euo pipefail

if [[ $# -lt 1 || $# -gt 2 ]]; then
  echo "Usage: $0 <repo-path> [gitleaks-config-path]" >&2
  exit 1
fi

repo_path=$1
config_path=${2:-}

# Let git decide whether this is a repository so bare/mirror clones and
# linked worktrees are accepted as well.
if ! git -C "$repo_path" rev-parse --git-dir >/dev/null 2>&1; then
  echo "Error: not a git repository: $repo_path" >&2
  exit 1
fi

if ! command -v gitleaks >/dev/null 2>&1; then
  echo "Error: gitleaks is not installed or not in PATH." >&2
  exit 1
fi

# gitleaks is started after "cd" into the repository, so every path handed to
# it must be absolute or it would be resolved against the wrong directory.
repo_path="$(cd "$repo_path" && pwd)"

config_args=()
if [[ -n "$config_path" ]]; then
  if [[ ! -f "$config_path" ]]; then
    echo "Error: gitleaks config not found: $config_path" >&2
    exit 1
  fi
  config_path="$(cd "$(dirname "$config_path")" && pwd)/$(basename "$config_path")"
  config_args=(--config "$config_path")
elif [[ -f "$repo_path/config/gitleaks.toml" ]]; then
  config_args=(--config "$repo_path/config/gitleaks.toml")
fi

(
  cd "$repo_path"
  # The ${arr[@]+...} form keeps "set -u" from aborting on an empty array in
  # bash releases older than 4.4.
  gitleaks detect -v ${config_args[@]+"${config_args[@]}"}
)

View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Orchestrate a full remediation: inventory refs, pre-scan, rewrite history,
# then post-scan. The script's exit status is the post-scan's status.
#
# Usage: run_history_remediation.sh <repo-path> <replace-text-file> [path-to-remove...]
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <repo-path> <replace-text-file> [path-to-remove...]" >&2
  exit 1
fi

repo_path=$1
replace_text_file=$2
shift 2
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Preflight. A failing pre-scan is tolerated below, so without these checks a
# missing gitleaks would be reported as "found leaks" and the destructive
# rewrite would run with no way to verify its result afterwards.
if ! command -v gitleaks >/dev/null 2>&1; then
  echo "Error: gitleaks is not installed or not in PATH." >&2
  exit 1
fi
if ! command -v git-filter-repo >/dev/null 2>&1 && ! git filter-repo --version >/dev/null 2>&1; then
  echo "Error: git-filter-repo is not installed." >&2
  exit 1
fi
if [[ ! -f "$replace_text_file" ]]; then
  echo "Error: replace-text file not found: $replace_text_file" >&2
  exit 1
fi

echo "[1/4] Inventory refs"
bash "$script_dir/list_git_refs.sh" "$repo_path"

echo "[2/4] Pre-scan"
# Findings are expected here, so a non-zero status must not stop the run.
if ! bash "$script_dir/run_gitleaks_history_scan.sh" "$repo_path"; then
  echo "Pre-scan found leaks. Continuing to remediation..." >&2
fi

echo "[3/4] Rewrite history"
bash "$script_dir/run_filter_repo_redaction.sh" "$repo_path" "$replace_text_file" "$@"

echo "[4/4] Post-scan"
bash "$script_dir/run_gitleaks_history_scan.sh" "$repo_path"