Add git history secret remediation skill
This commit is contained in:
parent
ce53e1cc3b
commit
fb9ded514a
60
scripts/skills/package_skill.py
Executable file
60
scripts/skills/package_skill.py
Executable file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Package a skill folder into a distributable .skill archive."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
from validate_skill import validate_skill
|
||||
|
||||
|
||||
def should_include(file_path: Path) -> bool:
    """Return True when *file_path* belongs in the packaged archive.

    Anything located under a ``__pycache__`` directory, and any file whose
    suffix is ``.pyc``, is left out; every other path is kept.
    """
    in_cache_dir = "__pycache__" in file_path.parts
    is_bytecode = file_path.suffix == ".pyc"
    return not (in_cache_dir or is_bytecode)
|
||||
|
||||
|
||||
def package_skill(skill_path: str | Path, output_dir: str | Path | None = None) -> Path:
    """Validate a skill folder and zip it into ``<folder-name>.skill``.

    Args:
        skill_path: Path to the skill folder to package.
        output_dir: Directory that receives the archive. It is created when
            missing; the current working directory is used when omitted.

    Returns:
        Path of the archive that was written.

    Raises:
        FileNotFoundError: ``skill_path`` does not exist.
        NotADirectoryError: ``skill_path`` is not a directory.
        ValueError: ``validate_skill`` rejected the folder; the message is
            the validator's explanation.
    """
    skill_dir = Path(skill_path).resolve()
    if not skill_dir.exists():
        raise FileNotFoundError(f"Skill folder not found: {skill_dir}")
    if not skill_dir.is_dir():
        raise NotADirectoryError(f"Path is not a directory: {skill_dir}")

    valid, message = validate_skill(skill_dir)
    if not valid:
        raise ValueError(message)

    destination = Path(output_dir).resolve() if output_dir else Path.cwd()
    destination.mkdir(parents=True, exist_ok=True)

    output_path = destination / f"{skill_dir.name}.skill"
    archive_location = output_path.resolve()

    # Collect members before the archive is opened and skip the archive path
    # itself: when the output directory lies inside the skill folder, rglob
    # would otherwise pick up the file being written (or a stale copy from a
    # previous run) and pack it into itself. Sorting keeps the member order
    # independent of filesystem enumeration order.
    members = sorted(
        candidate
        for candidate in skill_dir.rglob("*")
        if candidate.is_file()
        and candidate != archive_location
        and should_include(candidate)
    )

    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for file_path in members:
            # Entries are stored relative to the parent so the archive
            # unpacks into a top-level folder named after the skill.
            archive.write(file_path, file_path.relative_to(skill_dir.parent))

    return output_path
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point for packaging a skill.

    Expects ``sys.argv`` to hold the skill folder and, optionally, an output
    directory. On success the archive path is printed to stdout and 0 is
    returned. Usage and error messages go to stderr so that stdout carries
    only the archive path; 1 is returned in those cases.
    """
    if len(sys.argv) not in (2, 3):
        print(
            "Usage: package_skill.py <path/to/skill-folder> [output-directory]",
            file=sys.stderr,
        )
        return 1

    try:
        output_path = package_skill(sys.argv[1], sys.argv[2] if len(sys.argv) == 3 else None)
    except Exception as exc:  # pragma: no cover - command-line wrapper
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    print(output_path)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
78
scripts/skills/validate_skill.py
Executable file
78
scripts/skills/validate_skill.py
Executable file
@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Minimal ClawHub-style skill validation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
# Frontmatter keys a SKILL.md is allowed to declare.
ALLOWED_PROPERTIES = {"name", "description", "license", "allowed-tools", "metadata"}


def validate_skill(skill_path: str | Path) -> tuple[bool, str]:
    """Validate the YAML frontmatter of ``SKILL.md`` inside a skill folder.

    Checks, in order: the file exists, it opens with a ``---`` delimited
    frontmatter block, the block parses to a YAML mapping, only keys from
    ``ALLOWED_PROPERTIES`` are present, ``name`` and ``description`` are
    present and non-empty, ``name`` is hyphen-case with at most 64
    characters, and ``description`` has no angle brackets and at most 1024
    characters.

    Args:
        skill_path: Path to the skill folder.

    Returns:
        ``(True, "Skill is valid!")`` on success, otherwise ``(False, reason)``
        for the first check that failed.
    """
    skill_dir = Path(skill_path)
    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        return False, "SKILL.md not found"

    content = skill_md.read_text(encoding="utf-8")
    if not content.startswith("---"):
        return False, "No YAML frontmatter found"

    match = re.match(r"^---\n(.*?)\n---", content, re.DOTALL)
    if not match:
        return False, "Invalid frontmatter format"

    try:
        frontmatter = yaml.safe_load(match.group(1))
    except yaml.YAMLError as exc:
        return False, f"Invalid YAML in frontmatter: {exc}"

    if not isinstance(frontmatter, dict):
        return False, "Frontmatter must be a YAML dictionary"

    unexpected = set(frontmatter) - ALLOWED_PROPERTIES
    if unexpected:
        # YAML keys are not always strings (``1:`` loads as an int, ``on:`` as
        # a bool); stringify before sorting/joining so such a key is reported
        # instead of raising TypeError.
        return (
            False,
            "Unexpected key(s) in SKILL.md frontmatter: "
            + ", ".join(sorted(str(key) for key in unexpected))
            + ". Allowed properties are: "
            + ", ".join(sorted(ALLOWED_PROPERTIES)),
        )

    for key in ("name", "description"):
        if key not in frontmatter:
            return False, f"Missing '{key}' in frontmatter"

    # A key written with no value loads as None; str(None) would turn it into
    # the text "None" and let an empty description pass validation.
    for key in ("name", "description"):
        value = frontmatter[key]
        if value is None or not str(value).strip():
            return False, f"'{key}' in frontmatter must not be empty"

    name = str(frontmatter["name"]).strip()
    if not re.fullmatch(r"[a-z0-9-]+", name) or name.startswith("-") or name.endswith("-") or "--" in name:
        return False, f"Name '{name}' should be hyphen-case (lowercase letters, digits, and hyphens only)"
    if len(name) > 64:
        return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."

    description = str(frontmatter["description"]).strip()
    if "<" in description or ">" in description:
        return False, "Description cannot contain angle brackets (< or >)"
    if len(description) > 1024:
        return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."

    return True, "Skill is valid!"
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: validate one skill directory.

    Prints the usage line and returns 1 unless exactly one argument was
    given. Otherwise prints the validator's message and returns 0 for a
    valid skill, 1 for an invalid one.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: validate_skill.py <skill-directory>")
        return 1

    (target,) = arguments
    is_valid, report = validate_skill(target)
    print(report)
    if is_valid:
        return 0
    return 1


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
190
skills/git-history-secret-remediation/SKILL.md
Normal file
190
skills/git-history-secret-remediation/SKILL.md
Normal file
@ -0,0 +1,190 @@
|
||||
---
|
||||
name: git-history-secret-remediation
|
||||
description: Use when a user asks to detect secrets in git commit history, clean tracked sensitive data, rewrite history with git-filter-repo, or verify cleanup with gitleaks. Covers gitleaks detect -v, replacement mapping, path removal, ref inventory, history rewrites, force-push planning, and post-cleanup coordination.
|
||||
license: Internal use only
|
||||
metadata:
|
||||
owner: cloud-neutral-toolkit
|
||||
distribution: clawhub-compatible
|
||||
package-format: .skill
|
||||
---
|
||||
|
||||
# Git History Secret Remediation
|
||||
|
||||
Use this skill when secrets have already been committed and the task is to inspect, scrub, verify, and coordinate git history cleanup.
|
||||
|
||||
Core tools:
|
||||
|
||||
- `gitleaks detect -v`
|
||||
- `git filter-repo`
|
||||
|
||||
Bundled scripts:
|
||||
|
||||
- `scripts/list_git_refs.sh`
|
||||
- `scripts/run_gitleaks_history_scan.sh`
|
||||
- `scripts/backup_git_remotes.py`
|
||||
- `scripts/restore_git_remotes.py`
|
||||
- `scripts/run_filter_repo_redaction.sh`
|
||||
- `scripts/run_history_remediation.sh`
|
||||
|
||||
## When To Use
|
||||
|
||||
Trigger this skill when the user asks to:
|
||||
|
||||
- scan commit history for secrets
|
||||
- run `gitleaks detect -v`
|
||||
- remove passwords, API keys, tokens, or private keys from git history
|
||||
- run `git filter-repo`
|
||||
- clean up old commits after a leak
|
||||
- rewrite history and force-push the cleaned repository
|
||||
|
||||
## Safety Rules
|
||||
|
||||
1. Clean current `HEAD` first, then rewrite history.
|
||||
2. Rotate real leaked credentials out-of-band. History cleanup is not secret rotation.
|
||||
3. Prefer empty values or angle-bracket placeholders in tracked samples.
|
||||
4. Do not use fake secret-looking placeholders (for example, dummy strings shaped like a real API key or token) when scanners still match them.
|
||||
5. Treat history rewrite as destructive:
|
||||
- inventory refs first
|
||||
- expect force-push
|
||||
- warn that teammates must reclone or fully scrub old clones
|
||||
6. Back up the remote configuration (`git remote -v`) before the rewrite, and restore it once the rewrite is done, before preparing the force-push.
|
||||
|
||||
## Workflow
|
||||
|
||||
### 1. Inventory refs
|
||||
|
||||
At repo root:
|
||||
|
||||
```bash
|
||||
bash skills/git-history-secret-remediation/scripts/list_git_refs.sh /path/to/repo
|
||||
```
|
||||
|
||||
This tells you which branches and tags may need to be force-pushed after rewriting.
|
||||
|
||||
### 2. Run the history scan
|
||||
|
||||
Use the bundled wrapper:
|
||||
|
||||
```bash
|
||||
bash skills/git-history-secret-remediation/scripts/run_gitleaks_history_scan.sh /path/to/repo
|
||||
```
|
||||
|
||||
Behavior:
|
||||
|
||||
- auto-detects `config/gitleaks.toml` when present
|
||||
- otherwise runs `gitleaks detect -v` with tool defaults
|
||||
|
||||
Classify findings into:
|
||||
|
||||
- current-file leaks still present in `HEAD`
|
||||
- history-only leaks from deleted or renamed files
|
||||
|
||||
### 3. Sanitize current HEAD
|
||||
|
||||
Before rewriting history:
|
||||
|
||||
- replace real secrets in tracked sample/config files
|
||||
- prefer:
|
||||
- `""`
|
||||
- empty env values
|
||||
- `<OPENSSH_PRIVATE_KEY_CONTENT>`
|
||||
- keep real values only in local `.env` or a secret manager
|
||||
|
||||
### 4. Build a replace-text file
|
||||
|
||||
Create a temporary mapping file, for example:
|
||||
|
||||
```text
|
||||
real-secret-1==>
|
||||
real-secret-2==>
|
||||
OPENSSH_PRIVATE_KEY_BEGIN_LINE==><OPENSSH_PRIVATE_KEY_BEGIN_LINE>
|
||||
OPENSSH_PRIVATE_KEY_END_LINE==><OPENSSH_PRIVATE_KEY_END_LINE>
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- the replacement after `==>` may be left empty to delete the match; a line with no `==>` at all is replaced with `***REMOVED***`
|
||||
- use explicit placeholders only when file syntax requires visible text
|
||||
- if an old placeholder also triggers scanners, run a second rewrite replacing it with an empty string
|
||||
|
||||
### 5. Remove history-only artifact files when appropriate
|
||||
|
||||
If a file exists only as a leak artifact, prefer removing it from history entirely.
|
||||
|
||||
Examples:
|
||||
|
||||
- `leaks_github.json`
|
||||
- obsolete docs that embed private-key examples
|
||||
- scratch backup files that contain real credentials
|
||||
|
||||
### 6. Rewrite history
|
||||
|
||||
Use the bundled wrapper:
|
||||
|
||||
```bash
|
||||
bash skills/git-history-secret-remediation/scripts/run_filter_repo_redaction.sh \
|
||||
/path/to/repo \
|
||||
/tmp/replace-text.txt \
|
||||
[path-to-remove...]
|
||||
```
|
||||
|
||||
Behavior:
|
||||
|
||||
- backs up `git remote -v` metadata before rewriting
|
||||
- restores remotes after rewriting if needed
|
||||
- runs `git filter-repo --force --sensitive-data-removal --no-fetch`
|
||||
- clears `.git/filter-repo/already_ran` when present
|
||||
- optionally removes listed paths from history with `--invert-paths`
|
||||
|
||||
### 6b. Single-command remediation
|
||||
|
||||
If you already know the replacement mapping and the paths to purge, use the orchestrator:
|
||||
|
||||
```bash
|
||||
bash skills/git-history-secret-remediation/scripts/run_history_remediation.sh \
|
||||
/path/to/repo \
|
||||
/tmp/replace-text.txt \
|
||||
[path-to-remove...]
|
||||
```
|
||||
|
||||
Behavior:
|
||||
|
||||
- inventories refs
|
||||
- runs a pre-scan
|
||||
- rewrites history
|
||||
- restores remotes
|
||||
- re-runs `gitleaks`
|
||||
- exits non-zero until the repo scans clean
|
||||
|
||||
### 7. Re-run gitleaks
|
||||
|
||||
Repeat until:
|
||||
|
||||
- real secrets are gone from all commits
|
||||
- remaining findings, if any, are only deliberate placeholders you explicitly accept
|
||||
|
||||
### 8. Push rewritten refs
|
||||
|
||||
For normal repos with all relevant local branches:
|
||||
|
||||
```bash
|
||||
git push --force origin --all
|
||||
git push --force origin --tags
|
||||
```
|
||||
|
||||
If the remote has important branches not present locally:
|
||||
|
||||
- create local tracking branches first
|
||||
- or do the rewrite in a fresh mirror clone and push from there
|
||||
|
||||
Do not assume a normal non-bare clone can safely use `git push --mirror`.
|
||||
|
||||
### 9. Post-cleanup coordination
|
||||
|
||||
Always tell the user to:
|
||||
|
||||
- rotate leaked credentials
|
||||
- purge or invalidate old access where relevant
|
||||
- have other clones recloned or scrubbed
|
||||
- notify repo admins if server-side cache or object cleanup is needed
|
||||
- use the remote backup JSON when reconstructing remotes after force-push in a fresh clone
|
||||
47
skills/git-history-secret-remediation/scripts/backup_git_remotes.py
Executable file
47
skills/git-history-secret-remediation/scripts/backup_git_remotes.py
Executable file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Back up git remote fetch/push URLs to JSON."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def run(repo_path: str, *args: str) -> str:
    """Run ``git -C <repo_path> <args...>`` and return its stripped stdout.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
    """
    command = ["git", "-C", repo_path]
    command.extend(args)
    output = subprocess.check_output(command, text=True)
    return output.strip()
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: dump every remote's URLs to a JSON file.

    Expects ``<repo-path> <output-json>`` in ``sys.argv``. For each remote
    reported by ``git remote`` the fetch and push URL lists are recorded
    under the keys ``"fetch"`` and ``"push"``; when the push URLs cannot be
    queried, the fetch URLs are used for both. The parent directory of the
    output file is created when missing.

    Returns:
        0 after writing the file and printing its path, or 1 (with a usage
        line on stderr) when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print("Usage: backup_git_remotes.py <repo-path> <output-json>", file=sys.stderr)
        return 1

    _, repo_path, output_json = sys.argv
    snapshot: dict[str, dict[str, list[str]]] = {}

    for raw_name in run(repo_path, "remote").splitlines():
        name = raw_name.strip()
        if name:
            fetch_lines = run(repo_path, "remote", "get-url", "--all", name).splitlines()
            try:
                push_lines = run(repo_path, "remote", "get-url", "--push", "--all", name).splitlines()
            except subprocess.CalledProcessError:
                # Fall back to the fetch URLs when the push lookup fails.
                push_lines = fetch_lines
            snapshot[name] = {
                "fetch": list(filter(None, fetch_lines)),
                "push": list(filter(None, push_lines)),
            }

    destination = Path(output_json)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(snapshot, indent=2, sort_keys=True)
    destination.write_text(f"{serialized}\n", encoding="utf-8")
    print(destination)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
16
skills/git-history-secret-remediation/scripts/list_git_refs.sh
Executable file
16
skills/git-history-secret-remediation/scripts/list_git_refs.sh
Executable file
@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
# Print the full names of all local branches, tags, and origin
# remote-tracking refs of a repository, one per line.
#
# Usage: list_git_refs.sh <repo-path>
set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <repo-path>" >&2
  exit 1
fi

repo_path=$1

# Let git decide whether this is a repository instead of testing for a
# ".git" directory: bare/mirror clones have no ".git" entry and linked
# worktrees have a ".git" *file*, yet both have refs worth listing.
if ! git -C "$repo_path" rev-parse --git-dir >/dev/null 2>&1; then
  echo "Error: not a git repository: $repo_path" >&2
  exit 1
fi

git -C "$repo_path" for-each-ref --format='%(refname)' refs/heads refs/tags refs/remotes/origin
|
||||
53
skills/git-history-secret-remediation/scripts/restore_git_remotes.py
Executable file
53
skills/git-history-secret-remediation/scripts/restore_git_remotes.py
Executable file
@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Restore git remote fetch/push URLs from JSON."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def git(repo_path: str, *args: str) -> None:
    """Run ``git -C <repo_path> <args...>``, inheriting stdout and stderr.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
    """
    command = ["git", "-C", repo_path]
    command += args
    subprocess.check_call(command)
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: restore remotes from a JSON backup.

    Expects ``<repo-path> <input-json>`` in ``sys.argv``. The JSON maps each
    remote name to ``{"fetch": [...], "push": [...]}``. Remotes without any
    fetch URL are skipped. A missing remote is created with ``git remote
    add``; for a remote that already exists the configured URL lists are
    replaced by the saved ones.

    Returns:
        0 after all remotes were processed (the input path is printed), or 1
        (with a usage line on stderr) when the argument count is wrong.

    Raises:
        subprocess.CalledProcessError: a git command failed.
    """
    if len(sys.argv) != 3:
        print("Usage: restore_git_remotes.py <repo-path> <input-json>", file=sys.stderr)
        return 1

    repo_path, input_json = sys.argv[1], sys.argv[2]
    data = json.loads(Path(input_json).read_text(encoding="utf-8"))

    for remote, urls in data.items():
        fetch_urls = urls.get("fetch") or []
        push_urls = urls.get("push") or []
        if not fetch_urls:
            continue

        existing = subprocess.run(
            ["git", "-C", repo_path, "remote", "get-url", remote],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if existing.returncode != 0:
            git(repo_path, "remote", "add", remote, fetch_urls[0])
        else:
            # Collapse every configured URL to the first saved one. Plain
            # `set-url` only rewrites the first entry, so the `--add` calls
            # below would otherwise append duplicates of URLs that are still
            # configured, and stale extra URLs would never be removed.
            git(repo_path, "config", "--replace-all", f"remote.{remote}.url", fetch_urls[0])

        for url in fetch_urls[1:]:
            git(repo_path, "remote", "set-url", "--add", remote, url)

        if push_urls:
            # Same reasoning for push URLs: reset the list to one entry
            # before appending the remaining saved URLs.
            git(repo_path, "config", "--replace-all", f"remote.{remote}.pushurl", push_urls[0])
            for url in push_urls[1:]:
                git(repo_path, "remote", "set-url", "--push", "--add", remote, url)

    print(input_json)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
61
skills/git-history-secret-remediation/scripts/run_filter_repo_redaction.sh
Executable file
61
skills/git-history-secret-remediation/scripts/run_filter_repo_redaction.sh
Executable file
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env bash
# Rewrite a repository's history with `git filter-repo`, replacing the text
# listed in a replace-text file and optionally dropping whole paths.
# Remote URLs are saved to .git/filter-repo/remotes.backup.json before the
# rewrite and restored from it afterwards.
#
# Usage: run_filter_repo_redaction.sh <repo-path> <replace-text-file> [path-to-remove...]
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <repo-path> <replace-text-file> [path-to-remove...]" >&2
  exit 1
fi

repo_path=$1
replace_text_file=$2
shift 2
remove_paths=("$@")
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
backup_dir="$repo_path/.git/filter-repo"
remote_backup_json="$backup_dir/remotes.backup.json"

if [[ ! -d "$repo_path/.git" ]]; then
  echo "Error: not a git repository: $repo_path" >&2
  exit 1
fi

if [[ ! -f "$replace_text_file" ]]; then
  echo "Error: replace-text file not found: $replace_text_file" >&2
  exit 1
fi

# filter-repo is started through `git -C "$repo_path"`, i.e. from inside the
# repository, so a path relative to the caller's directory must be made
# absolute or it would point at a different (or missing) file.
replace_text_file="$(cd "$(dirname "$replace_text_file")" && pwd)/$(basename "$replace_text_file")"

# Check for filter-repo itself, either on PATH or as a git subcommand.
# Finding the plain `git` binary says nothing about filter-repo.
if ! command -v git-filter-repo >/dev/null 2>&1 && ! git filter-repo --version >/dev/null 2>&1; then
  echo "Error: git-filter-repo is not installed." >&2
  exit 1
fi

# Clear the marker file left behind by an earlier filter-repo run, if any.
rm -f -- "$backup_dir/already_ran"

python3 "$script_dir/backup_git_remotes.py" "$repo_path" "$remote_backup_json" >/dev/null

cmd=(
  git
  -C "$repo_path"
  filter-repo
  --force
  --sensitive-data-removal
  --no-fetch
  --replace-text "$replace_text_file"
)

# With --invert-paths the listed paths are the ones removed from history.
if [[ ${#remove_paths[@]} -gt 0 ]]; then
  for path in "${remove_paths[@]}"; do
    cmd+=(--path "$path")
  done
  cmd+=(--invert-paths)
fi

"${cmd[@]}"
python3 "$script_dir/restore_git_remotes.py" "$repo_path" "$remote_backup_json" >/dev/null
|
||||
32
skills/git-history-secret-remediation/scripts/run_gitleaks_history_scan.sh
Executable file
32
skills/git-history-secret-remediation/scripts/run_gitleaks_history_scan.sh
Executable file
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env bash
# Run `gitleaks detect -v` from the root of a repository.
# Uses the config given as second argument, else <repo>/config/gitleaks.toml
# when that file exists, else gitleaks' built-in defaults.
#
# Usage: run_gitleaks_history_scan.sh <repo-path> [gitleaks-config-path]
set -euo pipefail

if [[ $# -lt 1 || $# -gt 2 ]]; then
  echo "Usage: $0 <repo-path> [gitleaks-config-path]" >&2
  exit 1
fi

repo_path=$1
config_path=${2:-}

if [[ ! -d "$repo_path/.git" ]]; then
  echo "Error: not a git repository: $repo_path" >&2
  exit 1
fi

if ! command -v gitleaks >/dev/null 2>&1; then
  echo "Error: gitleaks is not installed or not in PATH." >&2
  exit 1
fi

# The scan runs after `cd` into the repository, so every path handed to
# gitleaks must be absolute; a path relative to the caller's directory would
# otherwise be resolved against the repository instead.
repo_root="$(cd "$repo_path" && pwd)"

config_args=()
if [[ -n "$config_path" ]]; then
  if [[ -f "$config_path" ]]; then
    config_path="$(cd "$(dirname "$config_path")" && pwd)/$(basename "$config_path")"
  fi
  config_args=(--config "$config_path")
elif [[ -f "$repo_root/config/gitleaks.toml" ]]; then
  config_args=(--config "$repo_root/config/gitleaks.toml")
fi

(
  cd "$repo_root"
  # The ${arr[@]+...} form expands to nothing for an empty array. A bare
  # "${config_args[@]}" aborts under `set -u` on bash older than 4.4.
  gitleaks detect -v ${config_args[@]+"${config_args[@]}"}
)
|
||||
27
skills/git-history-secret-remediation/scripts/run_history_remediation.sh
Executable file
27
skills/git-history-secret-remediation/scripts/run_history_remediation.sh
Executable file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env bash
# One-shot remediation: list refs, scan, rewrite history, scan again.
# The exit status is that of the final scan, so the script keeps failing
# until the rewritten repository scans clean.
#
# Usage: run_history_remediation.sh <repo-path> <replace-text-file> [path-to-remove...]
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <repo-path> <replace-text-file> [path-to-remove...]" >&2
  exit 1
fi

repo_path=$1
replace_text_file=$2
shift 2

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Fail before anything destructive happens: without this check a missing
# scanner would be reported as "Pre-scan found leaks" below and the history
# rewrite would still run, with no way to verify its result afterwards.
if ! command -v gitleaks >/dev/null 2>&1; then
  echo "Error: gitleaks is not installed or not in PATH." >&2
  exit 1
fi

echo "[1/4] Inventory refs"
bash "$script_dir/list_git_refs.sh" "$repo_path"

echo "[2/4] Pre-scan"
# A failing pre-scan is expected here; it is what the rewrite is meant to fix.
if ! bash "$script_dir/run_gitleaks_history_scan.sh" "$repo_path"; then
  echo "Pre-scan found leaks. Continuing to remediation..." >&2
fi

echo "[3/4] Rewrite history"
bash "$script_dir/run_filter_repo_redaction.sh" "$repo_path" "$replace_text_file" "$@"

echo "[4/4] Post-scan"
bash "$script_dir/run_gitleaks_history_scan.sh" "$repo_path"
|
||||
Loading…
Reference in New Issue
Block a user