#!/usr/bin/env python3
"""Package a skill folder into a distributable .skill archive."""

from __future__ import annotations

import sys
import zipfile
from pathlib import Path


def should_include(file_path: Path) -> bool:
    """Return True when *file_path* belongs in the packaged archive.

    Python bytecode caches are left out: anything located under a
    ``__pycache__`` directory and any file with a ``.pyc`` suffix.
    """
    return "__pycache__" not in file_path.parts and file_path.suffix != ".pyc"


def collect_files(skill_dir: Path, exclude: Path | None = None) -> list[Path]:
    """Return the files under *skill_dir* that should be archived, sorted.

    Args:
        skill_dir: Root folder of the skill.
        exclude: Optional path that must never be archived. ``package_skill``
            passes the archive it is about to write, so an output file that
            lives inside the skill folder is not added to itself.

    Returns:
        Regular files accepted by ``should_include``, in sorted order so the
        archive layout does not depend on directory listing order.
    """
    skipped = exclude.resolve() if exclude is not None else None
    return sorted(
        path
        for path in skill_dir.rglob("*")
        if path.is_file() and should_include(path) and path.resolve() != skipped
    )


def package_skill(skill_path: str | Path, output_dir: str | Path | None = None) -> Path:
    """Validate a skill folder and zip it into ``<folder-name>.skill``.

    Args:
        skill_path: Folder that contains the skill (must hold a valid SKILL.md).
        output_dir: Directory that receives the archive; it is created when
            missing. Defaults to the current working directory.

    Returns:
        Path of the archive that was written.

    Raises:
        FileNotFoundError: *skill_path* does not exist.
        NotADirectoryError: *skill_path* is not a directory.
        ValueError: the skill failed validation; the message explains why.
    """
    skill_dir = Path(skill_path).resolve()
    if not skill_dir.exists():
        raise FileNotFoundError(f"Skill folder not found: {skill_dir}")
    if not skill_dir.is_dir():
        raise NotADirectoryError(f"Path is not a directory: {skill_dir}")

    # validate_skill is a sibling script, importable only when this directory
    # is on sys.path. Importing it here keeps the path checks above and the
    # helpers in this module usable on their own.
    from validate_skill import validate_skill

    valid, message = validate_skill(skill_dir)
    if not valid:
        raise ValueError(message)

    destination = Path(output_dir).resolve() if output_dir else Path.cwd()
    destination.mkdir(parents=True, exist_ok=True)

    output_path = destination / f"{skill_dir.name}.skill"
    # Build the file list before the archive is opened and leave the archive
    # itself out: when the destination is inside the skill folder (for example
    # `package_skill.py .`), walking the tree would otherwise pick up the
    # half-written or stale .skill file.
    files = collect_files(skill_dir, exclude=output_path)
    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for file_path in files:
            # Member names keep the skill folder as their top-level directory.
            archive.write(file_path, file_path.relative_to(skill_dir.parent))

    return output_path


def main() -> int:
    """Command-line entry point; return the process exit status."""
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print("Usage: package_skill.py <skill-folder> [output-directory]", file=sys.stderr)
        return 1

    output_dir = sys.argv[2] if len(sys.argv) == 3 else None
    try:
        output_path = package_skill(sys.argv[1], output_dir)
    except Exception as exc:  # pragma: no cover - command-line wrapper
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    # Only the archive path goes to stdout so callers can capture it.
    print(output_path)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
#!/usr/bin/env python3
"""Minimal ClawHub-style skill validation."""

from __future__ import annotations

import re
import sys
from pathlib import Path


ALLOWED_PROPERTIES = {"name", "description", "license", "allowed-tools", "metadata"}
MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024


def check_frontmatter(frontmatter: object) -> tuple[bool, str]:
    """Validate already-parsed SKILL.md frontmatter.

    Args:
        frontmatter: The object produced by parsing the YAML frontmatter.

    Returns:
        ``(True, "Skill is valid!")`` on success, otherwise ``(False, reason)``.
    """
    if not isinstance(frontmatter, dict):
        return False, "Frontmatter must be a YAML dictionary"

    # YAML keys are not necessarily strings (`1: x` parses to an int key), so
    # they are converted before sorting and joining into the message.
    unexpected = sorted(str(key) for key in frontmatter if key not in ALLOWED_PROPERTIES)
    if unexpected:
        return (
            False,
            "Unexpected key(s) in SKILL.md frontmatter: "
            + ", ".join(unexpected)
            + ". Allowed properties are: "
            + ", ".join(sorted(ALLOWED_PROPERTIES)),
        )

    for key in ("name", "description"):
        if key not in frontmatter:
            return False, f"Missing '{key}' in frontmatter"

    # A bare `description:` parses to None; str(None) would turn that into the
    # literal text "None" and let an empty field pass validation.
    for key in ("name", "description"):
        value = frontmatter[key]
        if value is None or not str(value).strip():
            return False, f"'{key}' in frontmatter must not be empty"

    name = str(frontmatter["name"]).strip()
    if not re.fullmatch(r"[a-z0-9-]+", name) or name.startswith("-") or name.endswith("-") or "--" in name:
        return False, f"Name '{name}' should be hyphen-case (lowercase letters, digits, and hyphens only)"
    if len(name) > MAX_NAME_LENGTH:
        return False, f"Name is too long ({len(name)} characters). Maximum is {MAX_NAME_LENGTH} characters."

    description = str(frontmatter["description"]).strip()
    if "<" in description or ">" in description:
        return False, "Description cannot contain angle brackets (< or >)"
    if len(description) > MAX_DESCRIPTION_LENGTH:
        return (
            False,
            f"Description is too long ({len(description)} characters). "
            f"Maximum is {MAX_DESCRIPTION_LENGTH} characters.",
        )

    return True, "Skill is valid!"


def validate_skill(skill_path: str | Path) -> tuple[bool, str]:
    """Validate the SKILL.md frontmatter of the skill folder *skill_path*.

    Returns:
        ``(True, "Skill is valid!")`` when every check passes, otherwise
        ``(False, reason)``. Problems with the file itself (missing,
        unreadable, not UTF-8) are reported through the return value rather
        than raised.
    """
    skill_md = Path(skill_path) / "SKILL.md"
    if not skill_md.is_file():
        return False, "SKILL.md not found"

    try:
        # utf-8-sig drops a leading byte-order mark, which would otherwise
        # hide the opening `---` of the frontmatter.
        content = skill_md.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        return False, f"Could not read SKILL.md: {exc}"

    if not content.startswith("---"):
        return False, "No YAML frontmatter found"

    match = re.match(r"^---\n(.*?)\n---", content, re.DOTALL)
    if not match:
        return False, "Invalid frontmatter format"

    # PyYAML is the only third-party dependency; importing it at the point of
    # use keeps the structural checks above working without it.
    import yaml

    try:
        frontmatter = yaml.safe_load(match.group(1))
    except yaml.YAMLError as exc:
        return False, f"Invalid YAML in frontmatter: {exc}"

    return check_frontmatter(frontmatter)


def main() -> int:
    """Command-line entry point; print the verdict and return the exit status."""
    if len(sys.argv) != 2:
        print("Usage: validate_skill.py <skill-folder>", file=sys.stderr)
        return 1

    valid, message = validate_skill(sys.argv[1])
    print(message)
    return 0 if valid else 1


if __name__ == "__main__":
    raise SystemExit(main())
+ +Core tools: + +- `gitleaks detect -v` +- `git filter-repo` + +Bundled scripts: + +- `scripts/list_git_refs.sh` +- `scripts/run_gitleaks_history_scan.sh` +- `scripts/backup_git_remotes.py` +- `scripts/restore_git_remotes.py` +- `scripts/run_filter_repo_redaction.sh` +- `scripts/run_history_remediation.sh` + +## When To Use + +Trigger this skill when the user asks to: + +- scan commit history for secrets +- run `gitleaks detect -v` +- remove passwords, API keys, tokens, or private keys from git history +- run `git filter-repo` +- clean up old commits after a leak +- rewrite history and force-push the cleaned repository + +## Safety Rules + +1. Clean current `HEAD` first, then rewrite history. +2. Rotate real leaked credentials out-of-band. History cleanup is not secret rotation. +3. Prefer empty values or angle-bracket placeholders in tracked samples. +4. Do not use fake secret-looking placeholders (dummy values shaped like a real key or token) when scanners still match them. +5. Treat history rewrite as destructive: + - inventory refs first + - expect force-push + - warn that teammates must reclone or fully scrub old clones +6. Back up `git remote -v` before rewrite and restore it after rewrite or force-push preparation. + +## Workflow + +### 1. Inventory refs + +At repo root: + +```bash +bash skills/git-history-secret-remediation/scripts/list_git_refs.sh /path/to/repo +``` + +This tells you which branches and tags may need to be force-pushed after rewriting. + +### 2. Run the history scan + +Use the bundled wrapper: + +```bash +bash skills/git-history-secret-remediation/scripts/run_gitleaks_history_scan.sh /path/to/repo +``` + +Behavior: + +- auto-detects `config/gitleaks.toml` when present +- otherwise runs `gitleaks detect -v` with tool defaults + +Classify findings into: + +- current-file leaks still present in `HEAD` +- history-only leaks from deleted or renamed files + +### 3. 
Sanitize current HEAD + +Before rewriting history: + +- replace real secrets in tracked sample/config files +- prefer: + - `""` + - empty env values + - `<placeholder>` +- keep real values only in local `.env` or a secret manager + +### 4. Build a replace-text file + +Create a temporary mapping file, for example: + +```text +real-secret-1==> +real-secret-2==> +OPENSSH_PRIVATE_KEY_BEGIN_LINE==> +OPENSSH_PRIVATE_KEY_END_LINE==> +``` + +Notes: + +- default replacement can be empty +- use explicit placeholders only when file syntax requires visible text +- if an old placeholder also triggers scanners, run a second rewrite replacing it with an empty string + +### 5. Remove history-only artifact files when appropriate + +If a file exists only as a leak artifact, prefer removing it from history entirely. + +Examples: + +- `leaks_github.json` +- obsolete docs that embed private-key examples +- scratch backup files that contain real credentials + +### 6. Rewrite history + +Use the bundled wrapper: + +```bash +bash skills/git-history-secret-remediation/scripts/run_filter_repo_redaction.sh \ + /path/to/repo \ + /tmp/replace-text.txt \ + [path-to-remove...] +``` + +Behavior: + +- backs up `git remote -v` metadata before rewriting +- restores remotes after rewriting if needed +- runs `git filter-repo --force --sensitive-data-removal --no-fetch` +- clears `.git/filter-repo/already_ran` when present +- optionally removes listed paths from history with `--invert-paths` + +### 6b. Single-command remediation + +If you already know the replacement mapping and the paths to purge, use the orchestrator: + +```bash +bash skills/git-history-secret-remediation/scripts/run_history_remediation.sh \ + /path/to/repo \ + /tmp/replace-text.txt \ + [path-to-remove...] +``` + +Behavior: + +- inventories refs +- runs a pre-scan +- rewrites history +- restores remotes +- re-runs `gitleaks` +- exits non-zero until the repo scans clean + +### 7. 
#!/usr/bin/env python3
"""Back up git remote fetch/push URLs to JSON."""

from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path


def run(repo_path: str, *args: str) -> str:
    """Run ``git -C <repo_path> <args...>`` and return its stripped stdout.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
        OSError: the git executable could not be started.
    """
    return subprocess.check_output(["git", "-C", repo_path, *args], text=True).strip()


def collect_remotes(repo_path: str) -> dict[str, dict[str, list[str]]]:
    """Return ``{remote: {"fetch": [...], "push": [...]}}`` for *repo_path*.

    Every URL reported by ``git remote get-url --all`` is recorded, so
    remotes configured with several URLs are captured in full.
    """
    payload: dict[str, dict[str, list[str]]] = {}
    for line in run(repo_path, "remote").splitlines():
        remote = line.strip()
        if not remote:
            continue
        fetch_urls = run(repo_path, "remote", "get-url", "--all", remote).splitlines()
        try:
            push_urls = run(repo_path, "remote", "get-url", "--push", "--all", remote).splitlines()
        except subprocess.CalledProcessError:
            # Best effort: fall back to the fetch URLs when the push URLs
            # cannot be queried.
            push_urls = fetch_urls
        payload[remote] = {
            "fetch": [url for url in fetch_urls if url],
            "push": [url for url in push_urls if url],
        }
    return payload


def main() -> int:
    """Command-line entry point; return the process exit status."""
    if len(sys.argv) != 3:
        print("Usage: backup_git_remotes.py <repo-path> <output-json>", file=sys.stderr)
        return 1

    repo_path, output_json = sys.argv[1], sys.argv[2]
    try:
        payload = collect_remotes(repo_path)
    except (subprocess.CalledProcessError, OSError) as exc:
        # Report a failing or missing git as an error message and exit
        # status instead of a traceback; nothing is written in that case.
        print(f"Error: could not read git remotes from {repo_path}: {exc}", file=sys.stderr)
        return 1

    output_path = Path(output_json)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    print(output_path)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
#!/usr/bin/env bash
# Rewrite the history of a git repository with git-filter-repo.
#
# Usage: run_filter_repo_redaction.sh <repo-path> <replace-text-file> [path-to-remove...]
#
#   repo-path          working tree whose history is rewritten (needs a .git directory)
#   replace-text-file  mapping file handed to `git filter-repo --replace-text`
#   path-to-remove     optional paths purged from history via --path/--invert-paths
#
# The configured remotes are saved to .git/filter-repo/remotes.backup.json
# before the rewrite and restored from that file afterwards.
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <repo-path> <replace-text-file> [path-to-remove...]" >&2
  exit 1
fi

repo_path=$1
replace_text_file=$2
shift 2
# "$@" now holds only the optional paths to purge from history.

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
backup_dir="$repo_path/.git/filter-repo"
remote_backup_json="$backup_dir/remotes.backup.json"

if [[ ! -d "$repo_path/.git" ]]; then
  echo "Error: not a git repository: $repo_path" >&2
  exit 1
fi

if [[ ! -f "$replace_text_file" ]]; then
  echo "Error: replace-text file not found: $replace_text_file" >&2
  exit 1
fi

# `git -C` changes directory before filter-repo opens the mapping file, so a
# relative path that passed the check above must be made absolute first.
replace_text_file="$(cd "$(dirname "$replace_text_file")" && pwd)/$(basename "$replace_text_file")"

for tool in git python3; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Error: $tool is not installed or not in PATH." >&2
    exit 1
  fi
done

# git-filter-repo can live on PATH or in git's exec path; having git alone is
# not enough to run `git filter-repo`.
if ! command -v git-filter-repo >/dev/null 2>&1 \
  && [[ ! -x "$(git --exec-path)/git-filter-repo" ]]; then
  echo "Error: git-filter-repo is not installed." >&2
  exit 1
fi

# Clear the marker left behind by an earlier filter-repo run, when present.
rm -f "$backup_dir/already_ran"

python3 "$script_dir/backup_git_remotes.py" "$repo_path" "$remote_backup_json" >/dev/null

cmd=(
  git
  -C "$repo_path"
  filter-repo
  --force
  --sensitive-data-removal
  --no-fetch
  --replace-text "$replace_text_file"
)

if [[ $# -gt 0 ]]; then
  for path in "$@"; do
    cmd+=(--path "$path")
  done
  # With --invert-paths the listed paths are removed instead of kept.
  cmd+=(--invert-paths)
fi

# Capture the status instead of letting `set -e` abort, so the remotes are
# put back even when the rewrite fails part-way.
status=0
"${cmd[@]}" || status=$?

python3 "$script_dir/restore_git_remotes.py" "$repo_path" "$remote_backup_json" >/dev/null
exit "$status"
#!/usr/bin/env bash
# Single-command history remediation: inventory refs, pre-scan, rewrite
# history, then scan again.
#
# Usage: run_history_remediation.sh <repo-path> <replace-text-file> [path-to-remove...]
#
# The exit status is that of the final gitleaks scan, so the script keeps
# exiting non-zero until the rewritten repository scans clean.
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <repo-path> <replace-text-file> [path-to-remove...]" >&2
  exit 1
fi

repo_path=$1
replace_text_file=$2
shift 2

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Fail before history is touched: without gitleaks the pre-scan failure below
# would be reported as "found leaks" and the destructive rewrite would run
# with no way to verify its result.
if ! command -v gitleaks >/dev/null 2>&1; then
  echo "Error: gitleaks is not installed or not in PATH." >&2
  exit 1
fi

# Checked here as well so a bad argument is reported before the pre-scan runs.
if [[ ! -f "$replace_text_file" ]]; then
  echo "Error: replace-text file not found: $replace_text_file" >&2
  exit 1
fi

echo "[1/4] Inventory refs"
bash "$script_dir/list_git_refs.sh" "$repo_path"

echo "[2/4] Pre-scan"
# Findings are expected at this point, so a failing scan does not stop the run.
if ! bash "$script_dir/run_gitleaks_history_scan.sh" "$repo_path"; then
  echo "Pre-scan found leaks. Continuing to remediation..." >&2
fi

echo "[3/4] Rewrite history"
bash "$script_dir/run_filter_repo_redaction.sh" "$repo_path" "$replace_text_file" "$@"

echo "[4/4] Post-scan"
bash "$script_dir/run_gitleaks_history_scan.sh" "$repo_path"