diff --git a/.github/workflows/mutation-test.yml b/.github/workflows/mutation-test.yml
new file mode 100644
index 0000000000..8094ca5746
--- /dev/null
+++ b/.github/workflows/mutation-test.yml
@@ -0,0 +1,131 @@
+name: "Mutation Test (manual)"
+
+# Manually-triggered mutation testing. Runs mutmut against the scope
+# configured in [tool.mutmut] in pyproject.toml (currently the
+# litellm/proxy/management_endpoints/ folder). Intended cadence is roughly
+# weekly — clicked from the Actions tab when someone wants a fresh report.
+#
+# Uploads a structured `mutation-report.md` (Meta ACH-style: original +
+# mutated function with `# MUTANT START`/`# MUTANT END` delimiters + the
+# existing tests + a task instruction) as a workflow artifact. Failures
+# do not block anything because nothing depends on this workflow.
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: mutation-test-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  mutation:
+    name: Run mutmut
+    runs-on: ubuntu-latest
+    # Whole-folder mutation against ~15 files / ~7.5k LOC can take hours.
+    # 350 minutes is just under the GitHub-hosted job cap of 360 minutes.
+    timeout-minutes: 350
+
+    steps:
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
+        with:
+          version: "0.10.9"
+
+      # Caches uv's download cache together with the project's .venv, keyed on
+      # the hash of uv.lock (falling back to any earlier cache for this OS).
+      # Later steps in this job modify .venv in place: litellm is reinstalled
+      # non-editable and pytest-retry is uninstalled.
+      # NOTE(review): the cache is presumably saved after those modifications,
+      # so a restored .venv may not match uv.lock; this appears to rely on the
+      # `uv sync --frozen` step below to bring it back in line — confirm.
+      - name: Cache uv dependencies
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
+        with:
+          path: |
+            ~/.cache/uv
+            .venv
+          key: ${{ runner.os }}-uv-${{ hashFiles('uv.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-uv-
+
+      - name: Install dependencies
+        run: |
+          uv sync --frozen --group ci --group proxy-dev --extra google --extra proxy --extra semantic-router
+
+      - name: Generate Prisma client
+        env:
+          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
+        run: |
+          uv run --no-sync prisma generate --schema litellm/proxy/schema.prisma
+
+      # mutmut 3.x runs tests inside a `mutants/` sandbox where it injects
+      # mutation trampolines. uv installs the project as editable by default,
+      # which puts the original source dir on sys.path via a .pth file and
+      # shadows the sandbox copy — so tests would never exercise the mutated
+      # code. Reinstalling non-editable removes the .pth shadow.
+      - name: Reinstall litellm non-editable (so mutants/ is not shadowed)
+        run: |
+          uv pip uninstall litellm
+          uv pip install . --no-deps
+
+      # pytest-retry's pytest_configure hook crashes with
+      # `INTERNALERROR: no option named 'filtered_exceptions'` when invoked
+      # via mutmut's in-process pytest.main() call. The entry-point name
+      # doesn't normalize cleanly with `-p no:<name>`, so just remove the
+      # package outright. Reruns are wrong for mutation testing anyway —
+      # rerunning a "failed" mutant test would mask which mutants are killed.
+      - name: Remove pytest plugins that conflict with mutmut
+        run: |
+          uv pip uninstall pytest-retry || true
+
+      - name: Run mutmut
+        # Step-level limit kept below the job's 350-minute timeout so that a
+        # mutation pass that runs long is stopped with time to spare for the
+        # `if: always()` report and upload steps to publish whatever results
+        # exist. Without it, a run that uses the whole job budget is cancelled
+        # at the job level and the report may never be generated.
+        timeout-minutes: 320
+        env:
+          # Make the mutants/ sandbox win over site-packages on sys.path so the
+          # trampolined files are imported instead of the installed copy.
+          PYTHONPATH: ${{ github.workspace }}/mutants
+        run: |
+          # pipefail: the step's status must reflect mutmut, not `tee`.
+          set -o pipefail
+          mkdir -p mutants
+          uv run --no-sync --with mutmut==3.5.0 mutmut run 2>&1 | tee mutmut-run.log
+
+      # Generate the structured report. The script embeds the enclosing
+      # function source for each survivor (via Python AST) and includes the
+      # existing test files, so an LLM agent has enough context to write
+      # killing tests without further file lookups. Modeled on Meta's ACH
+      # prompt template (arXiv 2501.12862).
+      - name: Generate detailed mutation report
+        if: always()
+        run: |
+          # Best-effort step: every command may fail (e.g. when mutmut itself
+          # crashed) without failing the job.
+          set +e
+          uv run --no-sync --with mutmut==3.5.0 mutmut export-cicd-stats > /dev/null 2>&1
+          uv run --no-sync --with mutmut==3.5.0 mutmut results > mutmut-results.txt 2>&1
+          uv run --no-sync python scripts/mutation_report.py
+          # The full report can be very long for big test files; the run-page
+          # summary cuts off at 1 MB. Append the head of the report (summary
+          # + survivor list) and link out to the artifact for the full body.
+          # If the report was never written, say so instead of pointing
+          # readers at an artifact file that does not exist.
+          {
+            if [ -s mutation-report.md ]; then
+              head -c 900000 mutation-report.md
+              echo ""
+              echo ""
+              echo "_Full report (with embedded function bodies and test files) is in the workflow artifact._"
+            else
+              echo "_mutation-report.md was not generated; see mutmut-run.log and mutmut-results.txt in the workflow artifact._"
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload mutmut artifacts
+        if: always()
+        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
+        with:
+          name: mutmut-${{ github.run_id }}-${{ github.run_attempt }}
+          path: |
+            mutation-report.md
+            mutmut-results.txt
+            mutmut-run.log
+            mutants/mutmut-stats.json
+            mutants/mutmut-cicd-stats.json
+            mutants/litellm/proxy/management_endpoints/**/*.py
+          if-no-files-found: warn
+          retention-days: 14
diff --git a/pyproject.toml b/pyproject.toml
index 5cd83148d3..65557d8a9e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -275,6 +275,33 @@ filterwarnings = [
     "ignore::DeprecationWarning:pytest_asyncio.plugin",
 ]
 
+[tool.mutmut]
+# Mutation-testing scope. Driven by the manually-triggered workflow at
+# .github/workflows/mutation-test.yml. mutmut is not part of the project's
+# default install; it is pulled in via `uv run --with mutmut==<version>` in CI.
+# `also_copy = ["litellm/"]` is required because mutmut runs in a `mutants/`
+# sandbox and the test conftest imports from across the litellm package.
+paths_to_mutate = [
+    "litellm/proxy/management_endpoints/",
+]
+tests_dir = [
+    "tests/test_litellm/proxy/management_endpoints/",
+]
+also_copy = [
+    "litellm/",
+]
+# Disable rerun/parallel plugins for mutation runs:
+# - pytest-retry triggers an `INTERNALERROR: no option named 'filtered_exceptions'`
+#   when invoked via mutmut's in-process `pytest.main()` call. `-p no:retry` is
+#   best-effort only: the CI workflow also uninstalls pytest-retry, because its
+#   entry-point name does not normalize cleanly with `-p no:<name>`.
+# - Rerunning a "failed" test on a mutant would mask which mutants are killed
+#   vs. survive, so reruns are wrong for mutation testing regardless.
+# - xdist is unnecessary inside mutmut (mutmut handles its own parallelism).
+pytest_add_cli_args = [
+    "-p", "no:retry",
+    "-p", "no:rerunfailures",
+    "-p", "no:xdist",
+]
+
 [tool.coverage.run]
 source = ["litellm"]
 relative_files = true
diff --git a/scripts/mutation_report.py b/scripts/mutation_report.py
new file mode 100644
index 0000000000..a606e3f71c
--- /dev/null
+++ b/scripts/mutation_report.py
@@ -0,0 +1,423 @@
+#!/usr/bin/env python3
+"""Generate an agent-actionable mutation testing report.
+
+Reads the mutmut sandbox state at `mutants/` and produces a single
+`mutation-report.md` grouped by function. For each function with surviving
+mutants, the report embeds the original function source (via AST), the
+unified diff for each surviving mutation (via `mutmut show`), and the
+existing test file(s) — followed by an ACH-style instruction asking the
+reader to write tests that kill the survivors.
+
+Run after `mutmut run` and `mutmut export-cicd-stats`. Expects mutmut to be
+invokable as `uv run --no-sync --with mutmut==<version> mutmut <subcommand>`.
+"""
+from __future__ import annotations
+
+import ast
+import json
+import re
+import subprocess
+import sys
+import tomllib
+from collections import defaultdict
+from difflib import SequenceMatcher
+from pathlib import Path
+from textwrap import dedent
+
+ROOT = Path(__file__).resolve().parent.parent
+MUTMUT_INVOCATION = ["uv", "run", "--no-sync", "--with", "mutmut==3.5.0", "mutmut"]
+
+
+def load_mutmut_config() -> dict:
+    """Return the ``[tool.mutmut]`` table from the repository's pyproject.toml.
+
+    Raises ``KeyError`` if the file has no ``tool`` or ``tool.mutmut`` table.
+    """
+    pyproject = ROOT / "pyproject.toml"
+    with pyproject.open("rb") as handle:
+        parsed = tomllib.load(handle)
+    return parsed["tool"]["mutmut"]
+
+
+def get_survivors() -> list[str]:
+    """List the mutant names that ``mutmut results`` reports as survived.
+
+    Only stdout is inspected, one line at a time; the command's exit status
+    is ignored (``check=False``).
+    """
+    completed = subprocess.run(
+        [*MUTMUT_INVOCATION, "results"], capture_output=True, text=True, check=False
+    )
+    # A result line looks like `<mutant name>: survived`.
+    survived_line = re.compile(r"\s*(\S+):\s*survived\s*$")
+    hits = (survived_line.match(row) for row in completed.stdout.splitlines())
+    return [hit.group(1) for hit in hits if hit]
+
+
+def get_mutmut_show(mutant_name: str) -> str:
+    """Return the stripped stdout of ``mutmut show <mutant_name>``.
+
+    The exit status is ignored; when the command prints nothing to stdout a
+    fixed placeholder string is returned instead of an empty string.
+    """
+    command = [*MUTMUT_INVOCATION, "show", mutant_name]
+    result = subprocess.run(command, capture_output=True, text=True, check=False)
+    shown = result.stdout.strip()
+    if shown:
+        return shown
+    return "(mutmut show produced no output)"
+
+
+def parse_mutant_name(name: str) -> tuple[str, str, str]:
+    """Split a mutmut mutant identifier into ``(module, function, N)``.
+
+    Two mangled forms are recognised:
+
+    - ``<dotted.module>.x_<function>__mutmut_<N>`` for plain functions.
+      mutmut prefixes the function with ``x_`` (single underscore), so a
+      function named ``_foo`` becomes ``x__foo__mutmut_N``; everything
+      between the ``x_`` prefix and ``__mutmut_<N>`` is the function name,
+      leading underscores included.
+    - ``<dotted.module>.x<SEP><Class><SEP><method>__mutmut_<N>`` for class
+      methods, where ``<SEP>`` is U+01C1. This matches how mutmut 3.x is
+      understood to mangle methods (NOTE(review): confirm against the
+      pinned mutmut version). The class name is dropped so the returned
+      tuple keeps its three-element shape; ``function`` is the bare method
+      name.
+
+    Returns ``(name, name, "?")`` when ``name`` matches neither form.
+    """
+    sep = "\u01c1"
+    pattern = rf"^(.+)\.x(?:_|{sep}[^{sep}]+{sep})(.+)__mutmut_(\d+)$"
+    m = re.match(pattern, name)
+    if not m:
+        return name, name, "?"
+    return m.group(1), m.group(2), m.group(3)
+
+
+def function_anchor(module_path: str, function_name: str) -> str:
+    """Build an HTML anchor id from a module path and a function name.
+
+    The two parts are joined with ``-`` and lower-cased; every run of
+    characters other than ``a-z``, ``0-9``, ``_`` and ``-`` collapses to a
+    single ``-``, and leading/trailing ``-`` are removed.
+    """
+    slug_source = (module_path + "-" + function_name).lower()
+    slug = re.sub(r"[^a-z0-9_-]+", "-", slug_source)
+    return slug.strip("-")
+
+
+def module_to_file(module_path: str) -> Path | None:
+    """Map a dotted module path to ``<ROOT>/<a>/<b>/<c>.py``.
+
+    Returns the path only if it exists on disk, otherwise ``None``.
+    """
+    relative = Path(*module_path.split(".")).with_suffix(".py")
+    source_file = ROOT / relative
+    if source_file.exists():
+        return source_file
+    return None
+
+
+def find_function_in_file(
+    file_path: Path, function_name: str
+) -> tuple[int, int, str, list[int]] | None:
+    """Find a function or method named ``function_name`` in ``file_path``.
+
+    Every ``def``/``async def`` at any nesting level is considered, in the
+    order ``ast.walk`` yields them; the first one found is returned.
+
+    Returns ``(start_line, end_line, source, all_match_lines)`` or ``None``.
+    ``start_line``/``end_line`` are 1-indexed and inclusive, ``source`` is
+    the text of exactly those lines, and ``all_match_lines`` holds the start
+    line of every definition in the file with this name. When
+    ``len(all_match_lines) > 1`` the file defines the same name in multiple
+    places (e.g., a module-level helper and a class method); this function
+    is given only the bare name, so it cannot tell which definition was
+    mutated. Callers surface a disambiguation note in that case.
+
+    ``None`` is returned when no definition has this name, and also when the
+    file cannot be decoded as UTF-8 or does not parse — one unreadable
+    source file should cost a single report section, not the whole report.
+    """
+    try:
+        # Python source is UTF-8 unless it declares otherwise, so do not let
+        # the locale's default encoding decide how it is decoded.
+        src = file_path.read_text(encoding="utf-8")
+        tree = ast.parse(src)
+    except (SyntaxError, UnicodeDecodeError):
+        return None
+    matches = [
+        node
+        for node in ast.walk(tree)
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
+        and node.name == function_name
+    ]
+    if not matches:
+        return None
+    first = matches[0]
+    lines = src.splitlines()
+    return (
+        first.lineno,
+        first.end_lineno,
+        # lineno is 1-indexed and end_lineno is inclusive.
+        "\n".join(lines[first.lineno - 1 : first.end_lineno]),
+        [m.lineno for m in matches],
+    )
+
+
+def collect_test_files(tests_dir: list[str]) -> list[Path]:
+    """Resolve the configured test locations to concrete test files.
+
+    Each entry is taken relative to ``ROOT``. A file is included as-is; a
+    directory contributes every ``test_*.py`` beneath it (recursively,
+    sorted within that directory). Entries that are neither are ignored.
+    """
+    collected: list[Path] = []
+    for configured in tests_dir:
+        location = ROOT / configured
+        if location.is_file():
+            collected.append(location)
+            continue
+        if location.is_dir():
+            collected += sorted(location.rglob("test_*.py"))
+    return collected
+
+
+def _indent_of(line: str) -> str:
+    """Return the leading whitespace of ``line`` (all of it if it is blank)."""
+    # `line.lstrip()` is always a suffix of `line`; what precedes it is the
+    # indentation. An empty suffix leaves the string unchanged.
+    return line.removesuffix(line.lstrip())
+
+
+def render_meta_style_mutant(
+    module_path: str, function_name: str, mutant_num: str
+) -> str | None:
+    """Render the mutated function with `# MUTANT START`/`# MUTANT END` delimiters.
+
+    Reads `mutants/<module>.py` (the trampoline file mutmut emits), finds
+    `x_<func>__mutmut_orig` and `x_<func>__mutmut_<N>`, and returns the
+    mutated version with every run of lines that differs from
+    `__mutmut_orig` wrapped in `# MUTANT START`/`# MUTANT END` comments —
+    the format from Meta's ACH paper (arXiv 2501.12862, Table 1). Lines the
+    mutation removed outright are shown inside the block as
+    `# (deleted by mutation): ...` comments.
+
+    The first line of the function is rewritten to use the original function
+    name so the agent sees the source as it would appear in the file (rather
+    than mutmut's internal `x_*__mutmut_<N>` name).
+
+    Returns None if the trampoline file is missing, cannot be decoded as
+    UTF-8 or parsed, or if either function cannot be found (the caller
+    falls back to the unified diff).
+    """
+    trampoline = ROOT / "mutants" / Path(*module_path.split(".")).with_suffix(".py")
+    if not trampoline.exists():
+        return None
+
+    try:
+        # Python source is UTF-8 unless it declares otherwise; decoding with
+        # the locale's default encoding can fail or corrupt non-ASCII names.
+        src = trampoline.read_text(encoding="utf-8")
+        tree = ast.parse(src)
+    except (SyntaxError, UnicodeDecodeError):
+        return None
+    file_lines = src.splitlines()
+
+    orig_def = f"x_{function_name}__mutmut_orig"
+    mutant_def = f"x_{function_name}__mutmut_{mutant_num}"
+
+    orig_node = mutated_node = None
+    for node in ast.walk(tree):
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            if node.name == orig_def:
+                orig_node = node
+            elif node.name == mutant_def:
+                mutated_node = node
+
+    if orig_node is None or mutated_node is None:
+        return None
+
+    # lineno is 1-indexed and end_lineno is inclusive.
+    orig_lines = file_lines[orig_node.lineno - 1 : orig_node.end_lineno]
+    mutated_lines = file_lines[mutated_node.lineno - 1 : mutated_node.end_lineno]
+    if not orig_lines or not mutated_lines:
+        return None
+
+    # Rewrite the def line to use the original (non-trampolined) function name
+    # so the agent sees the function as it appears in the source file. Doing
+    # this on both sides also keeps the def line out of the diff.
+    orig_lines[0] = orig_lines[0].replace(orig_def, function_name, 1)
+    mutated_lines[0] = mutated_lines[0].replace(mutant_def, function_name, 1)
+
+    # autojunk=False: for sequences of 200+ items the default heuristic
+    # treats frequently repeated lines (blank lines, lone brackets) as junk,
+    # which can make the delimited region larger than the actual change.
+    matcher = SequenceMatcher(a=orig_lines, b=mutated_lines, autojunk=False)
+    out: list[str] = []
+    in_diff = False
+
+    for op, i1, i2, j1, j2 in matcher.get_opcodes():
+        if op == "equal":
+            if in_diff:
+                # Close the block at the indent of the line just inside it.
+                indent = _indent_of(out[-1]) if out else ""
+                out.append(f"{indent}# MUTANT END")
+                in_diff = False
+            out.extend(mutated_lines[j1:j2])
+        else:
+            if not in_diff:
+                # Open the block at the indent of the first differing line.
+                # For a deletion at the very end there is no mutated line at
+                # j1, so fall back to the first deleted original line.
+                if j1 < len(mutated_lines):
+                    indent = _indent_of(mutated_lines[j1])
+                elif i1 < len(orig_lines):
+                    indent = _indent_of(orig_lines[i1])
+                else:
+                    indent = ""
+                out.append(f"{indent}# MUTANT START")
+                in_diff = True
+            if op == "delete":
+                # Mutation removed lines — surface what was deleted as a
+                # comment so the agent can see the intent of the change.
+                for deleted in orig_lines[i1:i2]:
+                    indent = _indent_of(deleted)
+                    out.append(f"{indent}# (deleted by mutation): {deleted.lstrip()}")
+            else:
+                # replace / insert: take from mutated_lines
+                out.extend(mutated_lines[j1:j2])
+
+    # The function may end inside a differing region; close it explicitly.
+    if in_diff:
+        indent = _indent_of(out[-1]) if out else ""
+        out.append(f"{indent}# MUTANT END")
+
+    return "\n".join(out)
+
+
+def render(config: dict, survivors: list[str], stats: dict | None) -> str:
+    """Build the complete Markdown mutation report and return it as a string.
+
+    Args:
+        config: The ``[tool.mutmut]`` table; only ``tests_dir`` is read.
+        survivors: Mutant identifiers reported as survived.
+        stats: Parsed ``mutmut-cicd-stats.json`` contents, or ``None`` when
+            the file was unavailable.
+
+    The report has, in order: a summary, an index of functions with
+    surviving mutants, one section per function (original source plus each
+    surviving mutation), the existing test files, and a task instruction.
+    When ``survivors`` is empty the report ends after the summary. In that
+    case it only states that every mutation was caught if ``stats`` confirm
+    zero survivors; an empty list alongside missing or contradicting stats
+    more likely means the mutmut invocation failed.
+    """
+    # Group survivors by (module, function), preserving first-seen order.
+    by_function: dict[tuple[str, str], list[tuple[str, str]]] = defaultdict(list)
+    for survivor in survivors:
+        module_path, function_name, mutant_num = parse_mutant_name(survivor)
+        by_function[(module_path, function_name)].append((survivor, mutant_num))
+
+    out: list[str] = []
+    out.append("# Mutation Test Report")
+    out.append("")
+
+    out.append("## Summary")
+    out.append("")
+    if stats:
+        # Prefer the reported total; otherwise derive it from the categories.
+        total = stats.get("total", 0) or sum(
+            stats.get(k, 0)
+            for k in (
+                "killed",
+                "survived",
+                "no_tests",
+                "skipped",
+                "suspicious",
+                "timeout",
+                "segfault",
+            )
+        )
+        killed = stats.get("killed", 0)
+        survived = stats.get("survived", 0)
+        score = (killed / total * 100) if total else 0.0
+        out.append(f"- Total mutants: **{total}**")
+        out.append(f"- Killed: **{killed}**")
+        out.append(f"- Survived: **{survived}**")
+        out.append(f"- Mutation score: **{score:.1f}%**")
+        # Only list the less common categories when they are non-zero.
+        for k in ("no_tests", "skipped", "suspicious", "timeout", "segfault"):
+            v = stats.get(k, 0)
+            if v:
+                out.append(f"- {k.replace('_', ' ').title()}: {v}")
+    else:
+        out.append(f"- Survivors found: **{len(survivors)}**")
+        out.append("- (mutmut-cicd-stats.json not available — full counts unavailable)")
+    out.append("")
+
+    if not survivors:
+        # An empty survivor list only proves the suite caught everything when
+        # the stats agree; otherwise the listing itself may have failed.
+        reported_survived = stats.get("survived", 0) if stats else None
+        if reported_survived is None:
+            out.append(
+                "**No surviving mutants were listed, but mutmut stats are "
+                "unavailable — the run may have failed; check the mutmut "
+                "output before trusting this result.**"
+            )
+        elif reported_survived:
+            out.append(
+                f"**mutmut stats report {reported_survived} surviving "
+                "mutant(s), but `mutmut results` listed none — the survivor "
+                "listing is incomplete.**"
+            )
+        else:
+            out.append(
+                "**No surviving mutants — the test suite caught every mutation.**"
+            )
+        out.append("")
+        return "\n".join(out)
+
+    # Index: one bullet per function, linking to its section below.
+    out.append("## Surviving mutants by function")
+    out.append("")
+    for (module_path, function_name), items in by_function.items():
+        anchor = function_anchor(module_path, function_name)
+        out.append(
+            f"- [`{function_name}`](#{anchor}) — {len(items)} mutant"
+            f"{'s' if len(items) != 1 else ''} ({module_path})"
+        )
+    out.append("")
+
+    # Detail: one section per function.
+    for (module_path, function_name), items in by_function.items():
+        anchor = function_anchor(module_path, function_name)
+        out.append(f'<a id="{anchor}"></a>')
+        out.append(f"## `{module_path}.{function_name}`")
+        out.append("")
+        out.append(f"**Module:** `{module_path}`")
+
+        file_path = module_to_file(module_path)
+        if file_path is None:
+            out.append("")
+            out.append(f"_(could not locate source file for module `{module_path}`)_")
+            out.append("")
+        else:
+            rel = file_path.relative_to(ROOT)
+            out.append(f"**File:** `{rel}`")
+            out.append("")
+            found = find_function_in_file(file_path, function_name)
+            if found:
+                start, end, fn_src, all_lines = found
+                out.append(f"### Original function (lines {start}-{end})")
+                out.append("")
+                if len(all_lines) > 1:
+                    # Same name defined more than once: warn that the body
+                    # shown may not be the one that was mutated.
+                    line_list = ", ".join(str(line) for line in all_lines)
+                    out.append(
+                        f"> **Note:** {len(all_lines)} functions named "
+                        f"`{function_name}` are defined in this file at lines "
+                        f"{line_list}. Showing the first match. mutmut's "
+                        f"mutant identifier does not carry class context, so "
+                        f"the body below may not correspond to the function "
+                        f"that was actually mutated — verify manually before "
+                        f"writing the killing test."
+                    )
+                    out.append("")
+                out.append("```python")
+                out.append(fn_src)
+                out.append("```")
+                out.append("")
+            else:
+                out.append(f"_(could not locate `{function_name}` in {rel} via AST)_")
+                out.append("")
+
+        out.append(f"### Surviving mutations ({len(items)})")
+        out.append("")
+        for i, (mutant_name, mutant_num) in enumerate(items, 1):
+            out.append(f"#### Mutation {i} of {len(items)} — `{mutant_name}`")
+            out.append("")
+            meta_style = render_meta_style_mutant(
+                module_path, function_name, mutant_num
+            )
+            if meta_style is not None:
+                out.append(
+                    "Mutated function (the bug is delimited by "
+                    "`# MUTANT START` / `# MUTANT END`):"
+                )
+                out.append("")
+                out.append("```python")
+                out.append(meta_style)
+                out.append("```")
+                out.append("")
+                # The raw diff is kept, collapsed, as a cross-check.
+                out.append("<details><summary>Unified diff (`mutmut show`)</summary>")
+                out.append("")
+                out.append("```diff")
+                out.append(get_mutmut_show(mutant_name))
+                out.append("```")
+                out.append("")
+                out.append("</details>")
+                out.append("")
+            else:
+                # Fallback: trampoline file or function lookup failed.
+                out.append("```diff")
+                out.append(get_mutmut_show(mutant_name))
+                out.append("```")
+                out.append("")
+
+    # Embed the existing tests so the reader needs no further file lookups.
+    test_files = collect_test_files(config.get("tests_dir", []))
+    if test_files:
+        out.append("## Existing tests")
+        out.append("")
+        out.append(
+            "These are the test files that mutmut considered when classifying the "
+            "mutants above. New tests should be added here, matching existing "
+            "conventions, fixtures, and naming."
+        )
+        out.append("")
+        for tf in test_files:
+            rel = tf.relative_to(ROOT)
+            out.append(f"### `{rel}`")
+            out.append("")
+            out.append("```python")
+            out.append(tf.read_text())
+            out.append("```")
+            out.append("")
+
+    out.append("## Task")
+    out.append("")
+    out.append(
+        dedent(
+            """\
+            For each surviving mutant listed above, write a new test in the
+            existing test file (matching its conventions, fixtures, and naming
+            style) that:
+
+            - **Fails** when the mutated version of the function is in place.
+            - **Passes** when the original (correct) version is in place.
+
+            Aim for one test per surviving mutant. If multiple mutants in the
+            same function can be killed by a single test, that is fine — note
+            which mutant numbers in the test name or docstring.
+
+            Do not modify the source file. Only add tests.
+            """
+        ).strip()
+    )
+    out.append("")
+
+    return "\n".join(out)
+
+
+def main() -> int:
+    """Generate ``mutation-report.md`` at the repository root.
+
+    Loads the mutmut config, reads ``mutants/mutmut-cicd-stats.json`` when it
+    exists (an unparsable file produces a warning on stderr and the report
+    is built without stats), collects the surviving mutants, renders the
+    report, writes it and prints a one-line summary. Always returns 0.
+    """
+    config = load_mutmut_config()
+
+    stats_file = ROOT / "mutants" / "mutmut-cicd-stats.json"
+    stats: dict | None = None
+    if stats_file.exists():
+        try:
+            stats = json.loads(stats_file.read_text(encoding="utf-8"))
+        except json.JSONDecodeError as exc:
+            print(f"warning: could not parse {stats_file}: {exc}", file=sys.stderr)
+
+    survivors = get_survivors()
+    report = render(config, survivors, stats)
+
+    out_path = ROOT / "mutation-report.md"
+    # The report contains non-ASCII characters (e.g. em dashes), so pin the
+    # encoding instead of relying on the locale's default.
+    out_path.write_text(report, encoding="utf-8")
+    print(
+        f"Wrote {out_path} ({len(survivors)} survivor"
+        f"{'s' if len(survivors) != 1 else ''}, {len(report)} chars)"
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())