diff --git a/litellm/litellm_core_utils/cli_token_utils.py b/litellm/litellm_core_utils/cli_token_utils.py index 3776d27691..eb01359cdc 100644 --- a/litellm/litellm_core_utils/cli_token_utils.py +++ b/litellm/litellm_core_utils/cli_token_utils.py @@ -37,7 +37,7 @@ def get_litellm_gateway_api_key( """ Get the stored CLI API key for use with LiteLLM SDK. - This function reads the token file created by `litellm-proxy login` + This function reads the token file created by `lite login` and returns the API key for use in Python scripts. Args: diff --git a/litellm/proxy/client/README.md b/litellm/proxy/client/README.md index 9fbc6f2197..c2ce28884c 100644 --- a/litellm/proxy/client/README.md +++ b/litellm/proxy/client/README.md @@ -338,9 +338,9 @@ sequenceDiagram The CLI provides three authentication commands: -- **`litellm-proxy login`** - Start SSO authentication flow -- **`litellm-proxy logout`** - Clear stored authentication token -- **`litellm-proxy whoami`** - Show current authentication status +- **`lite login`** - Start SSO authentication flow +- **`lite logout`** - Clear stored authentication token +- **`lite whoami`** - Show current authentication status ### Authentication Flow Steps @@ -382,14 +382,14 @@ Once authenticated, the CLI will automatically use the stored token for all requ ```bash # Login -litellm-proxy login +lite login # Use CLI without specifying API key -litellm-proxy models list +lite models list # Check authentication status -litellm-proxy whoami +lite whoami # Logout -litellm-proxy logout +lite logout ``` diff --git a/litellm/proxy/client/cli/README.md b/litellm/proxy/client/cli/README.md index 6ef837cb52..333e2029e4 100644 --- a/litellm/proxy/client/cli/README.md +++ b/litellm/proxy/client/cli/README.md @@ -22,11 +22,11 @@ The CLI can be configured using environment variables or command-line options: Example: ```bash -litellm-proxy version +lite version # or -litellm-proxy --version +lite --version # or -litellm-proxy -v +lite -v ``` ## Commands @@ -40,7 +40,7 @@ The CLI provides several commands for managing models on your LiteLLM proxy serv View all available models: ```bash -litellm-proxy models list [--format table|json] +lite models list [--format table|json] ``` Options: @@ -52,7 +52,7 @@ Options: Get detailed information about all models: ```bash -litellm-proxy models info [options] +lite models info [options] ``` Options: @@ -75,7 +75,7 @@ Default columns: `public_model`, `upstream_model`, `updated_at` Add a new model to the proxy: ```bash -litellm-proxy models add [options] +lite models add [options] ``` Options: @@ -86,7 +86,7 @@ Options: Example: ```bash -litellm-proxy models add gpt-4 -p api_key=sk-123 -p api_base=https://api.openai.com -i description="GPT-4 model" +lite models add gpt-4 -p api_key=sk-123 -p api_base=https://api.openai.com -i description="GPT-4 model" ``` #### Get Model Info @@ -94,7 +94,7 @@ litellm-proxy models add gpt-4 -p api_key=sk-123 -p api_base=https://api.openai. Get information about a specific model: ```bash -litellm-proxy models get [--id MODEL_ID] [--name MODEL_NAME] +lite models get [--id MODEL_ID] [--name MODEL_NAME] ``` Options: @@ -107,7 +107,7 @@ Options: Delete a model from the proxy: ```bash -litellm-proxy models delete +lite models delete ``` #### Update Model @@ -115,7 +115,7 @@ litellm-proxy models delete Update an existing model's configuration: ```bash -litellm-proxy models update [options] +lite models update [options] ``` Options: @@ -128,7 +128,7 @@ Options: Import models from a YAML file: ```bash -litellm-proxy models import models.yaml +lite models import models.yaml ``` Options: @@ -142,31 +142,31 @@ Examples: 1. Import all models from a YAML file: ```bash -litellm-proxy models import models.yaml +lite models import models.yaml ``` 2. Dry run (show what would be imported): ```bash -litellm-proxy models import models.yaml --dry-run +lite models import models.yaml --dry-run ``` 3. Only import models where the model name contains 'gpt': ```bash -litellm-proxy models import models.yaml --only-models-matching-regex gpt +lite models import models.yaml --only-models-matching-regex gpt ``` 4. Only import models with access group containing 'beta': ```bash -litellm-proxy models import models.yaml --only-access-groups-matching-regex beta +lite models import models.yaml --only-access-groups-matching-regex beta ``` 5. Combine both filters: ```bash -litellm-proxy models import models.yaml --only-models-matching-regex gpt --only-access-groups-matching-regex beta +lite models import models.yaml --only-models-matching-regex gpt --only-access-groups-matching-regex beta ``` ### Credentials Management @@ -178,7 +178,7 @@ The CLI provides commands for managing credentials on your LiteLLM proxy server: View all available credentials: ```bash -litellm-proxy credentials list [--format table|json] +lite credentials list [--format table|json] ``` Options: @@ -194,7 +194,7 @@ The table format displays: Create a new credential: ```bash -litellm-proxy credentials create --info --values +lite credentials create --info --values ``` Options: @@ -205,7 +205,7 @@ Options: Example: ```bash -litellm-proxy credentials create azure-cred \ +lite credentials create azure-cred \ --info '{"custom_llm_provider": "azure"}' \ --values '{"api_key": "sk-123", "api_base": "https://example.azure.openai.com"}' ``` @@ -215,7 +215,7 @@ litellm-proxy credentials create azure-cred \ Get information about a specific credential: ```bash -litellm-proxy credentials get +lite credentials get ``` #### Delete Credential @@ -223,7 +223,7 @@ litellm-proxy credentials get Delete a credential: ```bash -litellm-proxy credentials delete +lite credentials delete ``` ### Keys Management @@ -235,7 +235,7 @@ The CLI provides commands for managing API keys on your LiteLLM proxy server: View all API keys: ```bash -litellm-proxy keys list [--format table|json] [options] +lite keys list [--format table|json] [options] ``` Options: @@ -256,7 +256,7 @@ Options: Generate a new API key: ```bash -litellm-proxy keys generate [options] +lite keys generate [options] ``` Options: @@ -274,7 +274,7 @@ Options: Example: ```bash -litellm-proxy keys generate --models gpt-4,gpt-3.5-turbo --spend 100 --duration 24h --key-alias my-key --team-id team123 +lite keys generate --models gpt-4,gpt-3.5-turbo --spend 100 --duration 24h --key-alias my-key --team-id team123 ``` #### Delete Keys @@ -282,7 +282,7 @@ litellm-proxy keys generate --models gpt-4,gpt-3.5-turbo --spend 100 --duration Delete API keys by key or alias: ```bash -litellm-proxy keys delete [--keys ] [--key-aliases ] +lite keys delete [--keys ] [--key-aliases ] ``` Options: @@ -293,7 +293,7 @@ Options: Example: ```bash -litellm-proxy keys delete --keys sk-key1,sk-key2 --key-aliases alias1,alias2 +lite keys delete --keys sk-key1,sk-key2 --key-aliases alias1,alias2 ``` #### Get Key Info @@ -301,7 +301,7 @@ litellm-proxy keys delete --keys sk-key1,sk-key2 --key-aliases alias1,alias2 Get information about a specific API key: ```bash -litellm-proxy keys info --key +lite keys info --key ``` Options: @@ -311,7 +311,7 @@ Options: Example: ```bash -litellm-proxy keys info --key sk-key1 +lite keys info --key sk-key1 ``` ### User Management @@ -323,7 +323,7 @@ The CLI provides commands for managing users on your LiteLLM proxy server: View all users: ```bash -litellm-proxy users list +lite users list ``` #### Get User Info @@ -331,7 +331,7 @@ litellm-proxy users list Get information about a specific user: ```bash -litellm-proxy users get --id +lite users get --id ``` #### Create User @@ -339,7 +339,7 @@ litellm-proxy users get --id Create a new user: ```bash -litellm-proxy users create --email user@example.com --role internal_user --alias "Alice" --team team1 --max-budget 100.0 +lite users create --email user@example.com --role internal_user --alias "Alice" --team team1 --max-budget 100.0 ``` #### Delete User @@ -347,7 +347,7 @@ litellm-proxy users create --email user@example.com --role internal_user --alias Delete one or more users by user_id: ```bash -litellm-proxy users delete +lite users delete ``` ### Chat Commands @@ -359,7 +359,7 @@ The CLI provides commands for interacting with chat models through your LiteLLM Create a chat completion: ```bash -litellm-proxy chat completions [options] +lite chat completions [options] ``` Arguments: @@ -379,12 +379,12 @@ Examples: 1. Simple completion: ```bash -litellm-proxy chat completions gpt-4 -m "user:Hello, how are you?" +lite chat completions gpt-4 -m "user:Hello, how are you?" ``` 2. Multi-message conversation: ```bash -litellm-proxy chat completions gpt-4 \ +lite chat completions gpt-4 \ -m "system:You are a helpful assistant" \ -m "user:What's the capital of France?" \ -m "assistant:The capital of France is Paris." \ @@ -393,7 +393,7 @@ litellm-proxy chat completions gpt-4 \ 3. With generation parameters: ```bash -litellm-proxy chat completions gpt-4 \ +lite chat completions gpt-4 \ -m "user:Write a story" \ --temperature 0.7 \ --max-tokens 500 \ @@ -409,7 +409,7 @@ The CLI provides commands for making direct HTTP requests to your LiteLLM proxy Make an HTTP request to any endpoint: ```bash -litellm-proxy http request [options] +lite http request [options] ``` Arguments: @@ -425,19 +425,46 @@ Examples: 1. List models: ```bash -litellm-proxy http request GET /models +lite http request GET /models ``` 2. Create a chat completion: ```bash -litellm-proxy http request POST /chat/completions -j '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}' +lite http request POST /chat/completions -j '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}' ``` 3. Test connection with custom headers: ```bash -litellm-proxy http request GET /health/test_connection -H "X-Custom-Header:value" +lite http request GET /health/test_connection -H "X-Custom-Header:value" ``` +### Run a Coding Agent + +Launch a coding agent with all of its LLM traffic routed through your LiteLLM proxy. Each supported agent is its own command, so there is nothing to remember beyond the agent's name: + +```bash +lite claude +lite codex +lite opencode +``` + +Anything you type after the agent name is forwarded to it untouched, so the usual flags keep working: + +```bash +lite claude --resume +lite codex exec "summarize the repo" +``` + +Each command resolves your LiteLLM key (logging in via SSO when none is stored and you are at a terminal; otherwise it expects `LITELLM_PROXY_API_KEY` or `--api-key`), checks the key against the proxy so bad credentials fail immediately instead of deep inside the agent, exports the environment variables the agent reads, then replaces itself with the agent process. + +The right variables are picked per agent. Claude Code gets `ANTHROPIC_BASE_URL` (the proxy root, so it appends `/v1/messages`) and `ANTHROPIC_AUTH_TOKEN`, with any stray `ANTHROPIC_API_KEY` cleared so the proxy token wins. Codex and OpenCode get `OPENAI_BASE_URL` (the proxy plus `/v1`) and `OPENAI_API_KEY`. Codex ignores `OPENAI_BASE_URL`, so it is additionally pointed at the proxy through a custom provider passed as `-c` config overrides (HTTP/SSE Responses transport, since the proxy does not speak the Responses WebSocket protocol). + +Options (these belong to the wrapper, so put them before the agent's own flags): + +- `--skip-verify`: Skip the pre-launch key check (useful offline or with non-standard auth). + +To pin the model, pass the agent's own model flag (for example `lite claude --model my-proxy-model` or `lite codex -m my-proxy-model`), or export the variable the agent reads (`ANTHROPIC_MODEL` / `ANTHROPIC_SMALL_FAST_MODEL` for Claude Code); the wrapper preserves anything you already have set. Whatever model the agent ends up requesting must exist on the proxy, since requests land on the proxy's `/v1/messages` (Anthropic) or `/v1/chat/completions` and `/v1/responses` (OpenAI) endpoints. + ## Environment Variables The CLI respects the following environment variables: @@ -450,37 +477,37 @@ The CLI respects the following environment variables: 1. List all models in table format: ```bash -litellm-proxy models list +lite models list ``` 2. Add a new model with parameters: ```bash -litellm-proxy models add gpt-4 -p api_key=sk-123 -p max_tokens=2048 +lite models add gpt-4 -p api_key=sk-123 -p max_tokens=2048 ``` 3. Get model information in JSON format: ```bash -litellm-proxy models info --format json +lite models info --format json ``` 4. Update model parameters: ```bash -litellm-proxy models update model-123 -p temperature=0.7 -i description="Updated model" +lite models update model-123 -p temperature=0.7 -i description="Updated model" ``` 5. List all credentials in table format: ```bash -litellm-proxy credentials list +lite credentials list ``` 6. Create a new credential for Azure: ```bash -litellm-proxy credentials create azure-prod \ +lite credentials create azure-prod \ --info '{"custom_llm_provider": "azure"}' \ --values '{"api_key": "sk-123", "api_base": "https://prod.azure.openai.com"}' ``` @@ -488,7 +515,7 @@ litellm-proxy credentials create azure-prod \ 7. Make a custom HTTP request: ```bash -litellm-proxy http request POST /chat/completions \ +lite http request POST /chat/completions \ -j '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}' \ -H "X-Custom-Header:value" ``` @@ -497,29 +524,29 @@ litellm-proxy http request POST /chat/completions \ ```bash # List users -litellm-proxy users list +lite users list # Get user info -litellm-proxy users get --id u1 +lite users get --id u1 # Create a user -litellm-proxy users create --email a@b.com --role internal_user --alias "Alice" --team team1 --max-budget 100.0 +lite users create --email a@b.com --role internal_user --alias "Alice" --team team1 --max-budget 100.0 # Delete users -litellm-proxy users delete u1 u2 +lite users delete u1 u2 ``` 9. Import models from a YAML file (with filters): ```bash # Only import models where the model name contains 'gpt' -litellm-proxy models import models.yaml --only-models-matching-regex gpt +lite models import models.yaml --only-models-matching-regex gpt # Only import models with access group containing 'beta' -litellm-proxy models import models.yaml --only-access-groups-matching-regex beta +lite models import models.yaml --only-access-groups-matching-regex beta # Combine both filters -litellm-proxy models import models.yaml --only-models-matching-regex gpt --only-access-groups-matching-regex beta +lite models import models.yaml --only-models-matching-regex gpt --only-access-groups-matching-regex beta ``` ## Error Handling diff --git a/litellm/proxy/client/cli/commands/agents.py b/litellm/proxy/client/cli/commands/agents.py new file mode 100644 index 0000000000..f39ffb3e86 --- /dev/null +++ b/litellm/proxy/client/cli/commands/agents.py @@ -0,0 +1,303 @@ +import os +import shutil +import sys +from typing import Callable, Dict, FrozenSet, List, Mapping, Optional, Sequence, Tuple + +import click +import requests + +from .auth import get_stored_api_key, login + +ANTHROPIC_BASE_URL_ENV = "ANTHROPIC_BASE_URL" +ANTHROPIC_AUTH_TOKEN_ENV = "ANTHROPIC_AUTH_TOKEN" +ANTHROPIC_API_KEY_ENV = "ANTHROPIC_API_KEY" +OPENAI_BASE_URL_ENV = "OPENAI_BASE_URL" +OPENAI_API_KEY_ENV = "OPENAI_API_KEY" + +PROFILE_ANTHROPIC = "anthropic" +PROFILE_OPENAI = "openai" + +_KNOWN_AGENTS: Dict[str, Tuple[str, FrozenSet[str]]] = { + "claude": ("Claude Code", frozenset({PROFILE_ANTHROPIC})), + "codex": ("Codex", frozenset({PROFILE_OPENAI})), + "opencode": ("OpenCode", frozenset({PROFILE_OPENAI})), +} + +_INSTALL_DOCS: Dict[str, str] = { + "claude": "https://docs.claude.com/en/docs/claude-code/setup", + "codex": "https://developers.openai.com/codex/cli", + "opencode": "https://opencode.ai/docs", +} + +CODEX_PROXY_PROVIDER = "litellm" + + +class AgentRunError(Exception): + """Raised for any user-actionable failure while preparing to run an agent.""" + + +def agent_profile(command: str) -> Tuple[str, FrozenSet[str]]: + """Return the (display name, env profiles) for a wrapped command. + + Known agents map to the API family they speak. Anything else gets both + families so it works regardless of which env vars the tool reads. + """ + base = os.path.basename(command) + if base in _KNOWN_AGENTS: + return _KNOWN_AGENTS[base] + return base, frozenset({PROFILE_ANTHROPIC, PROFILE_OPENAI}) + + +def build_agent_env( + base_env: Mapping[str, str], + base_url: str, + api_key: str, + profiles: FrozenSet[str], +) -> Dict[str, str]: + """Return a copy of base_env wired to route the agent through the proxy. + + Anthropic clients (Claude Code) append /v1/messages to ANTHROPIC_BASE_URL, + so it stays the bare proxy root; OpenAI clients (Codex, OpenCode) expect the + /v1 suffix on OPENAI_BASE_URL. ANTHROPIC_API_KEY is dropped so a stray + Anthropic key cannot win over the bearer token we set. + """ + env = dict(base_env) + root = base_url.rstrip("/") + if PROFILE_ANTHROPIC in profiles: + env[ANTHROPIC_BASE_URL_ENV] = root + env[ANTHROPIC_AUTH_TOKEN_ENV] = api_key + env.pop(ANTHROPIC_API_KEY_ENV, None) + if PROFILE_OPENAI in profiles: + env[OPENAI_BASE_URL_ENV] = root + "/v1" + env[OPENAI_API_KEY_ENV] = api_key + return env + + +def _codex_proxy_args(base_url: str) -> List[str]: + """Codex `-c` overrides that point it at the proxy. + + Codex ignores OPENAI_BASE_URL (it always dials api.openai.com), so the env + profile alone cannot route it. It does honor a custom provider, so define one + inline; supports_websockets=false forces the HTTP/SSE Responses transport + because the proxy does not speak the Responses WebSocket protocol. The key is + read from OPENAI_API_KEY, which build_agent_env already exports. + """ + root = base_url.rstrip("/") + "/v1" + provider = f"model_providers.{CODEX_PROXY_PROVIDER}" + return [ + "-c", + f'model_provider="{CODEX_PROXY_PROVIDER}"', + "-c", + f'{provider}.name="LiteLLM proxy"', + "-c", + f'{provider}.base_url="{root}"', + "-c", + f'{provider}.env_key="{OPENAI_API_KEY_ENV}"', + "-c", + f'{provider}.wire_api="responses"', + "-c", + f"{provider}.supports_websockets=false", + ] + + +_PROXY_ARGS: Dict[str, Callable[[str], List[str]]] = { + "codex": _codex_proxy_args, +} + + +def agent_launch_args(command: str, base_url: str) -> List[str]: + """Extra CLI args an agent needs to actually honor the proxy. + + Claude Code and OpenCode respect the exported env vars, so they get nothing + here; Codex needs its provider pointed via config overrides. + """ + builder = _PROXY_ARGS.get(os.path.basename(command)) + return builder(base_url) if builder else [] + + +def verify_proxy_key( + base_url: str, + api_key: str, + *, + get: Callable[..., requests.Response] = requests.get, +) -> None: + """Probe the proxy with the key so bad creds fail here, not inside the agent. + + Raises AgentRunError when the proxy is unreachable or rejects the key. Other + non-2xx responses are tolerated; the agent's own call is the real test. + """ + url = base_url.rstrip("/") + "/v1/models" + try: + resp = get(url, headers={"Authorization": f"Bearer {api_key}"}, timeout=10) + except requests.RequestException as e: + raise AgentRunError( + f"Could not reach the LiteLLM proxy at {base_url.rstrip('/')}: {e}. " + "Is it running, and is --base-url (or LITELLM_PROXY_URL) correct?" + ) + if resp.status_code in (401, 403): + raise AgentRunError( + f"LiteLLM rejected your key (HTTP {resp.status_code}). " + "Run `lite login` to refresh it, or pass a valid --api-key." + ) + + +def _exec(path: str, args: Sequence[str], env: Mapping[str, str]) -> None: + os.execvpe(path, list(args), dict(env)) + + +def _restore_controlling_terminal() -> None: + """Reattach the controlling terminal to stdin before handing off to the agent. + + Completing the browser SSO login can leave stdin detached from the terminal, + which makes a TUI agent like Claude Code start in non-interactive mode and + exit immediately. Reopening /dev/tty onto fd 0 gives the agent a live + terminal; when stdin is still a tty (no login happened) this is a no-op. + """ + if sys.stdin.isatty(): + return + try: + fd = os.open("/dev/tty", os.O_RDONLY) + except OSError: + return + try: + os.dup2(fd, 0) + finally: + os.close(fd) + + +def run_agent( + base_url: str, + api_key: str, + command: Sequence[str], + *, + skip_verify: bool = False, + base_env: Optional[Mapping[str, str]] = None, + which: Callable[[str], Optional[str]] = shutil.which, + verify: Callable[[str, str], None] = verify_proxy_key, + launcher: Callable[[str, Sequence[str], Mapping[str, str]], None] = _exec, + reattach_terminal: Optional[Callable[[], None]] = None, +) -> None: + """Validate, wire the environment, and hand off to the agent. + + On success this replaces the current process and never returns. Raises + AgentRunError for missing binaries, an unreachable proxy, or a rejected key. + reattach_terminal, when given, runs just before handoff to restore stdin. + """ + if not command: + raise AgentRunError("Nothing to run.") + + _, profiles = agent_profile(command[0]) + binary = which(command[0]) + if binary is None: + docs = _INSTALL_DOCS.get(os.path.basename(command[0])) + hint = f" Install it first: {docs}" if docs else "" + raise AgentRunError(f"Could not find `{command[0]}` on your PATH.{hint}") + + if not skip_verify: + verify(base_url, api_key) + + env = build_agent_env( + base_env if base_env is not None else os.environ, + base_url, + api_key, + profiles, + ) + extra_args = agent_launch_args(command[0], base_url) + if reattach_terminal is not None: + reattach_terminal() + launcher(binary, [command[0], *extra_args, *command[1:]], env) + + +def _is_interactive() -> bool: + return sys.stdin.isatty() + + +def _resolve_api_key(ctx: click.Context) -> str: + base_url = ctx.obj["base_url"] + api_key = ctx.obj.get("api_key") + if api_key: + return api_key + + if not _is_interactive(): + raise click.ClickException( + "No LiteLLM key found. Set LITELLM_PROXY_API_KEY (or pass --api-key) for " + "non-interactive use, or run `lite login` from a terminal." + ) + + click.echo("No LiteLLM credentials found; starting login...") + ctx.invoke(login) + api_key = get_stored_api_key(expected_base_url=base_url) + if not api_key: + raise click.ClickException( + "Login did not produce an API key; cannot start the agent." + ) + return api_key + + +_SKIP_VERIFY_HELP = "Skip the pre-launch key check against the proxy." + + +def _launch( + ctx: click.Context, binary: str, args: Sequence[str], *, skip_verify: bool +) -> None: + base_url = ctx.obj["base_url"] + started_interactive = _is_interactive() + api_key = _resolve_api_key(ctx) + + display_name, _ = agent_profile(binary) + click.echo( + f"litellm: routing {display_name} through proxy at {base_url.rstrip('/')}" + ) + + try: + run_agent( + base_url, + api_key, + [binary, *args], + skip_verify=skip_verify, + reattach_terminal=( + _restore_controlling_terminal if started_interactive else None + ), + ) + except AgentRunError as e: + raise click.ClickException(str(e)) + + +def _make_agent_command(binary: str, display_name: str) -> click.Command: + @click.command( + name=binary, + context_settings={"ignore_unknown_options": True}, + short_help=f"Run {display_name} through your LiteLLM proxy", + ) + @click.option("--skip-verify", is_flag=True, default=False, help=_SKIP_VERIFY_HELP) + @click.argument("args", nargs=-1, type=click.UNPROCESSED) + @click.pass_context + def _command(ctx: click.Context, skip_verify: bool, args: Sequence[str]) -> None: + _launch(ctx, binary, list(args), skip_verify=skip_verify) + + _command.help = ( + f"Run {display_name} routed through your LiteLLM proxy.\n\n" + f"Logs in with LiteLLM if needed, verifies your key against the proxy, " + f"exports the env vars {binary} reads, then hands off. Any arguments are " + f"forwarded to `{binary}`." + ) + return _command + + +def agent_commands() -> List[click.Command]: + """Build one top-level command per known agent, e.g. `lite claude`.""" + return [ + _make_agent_command(binary, name) + for binary, (name, _profiles) in _KNOWN_AGENTS.items() + ] + + +__all__ = [ + "agent_commands", + "run_agent", + "build_agent_env", + "agent_launch_args", + "verify_proxy_key", + "agent_profile", + "AgentRunError", +] diff --git a/litellm/proxy/client/cli/commands/auth.py b/litellm/proxy/client/cli/commands/auth.py index 447837c35e..b06d86d596 100644 --- a/litellm/proxy/client/cli/commands/auth.py +++ b/litellm/proxy/client/cli/commands/auth.py @@ -624,7 +624,7 @@ def whoami(): token_data = load_token() if not token_data: - click.echo("❌ Not authenticated. Run 'litellm-proxy login' to authenticate.") + click.echo("❌ Not authenticated. Run 'lite login' to authenticate.") return click.echo("✅ Authenticated") diff --git a/litellm/proxy/client/cli/commands/chat.py b/litellm/proxy/client/cli/commands/chat.py index a078b76610..696e34c3ec 100644 --- a/litellm/proxy/client/cli/commands/chat.py +++ b/litellm/proxy/client/cli/commands/chat.py @@ -122,13 +122,13 @@ def chat( Examples: # Chat with a specific model - litellm-proxy chat gpt-4 + lite chat gpt-4 # Chat without specifying model (will show model selection) - litellm-proxy chat + lite chat # Chat with custom settings - litellm-proxy chat gpt-4 --temperature 0.9 --system "You are a helpful coding assistant" + lite chat gpt-4 --temperature 0.9 --system "You are a helpful coding assistant" """ console = Console() diff --git a/litellm/proxy/client/cli/interface.py b/litellm/proxy/client/cli/interface.py index eba693dc18..a32d60aadd 100644 --- a/litellm/proxy/client/cli/interface.py +++ b/litellm/proxy/client/cli/interface.py @@ -80,6 +80,8 @@ def styled_prompt(): def show_commands(): """Display available commands.""" + from .commands.agents import agent_commands + commands = [ ("login", "Authenticate with the LiteLLM proxy server"), ("logout", "Clear stored authentication"), @@ -91,6 +93,9 @@ def show_commands(): ("keys", "Manage API keys"), ("teams", "Manage teams and team assignments"), ("users", "Manage users"), + ] + commands += [(c.name, c.get_short_help_str()) for c in agent_commands()] + commands += [ ("version", "Show version information"), ("help", "Show this help message"), ("quit", "Exit the interactive session"), @@ -156,7 +161,7 @@ def execute_command(user_input: str, ctx: click.Context): # Execute the command try: # Create a new argument list for click to parse - sys.argv = ["litellm-proxy"] + [command] + args + sys.argv = ["lite"] + [command] + args # Get the command object and invoke it cmd = cli.commands[command] diff --git a/litellm/proxy/client/cli/main.py b/litellm/proxy/client/cli/main.py index be55f79c06..b8c483f4b0 100644 --- a/litellm/proxy/client/cli/main.py +++ b/litellm/proxy/client/cli/main.py @@ -7,6 +7,7 @@ import click from litellm._version import version as litellm_version from litellm.proxy.client.health import HealthManagementClient +from .commands.agents import agent_commands from .commands.auth import get_stored_api_key, login, logout, whoami from .commands.chat import chat from .commands.credentials import credentials @@ -112,6 +113,9 @@ cli.add_command(keys) cli.add_command(teams) # Add the users command group cli.add_command(users) +# Add a top-level command per coding agent (claude, codex, opencode, ...) +for agent_command in agent_commands(): + cli.add_command(agent_command) if __name__ == "__main__": diff --git a/litellm/proxy/common_utils/html_forms/cli_sso_success.py b/litellm/proxy/common_utils/html_forms/cli_sso_success.py index 51f0775d90..345f3ca5b4 100644 --- a/litellm/proxy/common_utils/html_forms/cli_sso_success.py +++ b/litellm/proxy/common_utils/html_forms/cli_sso_success.py @@ -135,7 +135,7 @@ def render_cli_sso_success_page() -> str: font-size: 14px; }} - .countdown {{ + .status {{ color: #64748b; font-size: 14px; font-weight: 500; @@ -183,23 +183,11 @@ def render_cli_sso_success_page() -> str:

You can now use LiteLLM CLI commands with your authenticated session.

-
This window will close in 3 seconds...
+
You can now close this window and return to your terminal.
- + diff --git a/packaging/homebrew/README.md b/packaging/homebrew/README.md new file mode 100644 index 0000000000..ef441ded30 --- /dev/null +++ b/packaging/homebrew/README.md @@ -0,0 +1,27 @@ +# Homebrew formula for the `lite` CLI + +[`lite.rb`](./lite.rb) is the canonical source for the Homebrew formula that installs the thin LiteLLM CLI (`litellm[cli]`). It lives here so it is versioned with the code, but Homebrew serves formulae from a tap, so it has to be published to the `BerriAI/homebrew-litellm` tap to be installable. + +Once published, end users install with + +```shell +brew install BerriAI/litellm/lite +``` + +which gives them the `lite` command (`lite login`, `lite claude`, `lite models list`, ...) without the proxy server runtime. For the full proxy server, they keep using pip/uv with `litellm[proxy]` or the Docker image. + +## Why a tap and not homebrew-core + +The formula builds the published `litellm` sdist with the `cli` extra and resolves that extra's dependencies from PyPI at build time. homebrew-core forbids network access during `install` and would require every transitive dependency declared as a pinned `resource`, regenerated on each release. For a fast-moving CLI that tradeoff is not worth it, so this stays a tap formula. + +## Release runbook + +The formula can only point at a published artifact, so it activates with the first `litellm` release that ships the `cli` extra (added in [pyproject.toml](../../pyproject.toml)). + +1. Cut a `litellm` release whose `pyproject.toml` includes the `cli` extra and confirm it is on PyPI. +2. Fetch the sdist URL and checksum for that version: `curl -fsSL https://pypi.org/pypi/litellm//json | jq -r '.urls[] | select(.packagetype=="sdist") | "\(.url)\n\(.digests.sha256)"'` +3. Set `url` and `sha256` in `lite.rb` to those values; `version` is parsed from `url`. +4. Copy `lite.rb` into the tap repo under `Formula/lite.rb`, then run `brew install --build-from-source ./Formula/lite.rb` and `brew test lite` to verify a clean build and that `lite --help` works. +5. Commit and push to `BerriAI/homebrew-litellm`. + +Keep `lite.rb` here in sync with the tap copy so the in-repo formula stays the source of truth. diff --git a/packaging/homebrew/lite.rb b/packaging/homebrew/lite.rb new file mode 100644 index 0000000000..d0d61bb5b4 --- /dev/null +++ b/packaging/homebrew/lite.rb @@ -0,0 +1,33 @@ +# Homebrew formula for the thin LiteLLM `lite` CLI (litellm[cli]). +# +# Ships in the BerriAI/homebrew-litellm tap, not homebrew-core: it builds the +# published litellm sdist with the `cli` extra into a dedicated virtualenv and +# pulls the extra's deps from PyPI. That is the low-maintenance path for a +# fast-moving Python CLI; the resource-stanza alternative would need every +# transitive dep re-pinned with a fresh sha256 on each release. +# +# RELEASE STEP (see README.md in this directory): point `url` + `sha256` at the +# PyPI sdist of the first litellm version that ships the `cli` extra. `version` +# is parsed from `url`, and the build installs exactly that version, so the three +# stay in lockstep automatically. +class Lite < Formula + include Language::Python::Virtualenv + + desc "Thin client for the LiteLLM proxy: lite login, lite claude/codex/opencode" + homepage "https://docs.litellm.ai/docs/proxy/management_cli" + url "https://files.pythonhosted.org/packages/source/l/litellm/litellm-REPLACE_AT_RELEASE.tar.gz" + sha256 "REPLACE_AT_RELEASE" + license "MIT" + + depends_on "python@3.13" + + def install + virtualenv_create(libexec, "python3.13") + system libexec/"bin/pip", "install", "#{buildpath}[cli]" + bin.install_symlink libexec/"bin/lite" + end + + test do + assert_match "login", shell_output("#{bin}/lite --help") + end +end diff --git a/pyproject.toml b/pyproject.toml index 28e6f48dc4..b9d76379fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,14 @@ proxy = [ "pyroscope-io>=0.8.16,<1.0; sys_platform != 'win32'", "pydantic-settings>=2.14.1,<3.0", ] +# Thin client install for the `lite` CLI on developer laptops. The CLI's heavy +# imports (fastapi, cryptography, ...) are all guarded, so it runs on the base +# SDK plus just these three; none of the server runtime in `proxy` is pulled in. +cli = [ + "rich>=13.9.4,<14.0", + "pyyaml>=6.0.3,<7.0", + "requests>=2.32.0,<3.0", +] extra_proxy = [ "prisma>=0.11.0,<1.0", "azure-identity>=1.25.2,<2.0", @@ -132,6 +140,7 @@ proxy-runtime = [ [project.scripts] litellm = "litellm:run_server" +lite = "litellm.proxy.client.cli:cli" litellm-proxy = "litellm.proxy.client.cli:cli" [dependency-groups] diff --git a/scripts/install-cli.sh b/scripts/install-cli.sh new file mode 100755 index 0000000000..d147286fca --- /dev/null +++ b/scripts/install-cli.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# LiteLLM CLI Installer (the thin `lite` client) +# Usage: curl -fsSL https://raw.githubusercontent.com/BerriAI/litellm/main/scripts/install-cli.sh | sh +# +# Installs only litellm[cli]: the `lite` command for authenticating to a LiteLLM +# proxy and running coding agents (lite claude / codex / opencode) through it. +# None of the proxy server runtime is pulled in. To run a proxy server instead, +# use scripts/install.sh, which installs litellm[proxy]. +# +# Needs only curl: uv is bootstrapped if missing, and uv provisions a compatible +# Python itself (honouring litellm's requires-python), downloading a managed one +# when the host has no suitable interpreter. +# +# NOTE: set -e without pipefail for POSIX sh compatibility (dash on Ubuntu/Debian +# ignores the shebang when invoked as `sh` and does not support `pipefail`). +set -eu + +# NOTE: before merging, this must stay as "litellm[cli]" to install from PyPI. +LITELLM_PACKAGE="litellm[cli]" +UV_VERSION="0.10.9" + +# ── colours ──────────────────────────────────────────────────────────────── +if [ -t 1 ]; then + BOLD='\033[1m' + GREEN='\033[38;2;78;186;101m' + GREY='\033[38;2;153;153;153m' + RESET='\033[0m' +else + BOLD='' GREEN='' GREY='' RESET='' +fi + +info() { printf "${GREY} %s${RESET}\n" "$*"; } +success() { printf "${GREEN} ✔ %s${RESET}\n" "$*"; } +header() { printf "${BOLD} %s${RESET}\n" "$*"; } +die() { printf "\n Error: %s\n\n" "$*" >&2; exit 1; } + +# ── banner ───────────────────────────────────────────────────────────────── +echo "" +cat << 'EOF' + ██╗ ██╗████████╗███████╗ + ██║ ██║╚══██╔══╝██╔════╝ + ██║ ██║ ██║ █████╗ + ██║ ██║ ██║ ██╔══╝ + ███████╗██║ ██║ ███████╗ + ╚══════╝╚═╝ ╚═╝ ╚══════╝ +EOF +printf " ${BOLD}LiteLLM CLI Installer${RESET} ${GREY}the thin 'lite' client for your proxy${RESET}\n\n" + +# ── OS detection ─────────────────────────────────────────────────────────── +OS="$(uname -s)" +ARCH="$(uname -m)" + +case "$OS" in + Darwin) PLATFORM="macOS ($ARCH)" ;; + Linux) PLATFORM="Linux ($ARCH)" ;; + *) die "Unsupported OS: $OS. LiteLLM supports macOS and Linux." ;; +esac + +info "Platform: $PLATFORM" + +# ── uv detection / install ──────────────────────────────────────────────── +UV_BIN="" +CURRENT_UV_VERSION="" +for candidate in uv "$HOME/.local/bin/uv"; do + if command -v "$candidate" >/dev/null 2>&1; then + UV_BIN="$(command -v "$candidate")" + break + elif [ -x "$candidate" ]; then + UV_BIN="$candidate" + break + fi +done + +if [ -n "$UV_BIN" ]; then + CURRENT_UV_VERSION="$("$UV_BIN" --version 2>/dev/null | awk '{print $2}' | head -1 || true)" +fi + +if [ -z "$UV_BIN" ] || [ "${CURRENT_UV_VERSION:-}" != "$UV_VERSION" ]; then + header "Installing uv…" + if [ -n "${CURRENT_UV_VERSION:-}" ]; then + info "Upgrading uv from ${CURRENT_UV_VERSION} to ${UV_VERSION}" + fi + curl -LsSf "https://astral.sh/uv/${UV_VERSION}/install.sh" | env UV_NO_MODIFY_PATH=1 sh \ + || die "uv installation failed. Try manually: curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh" + UV_BIN="$HOME/.local/bin/uv" +fi + +# ── install ──────────────────────────────────────────────────────────────── +# --python-preference system: reuse a compatible system Python when present, +# otherwise download a managed one. Either way uv honours litellm's requires-python, +# so a too-old (3.9) or too-new (3.14+) system Python is skipped, not forced. +echo "" +header "Installing litellm[cli]…" +echo "" + +"$UV_BIN" tool install --python-preference system --force "${LITELLM_PACKAGE}" \ + || die "uv tool install failed. Try manually: $UV_BIN tool install '${LITELLM_PACKAGE}'" + +# ── find the lite binary installed by uv tool ────────────────────────────── +SCRIPTS_DIR="$("$UV_BIN" tool dir --bin)" +LITE_BIN="${SCRIPTS_DIR}/lite" + +if [ ! -x "$LITE_BIN" ]; then + die "lite binary not found after install. Try: $UV_BIN tool install '${LITELLM_PACKAGE}'" +fi + +# ── success banner ───────────────────────────────────────────────────────── +echo "" +success "LiteLLM CLI installed" + +installed_ver="$("$LITE_BIN" --version 2>&1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true)" +[ -n "$installed_ver" ] && info "Version: $installed_ver" + +# ── PATH hint ────────────────────────────────────────────────────────────── +if ! command -v lite >/dev/null 2>&1; then + info "Note: add lite to your PATH: export PATH=\"\$PATH:${SCRIPTS_DIR}\"" +fi + +# ── next steps ───────────────────────────────────────────────────────────── +echo "" +header "Next steps:" +echo "" +info " export LITELLM_PROXY_URL=https://your-proxy # point at your gateway" +info " lite login # authenticate via SSO" +info " lite claude # run Claude Code through the proxy" +echo "" +info "Docs: https://docs.litellm.ai/docs/proxy/management_cli" +echo "" diff --git a/scripts/install.sh b/scripts/install.sh index c28d7da872..06e6249c9b 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -2,13 +2,13 @@ # LiteLLM Installer # Usage: curl -fsSL https://raw.githubusercontent.com/BerriAI/litellm/main/scripts/install.sh | sh # +# Needs only curl: uv is bootstrapped if missing, and uv provisions a compatible +# Python itself (reusing a suitable system one, else downloading a managed build). +# # NOTE: set -e without pipefail for POSIX sh compatibility (dash on Ubuntu/Debian # ignores the shebang when invoked as `sh` and does not support `pipefail`). set -eu -MIN_PYTHON_MAJOR=3 -MIN_PYTHON_MINOR=9 - # NOTE: before merging, this must stay as "litellm[proxy]" to install from PyPI. LITELLM_PACKAGE="litellm[proxy]" UV_VERSION="0.10.9" @@ -52,27 +52,6 @@ esac info "Platform: $PLATFORM" -# ── Python detection ─────────────────────────────────────────────────────── -PYTHON_BIN="" -for candidate in python3 python; do - if command -v "$candidate" >/dev/null 2>&1; then - major="$("$candidate" -c 'import sys; print(sys.version_info.major)' 2>/dev/null || true)" - minor="$("$candidate" -c 'import sys; print(sys.version_info.minor)' 2>/dev/null || true)" - if [ "${major:-0}" -ge "$MIN_PYTHON_MAJOR" ] && [ "${minor:-0}" -ge "$MIN_PYTHON_MINOR" ]; then - PYTHON_BIN="$(command -v "$candidate")" - info "Python: $("$candidate" --version 2>&1)" - break - fi - fi -done - -if [ -z "$PYTHON_BIN" ]; then - die "Python ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}+ is required but not found. - Install it from https://python.org/downloads or via your package manager: - macOS: brew install python@3 - Ubuntu: sudo apt install python3" -fi - # ── uv detection / install ──────────────────────────────────────────────── UV_BIN="" CURRENT_UV_VERSION="" @@ -105,15 +84,18 @@ echo "" header "Installing litellm[proxy]…" echo "" -"$UV_BIN" tool install --python "$PYTHON_BIN" --force "${LITELLM_PACKAGE}" \ - || die "uv tool install failed. Try manually: $UV_BIN tool install --python '$PYTHON_BIN' '${LITELLM_PACKAGE}'" +# --python-preference system: reuse a compatible system Python when present, +# otherwise download a managed one. Either way uv honours litellm's requires-python, +# so a too-old (3.9) or too-new (3.14+) system Python is skipped, not forced. +"$UV_BIN" tool install --python-preference system --force "${LITELLM_PACKAGE}" \ + || die "uv tool install failed. Try manually: $UV_BIN tool install '${LITELLM_PACKAGE}'" # ── find the litellm binary installed by uv tool ─────────────────────────── SCRIPTS_DIR="$("$UV_BIN" tool dir --bin)" LITELLM_BIN="${SCRIPTS_DIR}/litellm" if [ ! -x "$LITELLM_BIN" ]; then - die "litellm binary not found after install. Try: $UV_BIN tool install --python '$PYTHON_BIN' '${LITELLM_PACKAGE}'" + die "litellm binary not found after install. Try: $UV_BIN tool install '${LITELLM_PACKAGE}'" fi # ── success banner ───────────────────────────────────────────────────────── diff --git a/tests/local_testing/test_basic_python_version.py b/tests/local_testing/test_basic_python_version.py index 8308e0d603..e31c395371 100644 --- a/tests/local_testing/test_basic_python_version.py +++ b/tests/local_testing/test_basic_python_version.py @@ -92,6 +92,56 @@ def test_package_dependencies(): ) +def test_cli_extra_is_a_thin_client_install(): + """The `cli` extra must install a working `lite` client without dragging in the + proxy server runtime. It therefore has to declare the CLI's real third-party + deps (rich, pyyaml, requests) and must never contain a server-only dependency + from the `proxy` extra; a leak there silently re-bloats the laptop install. + """ + import pathlib + + import litellm + from packaging.requirements import Requirement + + try: + import tomllib as tomli + except ImportError: + try: + import tomli + except ImportError: + pytest.skip("tomli/tomllib not available - skipping dependency check") + + pyproject_path = pathlib.Path(litellm.__file__).parent.parent / "pyproject.toml" + with open(pyproject_path, "rb") as f: + optional_deps = tomli.load(f)["project"]["optional-dependencies"] + + assert "cli" in optional_deps, "Expected a `cli` extra for the thin lite install" + + cli_names = {Requirement(req).name.lower() for req in optional_deps["cli"]} + + missing = {"rich", "pyyaml", "requests"} - cli_names + assert not missing, f"`cli` extra is missing deps the lite CLI imports: {missing}" + + server_only = { + "fastapi", + "uvicorn", + "gunicorn", + "granian", + "starlette", + "boto3", + "polars", + "soundfile", + "mcp", + "cryptography", + "apscheduler", + "rq", + "litellm-enterprise", + "litellm-proxy-extras", + } + leaked = cli_names & server_only + assert not leaked, f"`cli` extra leaks proxy-server deps onto laptops: {leaked}" + + import os import subprocess import time diff --git a/tests/local_testing/test_router_debug_logs.py b/tests/local_testing/test_router_debug_logs.py index f1c7e9d722..ad807539bf 100644 --- a/tests/local_testing/test_router_debug_logs.py +++ b/tests/local_testing/test_router_debug_logs.py @@ -82,7 +82,9 @@ def test_async_fallbacks(caplog): asyncio.run(_make_request()) captured_logs = [rec.message for rec in caplog.records] - # on circle ci the captured logs get some async task exception logs - filter them out "Task exception was never retrieved" + # on circle ci the captured logs get async cleanup noise from the gc (leaked + # task warnings, plus aiohttp "Unclosed client session"/"Unclosed connector" + # warnings from cached clients other router tests evicted) - filter it out captured_logs = [ log for log in captured_logs @@ -90,6 +92,8 @@ def test_async_fallbacks(caplog): and "Task was destroyed but it is pending" not in log and "get_available_deployment" not in log and "in the Langfuse queue" not in log + and "Unclosed client session" not in log + and "Unclosed connector" not in log ] print("\n Captured caplog records - ", captured_logs) diff --git a/tests/test_litellm/proxy/client/cli/test_agents.py b/tests/test_litellm/proxy/client/cli/test_agents.py new file mode 100644 index 0000000000..afd1696a89 --- /dev/null +++ b/tests/test_litellm/proxy/client/cli/test_agents.py @@ -0,0 +1,475 @@ +import os +import sys +from unittest.mock import patch + +import click +import pytest +import requests +from click.testing import CliRunner + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + + +from litellm.proxy.client.cli.commands.agents import ( + AgentRunError, + agent_commands, + agent_launch_args, + agent_profile, + build_agent_env, + run_agent, + verify_proxy_key, +) + +AGENTS_MODULE = "litellm.proxy.client.cli.commands.agents" + + +def _agent_command(name): + return next(c for c in agent_commands() if c.name == name) + + +class _FakeResponse: + def __init__(self, status_code): + self.status_code = status_code + + +class TestAgentProfile: + def test_claude_is_anthropic(self): + name, profiles = agent_profile("claude") + assert name == "Claude Code" + assert profiles == frozenset({"anthropic"}) + + def test_claude_full_path_uses_basename(self): + name, profiles = agent_profile("/usr/local/bin/claude") + assert name == "Claude Code" + assert profiles == frozenset({"anthropic"}) + + def test_codex_and_opencode_are_openai(self): + assert agent_profile("codex") == ("Codex", frozenset({"openai"})) + assert agent_profile("opencode") == ("OpenCode", frozenset({"openai"})) + + def test_unknown_command_gets_both_profiles(self): + name, profiles = agent_profile("mytool") + assert name == "mytool" + assert profiles == frozenset({"anthropic", "openai"}) + + +class TestBuildAgentEnv: + def test_anthropic_profile_uses_bare_root_and_bearer(self): + env = build_agent_env( + {}, "http://localhost:4000/", "sk-key", frozenset({"anthropic"}) + ) + assert env["ANTHROPIC_BASE_URL"] == "http://localhost:4000" + assert env["ANTHROPIC_AUTH_TOKEN"] == "sk-key" + assert "OPENAI_BASE_URL" not in env + assert "OPENAI_API_KEY" not in env + + def test_anthropic_profile_drops_existing_api_key(self): + env = build_agent_env( + {"ANTHROPIC_API_KEY": "real-key"}, + "http://localhost:4000", + "sk-key", + frozenset({"anthropic"}), + ) + assert "ANTHROPIC_API_KEY" not in env + + def test_openai_profile_appends_v1(self): + env = build_agent_env( + {}, "http://localhost:4000/", "sk-key", frozenset({"openai"}) + ) + assert env["OPENAI_BASE_URL"] == "http://localhost:4000/v1" + assert env["OPENAI_API_KEY"] == "sk-key" + assert "ANTHROPIC_BASE_URL" not in env + + def test_both_profiles_set_everything(self): + env = build_agent_env( + {}, "http://localhost:4000", "sk-key", frozenset({"anthropic", "openai"}) + ) + assert env["ANTHROPIC_BASE_URL"] == "http://localhost:4000" + assert env["OPENAI_BASE_URL"] == "http://localhost:4000/v1" + assert env["ANTHROPIC_AUTH_TOKEN"] == "sk-key" + assert env["OPENAI_API_KEY"] == "sk-key" + + def test_preserves_unrelated_env_and_does_not_mutate_input(self): + base = {"PATH": "/usr/bin", "ANTHROPIC_API_KEY": "real-key"} + env = build_agent_env( + base, "http://localhost:4000", "sk-key", frozenset({"anthropic"}) + ) + assert env["PATH"] == "/usr/bin" + assert base == {"PATH": "/usr/bin", "ANTHROPIC_API_KEY": "real-key"} + + +class TestAgentLaunchArgs: + def test_claude_and_opencode_get_no_extra_args(self): + assert agent_launch_args("claude", "http://localhost:4000") == [] + assert agent_launch_args("opencode", "http://localhost:4000") == [] + + def test_unknown_agent_gets_no_extra_args(self): + assert agent_launch_args("mytool", "http://localhost:4000") == [] + + def test_codex_points_provider_at_proxy_over_http(self): + args = agent_launch_args("codex", "http://localhost:4000/") + joined = " ".join(args) + assert 'model_provider="litellm"' in args + assert 'model_providers.litellm.base_url="http://localhost:4000/v1"' in args + assert 'model_providers.litellm.env_key="OPENAI_API_KEY"' in args + assert 'model_providers.litellm.wire_api="responses"' in args + assert "model_providers.litellm.supports_websockets=false" in args + assert joined.count("-c") == 6 + + def test_codex_uses_basename(self): + assert agent_launch_args("/usr/local/bin/codex", "http://localhost:4000") == ( + agent_launch_args("codex", "http://localhost:4000") + ) + + +class TestVerifyProxyKey: + def test_ok_status_passes_and_uses_models_endpoint(self): + captured = {} + + def fake_get(url, headers, timeout): + captured["url"] = url + captured["headers"] = headers + return _FakeResponse(200) + + verify_proxy_key("http://localhost:4000/", "sk-key", get=fake_get) + + assert captured["url"] == "http://localhost:4000/v1/models" + assert captured["headers"] == {"Authorization": "Bearer sk-key"} + + @pytest.mark.parametrize("status", [401, 403]) + def test_rejected_key_raises(self, status): + with pytest.raises(AgentRunError, match="rejected your key"): + verify_proxy_key( + "http://localhost:4000", + "sk-key", + get=lambda *a, **k: _FakeResponse(status), + ) + + def test_unreachable_proxy_raises(self): + def boom(*a, **k): + raise requests.ConnectionError("refused") + + with pytest.raises(AgentRunError, match="Could not reach"): + verify_proxy_key("http://localhost:4000", "sk-key", get=boom) + + def test_other_non_2xx_is_tolerated(self): + verify_proxy_key( + "http://localhost:4000", + "sk-key", + get=lambda *a, **k: _FakeResponse(500), + ) + + +class TestRunAgent: + def test_wires_env_and_launches_resolved_binary(self): + calls = {} + + def fake_launcher(path, args, env): + calls["path"] = path + calls["args"] = tuple(args) + calls["env"] = dict(env) + + run_agent( + "http://localhost:4000", + "sk-key", + ["claude", "--resume"], + base_env={"PATH": "/usr/bin", "ANTHROPIC_API_KEY": "leaked"}, + which=lambda name: "/usr/local/bin/claude", + verify=lambda *a: None, + launcher=fake_launcher, + ) + + assert calls["path"] == "/usr/local/bin/claude" + assert calls["args"] == ("claude", "--resume") + env = calls["env"] + assert env["ANTHROPIC_BASE_URL"] == "http://localhost:4000" + assert env["ANTHROPIC_AUTH_TOKEN"] == "sk-key" + assert "ANTHROPIC_API_KEY" not in env + assert "OPENAI_BASE_URL" not in env + + def test_codex_gets_openai_env(self): + calls = {} + run_agent( + "http://localhost:4000", + "sk-key", + ["codex"], + base_env={}, + which=lambda name: "/usr/local/bin/codex", + verify=lambda *a: None, + launcher=lambda p, a, e: calls.update(env=dict(e)), + ) + assert calls["env"]["OPENAI_BASE_URL"] == "http://localhost:4000/v1" + assert calls["env"]["OPENAI_API_KEY"] == "sk-key" + assert "ANTHROPIC_BASE_URL" not in calls["env"] + + def test_codex_injects_proxy_provider_args_before_user_args(self): + calls = {} + run_agent( + "http://localhost:4000", + "sk-key", + ["codex", "exec", "do a thing"], + base_env={}, + which=lambda name: "/usr/local/bin/codex", + verify=lambda *a: None, + launcher=lambda p, a, e: calls.update(args=tuple(a)), + ) + args = calls["args"] + assert args[0] == "codex" + assert args[-2:] == ("exec", "do a thing") + assert 'model_provider="litellm"' in args + assert 'model_providers.litellm.base_url="http://localhost:4000/v1"' in args + # overrides must precede the codex subcommand so codex parses them + assert args.index('model_provider="litellm"') < args.index("exec") + + def test_claude_launches_without_injected_args(self): + calls = {} + run_agent( + "http://localhost:4000", + "sk-key", + ["claude", "--resume"], + base_env={}, + which=lambda name: "/usr/local/bin/claude", + verify=lambda *a: None, + launcher=lambda p, a, e: calls.update(args=tuple(a)), + ) + assert calls["args"] == ("claude", "--resume") + + def test_missing_binary_raises_with_install_hint(self): + with pytest.raises(AgentRunError, match="claude.*Install it first"): + run_agent( + "http://localhost:4000", + "sk-key", + ["claude"], + base_env={}, + which=lambda name: None, + verify=lambda *a: None, + launcher=lambda *a: None, + ) + + def test_skip_verify_does_not_call_verify(self): + verified = [] + launched = [] + run_agent( + "http://localhost:4000", + "sk-key", + ["claude"], + skip_verify=True, + base_env={}, + which=lambda name: "/usr/local/bin/claude", + verify=lambda *a: verified.append(a), + launcher=lambda *a: launched.append(a), + ) + assert verified == [] + assert len(launched) == 1 + + def test_verify_failure_aborts_before_launch(self): + launched = [] + + def boom(*a): + raise AgentRunError("rejected") + + with pytest.raises(AgentRunError): + run_agent( + "http://localhost:4000", + "sk-key", + ["claude"], + base_env={}, + which=lambda name: "/usr/local/bin/claude", + verify=boom, + launcher=lambda *a: launched.append(a), + ) + assert launched == [] + + def test_empty_command_raises(self): + with pytest.raises(AgentRunError): + run_agent("http://localhost:4000", "sk-key", []) + + def test_reattach_terminal_runs_just_before_launch(self): + order = [] + run_agent( + "http://localhost:4000", + "sk-key", + ["claude"], + skip_verify=True, + base_env={}, + which=lambda name: "/usr/local/bin/claude", + launcher=lambda *a: order.append("launch"), + reattach_terminal=lambda: order.append("reattach"), + ) + assert order == ["reattach", "launch"] + + def test_no_reattach_terminal_by_default(self): + order = [] + run_agent( + "http://localhost:4000", + "sk-key", + ["claude"], + skip_verify=True, + base_env={}, + which=lambda name: "/usr/local/bin/claude", + launcher=lambda *a: order.append("launch"), + ) + assert order == ["launch"] + + +class TestAgentCommands: + def setup_method(self): + self.runner = CliRunner() + + def test_one_command_per_known_agent(self): + assert {c.name for c in agent_commands()} == {"claude", "codex", "opencode"} + + def test_claude_launches_with_stored_key_and_forwards_args(self): + captured = {} + + def fake_run_agent(base_url, api_key, command, **kwargs): + captured["base_url"] = base_url + captured["api_key"] = api_key + captured["command"] = list(command) + captured["skip_verify"] = kwargs.get("skip_verify") + + with patch(f"{AGENTS_MODULE}.run_agent", side_effect=fake_run_agent): + result = self.runner.invoke( + _agent_command("claude"), + ["--resume", "-p", "hi"], + obj={"base_url": "http://localhost:4000", "api_key": "sk-key"}, + ) + + assert result.exit_code == 0, result.output + assert captured["api_key"] == "sk-key" + assert captured["command"] == ["claude", "--resume", "-p", "hi"] + assert captured["skip_verify"] is False + assert ( + "routing Claude Code through proxy at http://localhost:4000" + in result.output + ) + + def test_codex_shows_friendly_name(self): + captured = {} + with patch( + f"{AGENTS_MODULE}.run_agent", + side_effect=lambda b, k, c, **kw: captured.update(command=list(c)), + ): + result = self.runner.invoke( + _agent_command("codex"), + ["exec", "do a thing"], + obj={"base_url": "http://localhost:4000", "api_key": "sk-key"}, + ) + assert result.exit_code == 0, result.output + assert captured["command"] == ["codex", "exec", "do a thing"] + assert "routing Codex through proxy" in result.output + + def test_skip_verify_is_consumed_not_forwarded(self): + captured = {} + + def fake_run_agent(base_url, api_key, command, **kwargs): + captured["command"] = list(command) + captured["skip_verify"] = kwargs.get("skip_verify") + + with patch(f"{AGENTS_MODULE}.run_agent", side_effect=fake_run_agent): + result = self.runner.invoke( + _agent_command("claude"), + ["--skip-verify", "--resume"], + obj={"base_url": "http://localhost:4000", "api_key": "sk-key"}, + ) + + assert result.exit_code == 0, result.output + assert captured["skip_verify"] is True + assert captured["command"] == ["claude", "--resume"] + + def test_non_interactive_without_key_errors_clearly(self): + with ( + patch(f"{AGENTS_MODULE}._is_interactive", return_value=False), + patch(f"{AGENTS_MODULE}.run_agent") as mock_run, + ): + result = self.runner.invoke( + _agent_command("claude"), + [], + obj={"base_url": "http://localhost:4000", "api_key": None}, + ) + assert result.exit_code != 0 + assert "LITELLM_PROXY_API_KEY" in result.output + mock_run.assert_not_called() + + def test_interactive_without_key_logs_in_then_launches(self): + captured = {} + + @click.command() + def fake_login(): + pass + + with ( + patch(f"{AGENTS_MODULE}._is_interactive", return_value=True), + patch(f"{AGENTS_MODULE}.login", fake_login), + patch( + f"{AGENTS_MODULE}.get_stored_api_key", return_value="sk-after-login" + ) as mock_get, + patch( + f"{AGENTS_MODULE}.run_agent", + side_effect=lambda base_url, api_key, command, **k: captured.update( + api_key=api_key + ), + ), + ): + result = self.runner.invoke( + _agent_command("claude"), + [], + obj={"base_url": "http://localhost:4000", "api_key": None}, + ) + + assert result.exit_code == 0, result.output + assert captured["api_key"] == "sk-after-login" + mock_get.assert_called_once_with(expected_base_url="http://localhost:4000") + + def test_agent_run_error_becomes_click_error(self): + with patch( + f"{AGENTS_MODULE}.run_agent", + side_effect=AgentRunError("could not reach proxy"), + ): + result = self.runner.invoke( + _agent_command("claude"), + [], + obj={"base_url": "http://localhost:4000", "api_key": "sk-key"}, + ) + assert result.exit_code != 0 + assert "could not reach proxy" in result.output + + def test_interactive_session_reattaches_terminal_before_handoff(self): + from litellm.proxy.client.cli.commands.agents import ( + _restore_controlling_terminal, + ) + + captured = {} + with ( + patch(f"{AGENTS_MODULE}._is_interactive", return_value=True), + patch( + f"{AGENTS_MODULE}.run_agent", + side_effect=lambda b, k, c, **kw: captured.update(kw), + ), + ): + result = self.runner.invoke( + _agent_command("claude"), + [], + obj={"base_url": "http://localhost:4000", "api_key": "sk-key"}, + ) + assert result.exit_code == 0, result.output + assert captured["reattach_terminal"] is _restore_controlling_terminal + + def test_non_interactive_agent_mode_leaves_stdin_alone(self): + captured = {} + with ( + patch(f"{AGENTS_MODULE}._is_interactive", return_value=False), + patch( + f"{AGENTS_MODULE}.run_agent", + side_effect=lambda b, k, c, **kw: captured.update(kw), + ), + ): + result = self.runner.invoke( + _agent_command("claude"), + [], + obj={"base_url": "http://localhost:4000", "api_key": "sk-key"}, + ) + assert result.exit_code == 0, result.output + assert captured["reattach_terminal"] is None diff --git a/tests/test_litellm/proxy/client/cli/test_auth_commands.py b/tests/test_litellm/proxy/client/cli/test_auth_commands.py index 2e738ff900..4ee8b502aa 100644 --- a/tests/test_litellm/proxy/client/cli/test_auth_commands.py +++ b/tests/test_litellm/proxy/client/cli/test_auth_commands.py @@ -517,7 +517,7 @@ class TestWhoamiCommand: assert result.exit_code == 0 assert "❌ Not authenticated" in result.output - assert "Run 'litellm-proxy login'" in result.output + assert "Run 'lite login'" in result.output def test_whoami_old_token(self): """Test whoami with old token showing warning""" diff --git a/tests/test_litellm/proxy/management_endpoints/test_ui_sso.py b/tests/test_litellm/proxy/management_endpoints/test_ui_sso.py index c763e9c0e9..2efec3e0b3 100644 --- a/tests/test_litellm/proxy/management_endpoints/test_ui_sso.py +++ b/tests/test_litellm/proxy/management_endpoints/test_ui_sso.py @@ -1777,6 +1777,23 @@ class TestHTMLIntegration: assert isinstance(html, str) assert len(html) > 0 + def test_success_page_instructs_manual_close_without_false_countdown(self): + """Browsers refuse window.close() on tabs they did not open via window.open() + (the CLI opens the page with webbrowser.open), so a 'closing in 3...' countdown + is a promise the browser usually can't keep and the page gets stuck on + 'Closing...'. The page must instead always show the manual-close instruction + and never advertise an auto-close that won't happen. + """ + from litellm.proxy.common_utils.html_forms.cli_sso_success import ( + render_cli_sso_success_page, + ) + + html = render_cli_sso_success_page() + + assert "You can now close this window and return to your terminal." in html + assert "Closing..." not in html + assert "This window will close in" not in html + class TestCustomUISSO: """Test the custom UI SSO sign-in handler functionality""" diff --git a/uv.lock b/uv.lock index 2403a7fbf0..1100db783d 100644 --- a/uv.lock +++ b/uv.lock @@ -3297,6 +3297,11 @@ dependencies = [ caching = [ { name = "diskcache" }, ] +cli = [ + { name = "pyyaml" }, + { name = "requests" }, + { name = "rich" }, +] extra-proxy = [ { name = "a2a-sdk" }, { name = "azure-identity" }, @@ -3528,10 +3533,13 @@ requires-dist = [ { name = "pyroscope-io", marker = "sys_platform != 'win32' and extra == 'proxy'", specifier = ">=0.8.16,<1.0" }, { name = "python-dotenv", specifier = ">=1.0.0,<2.0" }, { name = "python-multipart", marker = "extra == 'proxy'", specifier = ">=0.0.27,<1.0" }, + { name = "pyyaml", marker = "extra == 'cli'", specifier = ">=6.0.3,<7.0" }, { name = "pyyaml", marker = "extra == 'proxy'", specifier = ">=6.0.3,<7.0" }, { name = "redisvl", marker = "python_full_version < '3.14' and extra == 'extra-proxy'", specifier = ">=0.4.1,<1.0" }, + { name = "requests", marker = "extra == 'cli'", specifier = ">=2.32.0,<3.0" }, { name = "resend", marker = "extra == 'extra-proxy'", specifier = ">=2.23.0,<3.0" }, { name = "restrictedpython", marker = "extra == 'proxy'", specifier = ">=8.1,<9.0" }, + { name = "rich", marker = "extra == 'cli'", specifier = ">=13.9.4,<14.0" }, { name = "rich", marker = "extra == 'proxy'", specifier = ">=13.9.4,<14.0" }, { name = "rq", marker = "extra == 'proxy'", specifier = ">=2.7.0,<3.0" }, { name = "semantic-router", marker = "python_full_version < '3.14' and extra == 'semantic-router'", specifier = ">=0.1.15,<1.0" }, @@ -3545,7 +3553,7 @@ requires-dist = [ { name = "uvloop", marker = "sys_platform != 'win32' and extra == 'proxy'", specifier = ">=0.21.0,<1.0" }, { name = "websockets", marker = "extra == 'proxy'", specifier = ">=15.0.1,<16.0" }, ] -provides-extras = ["proxy", "extra-proxy", "utils", "caching", "semantic-router", "mlflow", "grpc", "stt-nvidia-riva", "google", "proxy-runtime"] +provides-extras = ["proxy", "cli", "extra-proxy", "utils", "caching", "semantic-router", "mlflow", "grpc", "stt-nvidia-riva", "google", "proxy-runtime"] [package.metadata.requires-dev] ci = [