feat(proxy): add --timeout_worker_healthcheck flag for uvicorn worker triage

Adds a CLI flag (`--timeout_worker_healthcheck`, env `TIMEOUT_WORKER_HEALTHCHECK`)
that forwards to uvicorn's `timeout_worker_healthcheck` Config kwarg (added in
uvicorn 0.37.0). Lets operators raise the supervisor's worker-ping timeout above
the default 5s when triaging workers being killed and respawned under load.

The helper introspects `uvicorn.Config.__init__` and sets the kwarg only if it is
supported; otherwise it prints a warning and ignores the flag, so the existing
uvicorn>=0.32.1,<1.0.0 pin does not need its floor raised. The Gunicorn and
Hypercorn paths are unchanged (the uvicorn supervisor isn't running there), and on
those paths the value is never passed to the helper, so the "uvicorn too old"
warning cannot fire spuriously.
This commit is contained in:
Ryan Crabbe 2026-04-27 11:06:56 -07:00
parent d120ddf678
commit 84527b0135
No known key found for this signature in database
2 changed files with 98 additions and 0 deletions

View File

@ -130,10 +130,15 @@ class ProxyInitializationHelpers:
port: int,
log_config: Optional[str] = None,
keepalive_timeout: Optional[int] = None,
timeout_worker_healthcheck: Optional[int] = None,
) -> dict:
"""
Get the arguments for `uvicorn` worker
"""
import inspect
import uvicorn
import litellm
from litellm._logging import _get_uvicorn_json_log_config
@ -150,6 +155,18 @@ class ProxyInitializationHelpers:
uvicorn_args["log_config"] = _get_uvicorn_json_log_config()
if keepalive_timeout is not None:
uvicorn_args["timeout_keep_alive"] = keepalive_timeout
if timeout_worker_healthcheck is not None:
if (
"timeout_worker_healthcheck"
in inspect.signature(uvicorn.Config.__init__).parameters
):
uvicorn_args["timeout_worker_healthcheck"] = timeout_worker_healthcheck
else:
print( # noqa
f"\033[1;33mLiteLLM Proxy: --timeout_worker_healthcheck "
f"requires uvicorn>=0.37.0, but installed uvicorn=={uvicorn.__version__}. "
f"Ignoring the flag.\033[0m"
)
return uvicorn_args
@staticmethod
@ -563,6 +580,17 @@ class ProxyInitializationHelpers:
help="Set the uvicorn keepalive timeout in seconds (uvicorn timeout_keep_alive parameter)",
envvar="KEEPALIVE_TIMEOUT",
)
@click.option(
"--timeout_worker_healthcheck",
default=None,
type=int,
help=(
"Set the uvicorn worker health-check timeout in seconds (uvicorn timeout_worker_healthcheck parameter). "
"Requires uvicorn>=0.37.0. Only applies when running uvicorn directly with --num_workers>1; "
"ignored under --run_gunicorn / --run_hypercorn."
),
envvar="TIMEOUT_WORKER_HEALTHCHECK",
)
@click.option(
"--max_requests_before_restart",
default=None,
@ -632,6 +660,7 @@ def run_server( # noqa: PLR0915
use_prisma_db_push: bool,
skip_server_startup,
keepalive_timeout,
timeout_worker_healthcheck,
max_requests_before_restart,
enforce_prisma_migration_check: bool,
use_v2_migration_resolver: bool,
@ -973,11 +1002,15 @@ def run_server( # noqa: PLR0915
)
return
running_uvicorn = run_gunicorn is False and run_hypercorn is False
uvicorn_args = ProxyInitializationHelpers._get_default_unvicorn_init_args(
host=host,
port=port,
log_config=log_config,
keepalive_timeout=keepalive_timeout,
timeout_worker_healthcheck=(
timeout_worker_healthcheck if running_uvicorn else None
),
)
# Optional: recycle uvicorn workers after N requests
if max_requests_before_restart is not None:

View File

@ -123,6 +123,16 @@ class TestProxyInitializationHelpers:
assert args["log_config"] == "log_config.json"
assert args["timeout_keep_alive"] == 120
class _FakeUvicornConfig:
def __init__(self, timeout_worker_healthcheck=None):
pass
with patch("uvicorn.Config", _FakeUvicornConfig):
args = ProxyInitializationHelpers._get_default_unvicorn_init_args(
"localhost", 8000, timeout_worker_healthcheck=15
)
assert args["timeout_worker_healthcheck"] == 15
@patch("asyncio.run")
@patch("builtins.print")
def test_init_hypercorn_server(self, mock_print, mock_asyncio_run):
@ -401,6 +411,7 @@ class TestProxyInitializationHelpers:
port=4000,
log_config=None,
keepalive_timeout=30,
timeout_worker_healthcheck=None,
)
mock_uvicorn_run.assert_called_once()
@ -408,6 +419,60 @@ class TestProxyInitializationHelpers:
call_args = mock_uvicorn_run.call_args
assert call_args[1]["timeout_keep_alive"] == 30
@patch("uvicorn.run")
@patch("builtins.print")
def test_timeout_worker_healthcheck_flag(self, mock_print, mock_uvicorn_run):
    """
    Check that `--timeout_worker_healthcheck` reaches the uvicorn init helper.

    The CLI is invoked with `--timeout_worker_healthcheck 15` while
    `uvicorn.run` and `print` are mocked out, and the helper
    `_get_default_unvicorn_init_args` is replaced by a mock so the keyword
    arguments it receives can be inspected.
    """
    from click.testing import CliRunner

    from litellm.proxy.proxy_cli import run_server

    # Mock registered in sys.modules under the bare name `proxy_server`,
    # exposing the four attributes this test stubs out.
    fake_proxy_server = MagicMock(
        app=MagicMock(),
        ProxyConfig=MagicMock(),
        KeyManagementSettings=MagicMock(),
        save_worker_config=MagicMock(),
    )
    # Canned kwargs returned by the mocked helper.
    stub_uvicorn_args = {
        "app": "litellm.proxy.proxy_server:app",
        "host": "localhost",
        "port": 8000,
    }
    cli_argv = ["--local", "--timeout_worker_healthcheck", "15"]

    with (
        patch.dict("sys.modules", {"proxy_server": fake_proxy_server}),
        patch(
            "litellm.proxy.proxy_cli.ProxyInitializationHelpers._get_default_unvicorn_init_args",
            return_value=stub_uvicorn_args,
        ) as init_args_helper,
        patch(
            "litellm.proxy.proxy_cli.ProxyInitializationHelpers._is_port_in_use",
            return_value=False,
        ),
    ):
        outcome = CliRunner().invoke(run_server, cli_argv)

        assert outcome.exit_code == 0
        # host/port are presumably the CLI defaults (none are passed above);
        # the flag value must arrive as the integer 15.
        init_args_helper.assert_called_once_with(
            host="0.0.0.0",
            port=4000,
            log_config=None,
            keepalive_timeout=None,
            timeout_worker_healthcheck=15,
        )
@patch("uvicorn.run")
@patch("builtins.print")
@patch("litellm.proxy.db.prisma_client.PrismaManager.setup_database")