From 84527b0135dfca9218e8eda5c15fca50b8c50558 Mon Sep 17 00:00:00 2001 From: Ryan Crabbe Date: Mon, 27 Apr 2026 11:06:56 -0700 Subject: [PATCH] feat(proxy): add --timeout_worker_healthcheck flag for uvicorn worker triage Adds a CLI flag (`--timeout_worker_healthcheck`, env `TIMEOUT_WORKER_HEALTHCHECK`) that forwards to uvicorn's `timeout_worker_healthcheck` Config kwarg (added in uvicorn 0.37.0). Lets operators raise the supervisor's worker-ping timeout above the default 5s when triaging workers being killed and respawned under load. The helper introspects `uvicorn.Config.__init__` and only sets the kwarg if supported, otherwise prints a warning - so the existing uvicorn>=0.32.1,<1.0.0 floor pin is unaffected. Gunicorn and Hypercorn paths are unchanged (the uvicorn supervisor isn't running there); the value is also not passed to the helper at all on those paths so the "uvicorn too old" warning never fires spuriously. --- litellm/proxy/proxy_cli.py | 33 +++++++++++ tests/test_litellm/proxy/test_proxy_cli.py | 65 ++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 3845203bb9..71aeea6788 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -130,10 +130,15 @@ class ProxyInitializationHelpers: port: int, log_config: Optional[str] = None, keepalive_timeout: Optional[int] = None, + timeout_worker_healthcheck: Optional[int] = None, ) -> dict: """ Get the arguments for `uvicorn` worker """ + import inspect + + import uvicorn + import litellm from litellm._logging import _get_uvicorn_json_log_config @@ -150,6 +155,18 @@ class ProxyInitializationHelpers: uvicorn_args["log_config"] = _get_uvicorn_json_log_config() if keepalive_timeout is not None: uvicorn_args["timeout_keep_alive"] = keepalive_timeout + if timeout_worker_healthcheck is not None: + if ( + "timeout_worker_healthcheck" + in inspect.signature(uvicorn.Config.__init__).parameters + ): + uvicorn_args["timeout_worker_healthcheck"] = timeout_worker_healthcheck + else: + print( # noqa + f"\033[1;33mLiteLLM Proxy: --timeout_worker_healthcheck " + f"requires uvicorn>=0.37.0, but installed uvicorn=={uvicorn.__version__}. " + f"Ignoring the flag.\033[0m" + ) return uvicorn_args @staticmethod @@ -563,6 +580,17 @@ class ProxyInitializationHelpers: help="Set the uvicorn keepalive timeout in seconds (uvicorn timeout_keep_alive parameter)", envvar="KEEPALIVE_TIMEOUT", ) +@click.option( + "--timeout_worker_healthcheck", + default=None, + type=int, + help=( + "Set the uvicorn worker health-check timeout in seconds (uvicorn timeout_worker_healthcheck parameter). " + "Requires uvicorn>=0.37.0. Only applies when running uvicorn directly with --num_workers>1; " + "ignored under --run_gunicorn / --run_hypercorn." + ), + envvar="TIMEOUT_WORKER_HEALTHCHECK", +) @click.option( "--max_requests_before_restart", default=None, @@ -632,6 +660,7 @@ def run_server( # noqa: PLR0915 use_prisma_db_push: bool, skip_server_startup, keepalive_timeout, + timeout_worker_healthcheck, max_requests_before_restart, enforce_prisma_migration_check: bool, use_v2_migration_resolver: bool, @@ -973,11 +1002,15 @@ def run_server( # noqa: PLR0915 ) return + running_uvicorn = run_gunicorn is False and run_hypercorn is False uvicorn_args = ProxyInitializationHelpers._get_default_unvicorn_init_args( host=host, port=port, log_config=log_config, keepalive_timeout=keepalive_timeout, + timeout_worker_healthcheck=( + timeout_worker_healthcheck if running_uvicorn else None + ), ) # Optional: recycle uvicorn workers after N requests if max_requests_before_restart is not None: diff --git a/tests/test_litellm/proxy/test_proxy_cli.py b/tests/test_litellm/proxy/test_proxy_cli.py index e5fcc6001d..6fbce4a545 100644 --- a/tests/test_litellm/proxy/test_proxy_cli.py +++ b/tests/test_litellm/proxy/test_proxy_cli.py @@ -123,6 +123,16 @@ class TestProxyInitializationHelpers: assert args["log_config"] == "log_config.json" assert args["timeout_keep_alive"] == 120 + class _FakeUvicornConfig: + def __init__(self, timeout_worker_healthcheck=None): + pass + + with patch("uvicorn.Config", _FakeUvicornConfig): + args = ProxyInitializationHelpers._get_default_unvicorn_init_args( + "localhost", 8000, timeout_worker_healthcheck=15 + ) + assert args["timeout_worker_healthcheck"] == 15 + @patch("asyncio.run") @patch("builtins.print") def test_init_hypercorn_server(self, mock_print, mock_asyncio_run): @@ -401,6 +411,7 @@ class TestProxyInitializationHelpers: port=4000, log_config=None, keepalive_timeout=30, + timeout_worker_healthcheck=None, ) mock_uvicorn_run.assert_called_once() @@ -408,6 +419,60 @@ class TestProxyInitializationHelpers: call_args = mock_uvicorn_run.call_args assert call_args[1]["timeout_keep_alive"] == 30 + @patch("uvicorn.run") + @patch("builtins.print") + def test_timeout_worker_healthcheck_flag(self, mock_print, mock_uvicorn_run): + """Test that the --timeout_worker_healthcheck flag is threaded through to the uvicorn init helper.""" + from click.testing import CliRunner + + from litellm.proxy.proxy_cli import run_server + + runner = CliRunner() + + mock_app = MagicMock() + mock_proxy_config = MagicMock() + mock_key_mgmt = MagicMock() + mock_save_worker_config = MagicMock() + + with ( + patch.dict( + "sys.modules", + { + "proxy_server": MagicMock( + app=mock_app, + ProxyConfig=mock_proxy_config, + KeyManagementSettings=mock_key_mgmt, + save_worker_config=mock_save_worker_config, + ) + }, + ), + patch( + "litellm.proxy.proxy_cli.ProxyInitializationHelpers._get_default_unvicorn_init_args" + ) as mock_get_args, + patch( + "litellm.proxy.proxy_cli.ProxyInitializationHelpers._is_port_in_use", + return_value=False, + ), + ): + mock_get_args.return_value = { + "app": "litellm.proxy.proxy_server:app", + "host": "localhost", + "port": 8000, + } + + result = runner.invoke( + run_server, ["--local", "--timeout_worker_healthcheck", "15"] + ) + + assert result.exit_code == 0 + mock_get_args.assert_called_once_with( + host="0.0.0.0", + port=4000, + log_config=None, + keepalive_timeout=None, + timeout_worker_healthcheck=15, + ) + @patch("uvicorn.run") @patch("builtins.print") @patch("litellm.proxy.db.prisma_client.PrismaManager.setup_database")