feat(proxy): add --timeout_worker_healthcheck flag for uvicorn worker triage
Adds a CLI flag (`--timeout_worker_healthcheck`, env `TIMEOUT_WORKER_HEALTHCHECK`) that is forwarded to uvicorn's `timeout_worker_healthcheck` Config kwarg (added in uvicorn 0.37.0). This lets operators raise the supervisor's worker-ping timeout above the default 5s when triaging workers that are being killed and respawned under load. The helper introspects `uvicorn.Config.__init__` and sets the kwarg only if it is supported; otherwise it prints a warning and ignores the flag, so the existing uvicorn>=0.32.1,<1.0.0 floor pin is unaffected. The Gunicorn and Hypercorn paths are unchanged (the uvicorn supervisor isn't running there); on those paths the value is not passed to the helper at all, so the "uvicorn too old" warning never fires spuriously.
This commit is contained in:
parent
d120ddf678
commit
84527b0135
@ -130,10 +130,15 @@ class ProxyInitializationHelpers:
|
||||
port: int,
|
||||
log_config: Optional[str] = None,
|
||||
keepalive_timeout: Optional[int] = None,
|
||||
timeout_worker_healthcheck: Optional[int] = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Get the arguments for `uvicorn` worker
|
||||
"""
|
||||
import inspect
|
||||
|
||||
import uvicorn
|
||||
|
||||
import litellm
|
||||
from litellm._logging import _get_uvicorn_json_log_config
|
||||
|
||||
@ -150,6 +155,18 @@ class ProxyInitializationHelpers:
|
||||
uvicorn_args["log_config"] = _get_uvicorn_json_log_config()
|
||||
if keepalive_timeout is not None:
|
||||
uvicorn_args["timeout_keep_alive"] = keepalive_timeout
|
||||
if timeout_worker_healthcheck is not None:
|
||||
if (
|
||||
"timeout_worker_healthcheck"
|
||||
in inspect.signature(uvicorn.Config.__init__).parameters
|
||||
):
|
||||
uvicorn_args["timeout_worker_healthcheck"] = timeout_worker_healthcheck
|
||||
else:
|
||||
print( # noqa
|
||||
f"\033[1;33mLiteLLM Proxy: --timeout_worker_healthcheck "
|
||||
f"requires uvicorn>=0.37.0, but installed uvicorn=={uvicorn.__version__}. "
|
||||
f"Ignoring the flag.\033[0m"
|
||||
)
|
||||
return uvicorn_args
|
||||
|
||||
@staticmethod
|
||||
@ -563,6 +580,17 @@ class ProxyInitializationHelpers:
|
||||
help="Set the uvicorn keepalive timeout in seconds (uvicorn timeout_keep_alive parameter)",
|
||||
envvar="KEEPALIVE_TIMEOUT",
|
||||
)
|
||||
@click.option(
|
||||
"--timeout_worker_healthcheck",
|
||||
default=None,
|
||||
type=int,
|
||||
help=(
|
||||
"Set the uvicorn worker health-check timeout in seconds (uvicorn timeout_worker_healthcheck parameter). "
|
||||
"Requires uvicorn>=0.37.0. Only applies when running uvicorn directly with --num_workers>1; "
|
||||
"ignored under --run_gunicorn / --run_hypercorn."
|
||||
),
|
||||
envvar="TIMEOUT_WORKER_HEALTHCHECK",
|
||||
)
|
||||
@click.option(
|
||||
"--max_requests_before_restart",
|
||||
default=None,
|
||||
@ -632,6 +660,7 @@ def run_server( # noqa: PLR0915
|
||||
use_prisma_db_push: bool,
|
||||
skip_server_startup,
|
||||
keepalive_timeout,
|
||||
timeout_worker_healthcheck,
|
||||
max_requests_before_restart,
|
||||
enforce_prisma_migration_check: bool,
|
||||
use_v2_migration_resolver: bool,
|
||||
@ -973,11 +1002,15 @@ def run_server( # noqa: PLR0915
|
||||
)
|
||||
return
|
||||
|
||||
running_uvicorn = run_gunicorn is False and run_hypercorn is False
|
||||
uvicorn_args = ProxyInitializationHelpers._get_default_unvicorn_init_args(
|
||||
host=host,
|
||||
port=port,
|
||||
log_config=log_config,
|
||||
keepalive_timeout=keepalive_timeout,
|
||||
timeout_worker_healthcheck=(
|
||||
timeout_worker_healthcheck if running_uvicorn else None
|
||||
),
|
||||
)
|
||||
# Optional: recycle uvicorn workers after N requests
|
||||
if max_requests_before_restart is not None:
|
||||
|
||||
@ -123,6 +123,16 @@ class TestProxyInitializationHelpers:
|
||||
assert args["log_config"] == "log_config.json"
|
||||
assert args["timeout_keep_alive"] == 120
|
||||
|
||||
class _FakeUvicornConfig:
|
||||
def __init__(self, timeout_worker_healthcheck=None):
|
||||
pass
|
||||
|
||||
with patch("uvicorn.Config", _FakeUvicornConfig):
|
||||
args = ProxyInitializationHelpers._get_default_unvicorn_init_args(
|
||||
"localhost", 8000, timeout_worker_healthcheck=15
|
||||
)
|
||||
assert args["timeout_worker_healthcheck"] == 15
|
||||
|
||||
@patch("asyncio.run")
|
||||
@patch("builtins.print")
|
||||
def test_init_hypercorn_server(self, mock_print, mock_asyncio_run):
|
||||
@ -401,6 +411,7 @@ class TestProxyInitializationHelpers:
|
||||
port=4000,
|
||||
log_config=None,
|
||||
keepalive_timeout=30,
|
||||
timeout_worker_healthcheck=None,
|
||||
)
|
||||
mock_uvicorn_run.assert_called_once()
|
||||
|
||||
@ -408,6 +419,60 @@ class TestProxyInitializationHelpers:
|
||||
call_args = mock_uvicorn_run.call_args
|
||||
assert call_args[1]["timeout_keep_alive"] == 30
|
||||
|
||||
@patch("uvicorn.run")
|
||||
@patch("builtins.print")
|
||||
def test_timeout_worker_healthcheck_flag(self, mock_print, mock_uvicorn_run):
|
||||
"""Test that the --timeout_worker_healthcheck flag is threaded through to the uvicorn init helper."""
|
||||
from click.testing import CliRunner
|
||||
|
||||
from litellm.proxy.proxy_cli import run_server
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
mock_app = MagicMock()
|
||||
mock_proxy_config = MagicMock()
|
||||
mock_key_mgmt = MagicMock()
|
||||
mock_save_worker_config = MagicMock()
|
||||
|
||||
with (
|
||||
patch.dict(
|
||||
"sys.modules",
|
||||
{
|
||||
"proxy_server": MagicMock(
|
||||
app=mock_app,
|
||||
ProxyConfig=mock_proxy_config,
|
||||
KeyManagementSettings=mock_key_mgmt,
|
||||
save_worker_config=mock_save_worker_config,
|
||||
)
|
||||
},
|
||||
),
|
||||
patch(
|
||||
"litellm.proxy.proxy_cli.ProxyInitializationHelpers._get_default_unvicorn_init_args"
|
||||
) as mock_get_args,
|
||||
patch(
|
||||
"litellm.proxy.proxy_cli.ProxyInitializationHelpers._is_port_in_use",
|
||||
return_value=False,
|
||||
),
|
||||
):
|
||||
mock_get_args.return_value = {
|
||||
"app": "litellm.proxy.proxy_server:app",
|
||||
"host": "localhost",
|
||||
"port": 8000,
|
||||
}
|
||||
|
||||
result = runner.invoke(
|
||||
run_server, ["--local", "--timeout_worker_healthcheck", "15"]
|
||||
)
|
||||
|
||||
assert result.exit_code == 0
|
||||
mock_get_args.assert_called_once_with(
|
||||
host="0.0.0.0",
|
||||
port=4000,
|
||||
log_config=None,
|
||||
keepalive_timeout=None,
|
||||
timeout_worker_healthcheck=15,
|
||||
)
|
||||
|
||||
@patch("uvicorn.run")
|
||||
@patch("builtins.print")
|
||||
@patch("litellm.proxy.db.prisma_client.PrismaManager.setup_database")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user