fix: resolve flaky test failures in health, spend logs, and CLI tests (#21769)
* fix: reset db_health_cache in source module to prevent stale cache hits The test was reassigning db_health_cache via `global` in the test module, which doesn't affect the _health_endpoints module's variable. When a prior test set the cache to "connected" within 2 minutes, _db_health_readiness_check returned early without calling health_check(), causing assert_called_once to fail. Also use PrismaError with a connection message so it's properly recognized as a connection error by PrismaDBExceptionHandler.is_database_connection_error. * fix: replace asyncio.sleep with polling loop in spend logs tests The GLOBAL_LOGGING_WORKER processes callbacks via an async queue, so asyncio.sleep(1) is a race condition - under CI load the worker may not have processed the queued task within 1 second. Replace with a polling helper that waits up to 10 seconds for the mock to be called. Also add metadata.attempted_retries and metadata.max_retries to ignored_keys since these are new fields. * fix: isolate test_skip_server_startup from CI environment Remove mix_stderr=False (unsupported in some Click versions). Strip DATABASE_URL/DIRECT_URL from environment during the test to prevent real prisma operations when these are set in CI.
This commit is contained in:
parent
061a3cdc3e
commit
3e67cb5287
@ -12,6 +12,7 @@ sys.path.insert(
|
||||
import pytest
|
||||
from prisma.errors import ClientNotConnectedError, HTTPClientClosedError, PrismaError
|
||||
|
||||
import litellm.proxy.health_endpoints._health_endpoints as _health_endpoints_module
|
||||
from litellm.proxy.health_endpoints._health_endpoints import (
|
||||
_db_health_readiness_check,
|
||||
db_health_cache,
|
||||
@ -31,7 +32,7 @@ from tests.test_litellm.proxy.conftest import create_proxy_test_client
|
||||
@pytest.mark.parametrize(
|
||||
"prisma_error",
|
||||
[
|
||||
PrismaError(),
|
||||
PrismaError("Can't reach database server"),
|
||||
ClientNotConnectedError(),
|
||||
HTTPClientClosedError(),
|
||||
],
|
||||
@ -46,9 +47,9 @@ async def test_db_health_readiness_check_with_prisma_error(prisma_error):
|
||||
mock_prisma_client = MagicMock()
|
||||
mock_prisma_client.health_check.side_effect = prisma_error
|
||||
|
||||
# Reset the health cache to a known state
|
||||
global db_health_cache
|
||||
db_health_cache = {
|
||||
# Reset the health cache in the source module so _db_health_readiness_check
|
||||
# sees the updated value (assigning to a test-module global doesn't work).
|
||||
_health_endpoints_module.db_health_cache = {
|
||||
"status": "unknown",
|
||||
"last_updated": datetime.now() - timedelta(minutes=5),
|
||||
}
|
||||
@ -73,7 +74,7 @@ async def test_db_health_readiness_check_with_prisma_error(prisma_error):
|
||||
@pytest.mark.parametrize(
|
||||
"prisma_error",
|
||||
[
|
||||
PrismaError(),
|
||||
PrismaError("Can't reach database server"),
|
||||
ClientNotConnectedError(),
|
||||
HTTPClientClosedError(),
|
||||
],
|
||||
@ -87,9 +88,8 @@ async def test_db_health_readiness_check_with_error_and_flag_off(prisma_error):
|
||||
mock_prisma_client = MagicMock()
|
||||
mock_prisma_client.health_check.side_effect = prisma_error
|
||||
|
||||
# Reset the health cache
|
||||
global db_health_cache
|
||||
db_health_cache = {
|
||||
# Reset the health cache in the source module
|
||||
_health_endpoints_module.db_health_cache = {
|
||||
"status": "unknown",
|
||||
"last_updated": datetime.now() - timedelta(minutes=5),
|
||||
}
|
||||
@ -491,7 +491,7 @@ def test_get_callback_identifier_string_and_object_with_callback_name():
|
||||
- Object with empty/None callback_name (should fall through to other checks)
|
||||
"""
|
||||
from litellm.proxy.health_endpoints._health_endpoints import get_callback_identifier
|
||||
|
||||
|
||||
# Test 1: String callback should be returned as-is
|
||||
assert get_callback_identifier("datadog") == "datadog"
|
||||
assert get_callback_identifier("langfuse") == "langfuse"
|
||||
@ -522,9 +522,9 @@ def test_get_callback_identifier_custom_logger_registry_and_fallback():
|
||||
- Object with callback_name that matches registry entry
|
||||
- Fallback to callback_name() helper function
|
||||
"""
|
||||
from litellm.proxy.health_endpoints._health_endpoints import get_callback_identifier
|
||||
from litellm.litellm_core_utils.custom_logger_registry import CustomLoggerRegistry
|
||||
|
||||
from litellm.proxy.health_endpoints._health_endpoints import get_callback_identifier
|
||||
|
||||
# Test 1: Object registered in CustomLoggerRegistry (without callback_name attribute)
|
||||
# Mock a class that's registered in the registry
|
||||
class MockRegisteredLogger:
|
||||
|
||||
@ -299,6 +299,8 @@ ignored_keys = [
|
||||
"metadata.status",
|
||||
"metadata.proxy_server_request",
|
||||
"metadata.error_information",
|
||||
"metadata.attempted_retries",
|
||||
"metadata.max_retries",
|
||||
]
|
||||
|
||||
MODEL_LIST = [
|
||||
@ -1196,6 +1198,18 @@ async def test_ui_view_spend_logs_with_key_hash(client, monkeypatch):
|
||||
assert data["data"][0]["api_key"] == "sk-test-key-1"
|
||||
|
||||
|
||||
async def _wait_for_mock_call(mock, timeout=10, interval=0.1):
|
||||
"""Poll until mock has been called at least once, or timeout."""
|
||||
import time
|
||||
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
if mock.call_count > 0:
|
||||
return
|
||||
await asyncio.sleep(interval)
|
||||
mock.assert_called_once() # will raise with a clear message
|
||||
|
||||
|
||||
class TestSpendLogsPayload:
|
||||
@pytest.mark.asyncio
|
||||
async def test_spend_logs_payload_e2e(self):
|
||||
@ -1215,9 +1229,7 @@ class TestSpendLogsPayload:
|
||||
|
||||
assert response.choices[0].message.content == "Hello, world!"
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
mock_client.assert_called_once()
|
||||
await _wait_for_mock_call(mock_client)
|
||||
|
||||
kwargs = mock_client.call_args.kwargs
|
||||
payload: SpendLogsPayload = kwargs["payload"]
|
||||
@ -1310,9 +1322,7 @@ class TestSpendLogsPayload:
|
||||
|
||||
assert response.choices[0].message.content == "Hi! My name is Claude."
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
mock_client.assert_called_once()
|
||||
await _wait_for_mock_call(mock_client)
|
||||
|
||||
kwargs = mock_client.call_args.kwargs
|
||||
payload: SpendLogsPayload = kwargs["payload"]
|
||||
@ -1402,9 +1412,7 @@ class TestSpendLogsPayload:
|
||||
|
||||
assert response.choices[0].message.content == "Hi! My name is Claude."
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
mock_client.assert_called_once()
|
||||
await _wait_for_mock_call(mock_client)
|
||||
|
||||
kwargs = mock_client.call_args.kwargs
|
||||
payload: SpendLogsPayload = kwargs["payload"]
|
||||
|
||||
@ -233,7 +233,7 @@ class TestProxyInitializationHelpers:
|
||||
|
||||
from litellm.proxy.proxy_cli import run_server
|
||||
|
||||
runner = CliRunner(mix_stderr=False)
|
||||
runner = CliRunner()
|
||||
|
||||
mock_proxy_module = MagicMock(
|
||||
app=MagicMock(),
|
||||
@ -241,7 +241,12 @@ class TestProxyInitializationHelpers:
|
||||
KeyManagementSettings=MagicMock(),
|
||||
save_worker_config=MagicMock(),
|
||||
)
|
||||
# Remove DATABASE_URL/DIRECT_URL so the CLI doesn't attempt
|
||||
# real prisma operations when these are set in CI.
|
||||
clean_env = {k: v for k, v in os.environ.items() if k not in ("DATABASE_URL", "DIRECT_URL")}
|
||||
with patch.dict(
|
||||
os.environ, clean_env, clear=True,
|
||||
), patch.dict(
|
||||
"sys.modules",
|
||||
{
|
||||
"proxy_server": mock_proxy_module,
|
||||
@ -262,7 +267,7 @@ class TestProxyInitializationHelpers:
|
||||
# --- skip startup ---
|
||||
result = runner.invoke(run_server, ["--local", "--skip_server_startup"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert result.exit_code == 0, f"exit_code={result.exit_code}, output={result.output}"
|
||||
assert "Skipping server startup" in result.output
|
||||
mock_uvicorn_run.assert_not_called()
|
||||
|
||||
@ -271,7 +276,7 @@ class TestProxyInitializationHelpers:
|
||||
|
||||
result = runner.invoke(run_server, ["--local"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert result.exit_code == 0, f"exit_code={result.exit_code}, output={result.output}"
|
||||
mock_uvicorn_run.assert_called_once()
|
||||
|
||||
@patch("uvicorn.run")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user