# Source listing metadata: 284 lines, 9.7 KiB, Python
import asyncio
|
|
import time
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
import litellm
|
|
from litellm.router import Router
|
|
|
|
|
|
class _NonCopyableSpan:
    """Stand-in for an OTel Span that refuses to be deep-copied.

    ``__deepcopy__`` always raises, which forces ``safe_deep_copy`` to fall
    back to the original reference instead of producing a copy.
    """

    def __deepcopy__(self, memo):
        """Raise ``TypeError`` unconditionally; ``memo`` is ignored."""
        raise TypeError("OTel spans cannot be deepcopied")
|
|
|
|
|
|
class _FakeUserAPIKeyAuth:
    """Stand-in for UserAPIKeyAuth carrying a non-deepcopy-able span.

    This mirrors what makes safe_deep_copy fail for the metadata dict in
    production: safe_deep_copy special-cases the top-level
    ``litellm_parent_otel_span`` (it pops it before copying) but does NOT
    handle ``user_api_key_auth.parent_otel_span`` nested inside the metadata.
    """

    def __init__(self, key_alias, parent_otel_span):
        """Store the key alias and the parent span exactly as given."""
        self.key_alias = key_alias
        self.parent_otel_span = parent_otel_span

    def __deepcopy__(self, memo):
        """Raise ``TypeError`` unconditionally; ``memo`` is ignored."""
        raise TypeError("Contains OTel span that cannot be deepcopied")
|
|
|
|
|
|
def test_get_silent_experiment_kwargs():
    """
    Test _get_silent_experiment_kwargs returns isolated kwargs with silent
    experiment metadata.

    The metadata carries a non-copyable user_api_key_auth (mimicking the real
    proxy scenario) so that safe_deep_copy falls back to the original metadata
    reference, exercising the identity-check fix path.
    """
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "fake-key"},
            },
        ]
    )

    span = _NonCopyableSpan()
    auth = _FakeUserAPIKeyAuth(
        key_alias="HaneefKeyNonTeamProd",
        parent_otel_span=span,
    )
    original_metadata = {
        "foo": "bar",
        "litellm_parent_otel_span": span,
        "user_api_key_auth": auth,
    }
    kwargs = {
        "metadata": original_metadata,
        "litellm_call_id": "call-123",
        "stream": True,
        "proxy_server_request": {"body": {"model": "test"}},
    }

    result = router._get_silent_experiment_kwargs(**kwargs)
    silent_metadata = result["metadata"]

    # The silent copy is flagged and keeps the caller-supplied metadata.
    assert silent_metadata["is_silent_experiment"] is True
    assert silent_metadata["foo"] == "bar"
    assert "litellm_call_id" not in result
    # stream must be forced to False so callbacks fire in background
    assert result["stream"] is False
    # proxy_server_request must be preserved for spend log metadata
    assert "proxy_server_request" in result

    # CRITICAL: metadata must be a DIFFERENT dict object than the original,
    # so that setting model_group / is_silent_experiment on the silent dict
    # doesn't corrupt the primary call's metadata.
    assert silent_metadata is not kwargs["metadata"]

    # OTel span must be stripped from the silent copy: it's not safe to use
    # across event loops (silent experiment runs in a new event loop).
    assert "litellm_parent_otel_span" not in silent_metadata

    # Original metadata must NOT be mutated: it must still carry the real
    # span, not safe_deep_copy's temporary "placeholder" string.
    assert "is_silent_experiment" not in kwargs["metadata"]
    assert kwargs["metadata"]["litellm_parent_otel_span"] is span
    assert kwargs["metadata"]["user_api_key_auth"] is auth

    # Shallow copy must preserve user_api_key_auth so the silent experiment
    # can attribute billing / spend logs to the correct key/team.
    assert silent_metadata["user_api_key_auth"] is auth
|
|
|
|
|
|
def test_silent_experiment_completion_direct():
    """
    Call _silent_experiment_completion directly (for router code coverage).

    router.acompletion is replaced with an AsyncMock returning None to avoid
    a real API call. This is a smoke test: it makes no assertions and passes
    as long as the call does not raise.
    """
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "fake-key"},
            },
        ]
    )
    messages = [{"role": "user", "content": "hi"}]

    with patch.object(router, "acompletion", new_callable=AsyncMock, return_value=None):
        router._silent_experiment_completion(
            silent_model="gpt-3.5-turbo",
            messages=messages,
        )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_silent_experiment_acompletion_direct():
    """
    Await _silent_experiment_acompletion directly (for router code coverage).

    router.acompletion is replaced with an AsyncMock returning None to avoid
    a real API call. This is a smoke test: it makes no assertions and passes
    as long as the awaited call does not raise.
    """
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "fake-key"},
            },
        ]
    )
    messages = [{"role": "user", "content": "hi"}]

    with patch.object(router, "acompletion", new_callable=AsyncMock, return_value=None):
        await router._silent_experiment_acompletion(
            silent_model="gpt-3.5-turbo",
            messages=messages,
        )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_router_silent_experiment_acompletion():
    """
    Test that silent_model triggers a background acompletion call
    and that the silent_model parameter is stripped from both calls.
    """
    model_list = [
        {
            "model_name": "primary-model",
            "litellm_params": {
                "model": "openai/gpt-3.5-turbo",
                "api_key": "fake-key",
                "silent_model": "silent-model",
            },
        },
        {
            "model_name": "silent-model",
            "litellm_params": {
                "model": "openai/gpt-4",
                "api_key": "fake-key",
            },
        },
    ]
    router = Router(model_list=model_list)

    # AsyncMock so the patched coroutine function can be awaited.
    mock_response = litellm.ModelResponse(choices=[{"message": {"content": "hello"}}])
    mock_acompletion = AsyncMock(return_value=mock_response)

    # Patch the `acompletion` attribute on the litellm package itself.
    with patch.object(litellm, "acompletion", mock_acompletion):
        response = await router.acompletion(
            model="primary-model",
            messages=[{"role": "user", "content": "hi"}],
        )
        assert response.choices[0].message.content == "hello"

        # Give the background task a moment to trigger (it's an asyncio task).
        # The wait stays inside the patch so the silent call hits the mock.
        await asyncio.sleep(0.1)

    # Should have 2 calls: one for primary, one for silent
    assert mock_acompletion.call_count == 2

    # Keyword arguments of every recorded litellm.acompletion call.
    recorded_kwargs = [recorded.kwargs for recorded in mock_acompletion.call_args_list]

    # Verify no silent_model in any call to litellm.acompletion
    for call_kwargs in recorded_kwargs:
        assert "silent_model" not in call_kwargs

    # The silent call is the one whose metadata is flagged as an experiment.
    silent_kwargs = next(
        (
            call_kwargs
            for call_kwargs in recorded_kwargs
            if call_kwargs.get("metadata", {}).get("is_silent_experiment") is True
        ),
        None,
    )
    assert silent_kwargs is not None
    assert silent_kwargs["model"] == "openai/gpt-4"

    # The primary call is the one without a truthy experiment flag.
    primary_kwargs = next(
        (
            call_kwargs
            for call_kwargs in recorded_kwargs
            if not call_kwargs.get("metadata", {}).get("is_silent_experiment")
        ),
        None,
    )
    assert primary_kwargs is not None
    assert primary_kwargs["model"] == "openai/gpt-3.5-turbo"
|
|
|
|
|
|
def test_router_silent_experiment_completion():
    """
    Test that silent_model triggers a background completion call (sync)
    and that the silent_model parameter is stripped.

    litellm.completion and litellm.acompletion are both mocked. The test
    expects exactly one litellm.acompletion call, the silent background one.
    """
    model_list = [
        {
            "model_name": "primary-model",
            "litellm_params": {
                "model": "openai/gpt-3.5-turbo",
                "api_key": "fake-key",
                "silent_model": "silent-model",
            },
        },
        {
            "model_name": "silent-model",
            "litellm_params": {
                "model": "openai/gpt-4",
                "api_key": "fake-key",
            },
        },
    ]
    router = Router(model_list=model_list)

    mock_response = litellm.ModelResponse(choices=[{"message": {"content": "hello"}}])
    # AsyncMock already yields an awaitable resolving to return_value, so no
    # hand-written coroutine is needed as a side effect.
    mock_acompletion_mock = AsyncMock(return_value=mock_response)
    mock_completion_mock = MagicMock(return_value=mock_response)

    # Patch at the litellm module level
    with (
        patch.object(litellm, "acompletion", mock_acompletion_mock),
        patch.object(litellm, "completion", mock_completion_mock),
    ):
        response = router.completion(
            model="primary-model",
            messages=[{"role": "user", "content": "hi"}],
        )
        assert response.choices[0].message.content == "hello"

        # The silent call runs in the background, so wait for it. Poll with
        # a 2 s upper bound instead of always sleeping the full 2 s; the
        # wait stays inside the patch so the silent call hits the mock.
        deadline = time.monotonic() + 2.0
        while mock_acompletion_mock.call_count < 1 and time.monotonic() < deadline:
            time.sleep(0.05)

    # Should have 1 acompletion call (the silent background call)
    assert mock_acompletion_mock.call_count == 1

    # Keyword arguments of every recorded litellm.acompletion call.
    recorded_kwargs = [
        recorded.kwargs for recorded in mock_acompletion_mock.call_args_list
    ]

    # Verify no silent_model in any call
    for call_kwargs in recorded_kwargs:
        assert "silent_model" not in call_kwargs

    # The silent call is the one whose metadata is flagged as an experiment.
    silent_kwargs = next(
        (
            call_kwargs
            for call_kwargs in recorded_kwargs
            if call_kwargs.get("metadata", {}).get("is_silent_experiment") is True
        ),
        None,
    )
    assert silent_kwargs is not None
    assert silent_kwargs["model"] == "openai/gpt-4"
    # Verify model_group is set to the silent model name for correct metric attribution
    assert silent_kwargs["metadata"]["model_group"] == "silent-model"
|