test(fireworks): mock remaining live smoke tests
test_completion_fireworks_ai and test_completion_cost_fireworks_ai made real Fireworks calls and broke whenever Fireworks rotated its serverless catalog (no externally verifiable model list exists). They also asserted nothing; they only printed the response. Mock the HTTP post and assert real behavior instead: the request is built with the right model and messages, and the OpenAI-compatible response is parsed back correctly; the cost path yields a non-zero cost against the local cost map. The tests now need no network access and do not depend on any particular live model, and they verify more than the old smoke checks did.
This commit is contained in:
parent
9770efe9e1
commit
b5db7ed37d
@ -1047,22 +1047,50 @@ def test_completion_openai_params(model):
|
||||
|
||||
|
||||
def test_completion_fireworks_ai():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
messages = [
|
||||
{"role": "system", "content": "You're a good bot"},
|
||||
"""
|
||||
Mocked so it does not depend on Fireworks' rotating serverless catalog
|
||||
(no externally-verifiable model list exists). Asserts the request is
|
||||
built correctly and the OpenAI-compatible response is parsed back.
|
||||
"""
|
||||
litellm.set_verbose = True
|
||||
messages = [
|
||||
{"role": "system", "content": "You're a good bot"},
|
||||
{"role": "user", "content": "Hey"},
|
||||
]
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.headers = {"content-type": "application/json"}
|
||||
mock_response.json.return_value = {
|
||||
"id": "chatcmpl-test",
|
||||
"object": "chat.completion",
|
||||
"created": 1234567890,
|
||||
"model": "accounts/fireworks/models/deepseek-v3p1",
|
||||
"choices": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hey",
|
||||
},
|
||||
]
|
||||
"index": 0,
|
||||
"message": {"role": "assistant", "content": "Hello there!"},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 2, "total_tokens": 12},
|
||||
}
|
||||
mock_response.text = json.dumps(mock_response.json.return_value)
|
||||
|
||||
client = HTTPHandler()
|
||||
with patch.object(client, "post", return_value=mock_response) as mock_post:
|
||||
response = completion(
|
||||
model="fireworks_ai/accounts/fireworks/models/deepseek-v3p1",
|
||||
messages=messages,
|
||||
client=client,
|
||||
)
|
||||
print(response)
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
mock_post.assert_called_once()
|
||||
request_body = json.loads(mock_post.call_args.kwargs["data"])
|
||||
assert "deepseek-v3p1" in request_body["model"]
|
||||
assert request_body["messages"] == messages
|
||||
assert response.choices[0].message.content == "Hello there!"
|
||||
assert response.usage.total_tokens == 12
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@ -1186,14 +1186,43 @@ def test_get_model_params_fireworks_ai(model, base_model):
|
||||
],
|
||||
)
|
||||
def test_completion_cost_fireworks_ai(model):
|
||||
"""
|
||||
Mocked so it does not depend on Fireworks' rotating serverless catalog.
|
||||
Validates the Fireworks cost path: a parsed response with usage yields a
|
||||
non-zero cost against the local cost map.
|
||||
"""
|
||||
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
|
||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||
resp = litellm.completion(model=model, messages=messages) # works fine
|
||||
mock_response_data = {
|
||||
"id": "chatcmpl-test",
|
||||
"object": "chat.completion",
|
||||
"created": 1234567890,
|
||||
"model": model.split("fireworks_ai/")[-1],
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {"role": "assistant", "content": "Going great, thanks!"},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {"prompt_tokens": 8, "completion_tokens": 5, "total_tokens": 13},
|
||||
}
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.headers = {"content-type": "application/json"}
|
||||
mock_response.json.return_value = mock_response_data
|
||||
mock_response.text = json.dumps(mock_response_data)
|
||||
|
||||
sync_handler = HTTPHandler()
|
||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||
|
||||
with patch.object(HTTPHandler, "post", return_value=mock_response):
|
||||
resp = litellm.completion(model=model, messages=messages, client=sync_handler)
|
||||
|
||||
print(resp)
|
||||
cost = completion_cost(completion_response=resp)
|
||||
assert cost > 0
|
||||
|
||||
|
||||
def test_cost_azure_openai_prompt_caching():
|
||||
|
||||
Loading…
Reference in New Issue
Block a user