diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py index cff3fdef45..cce6d33e79 100644 --- a/tests/local_testing/test_completion.py +++ b/tests/local_testing/test_completion.py @@ -1047,22 +1047,50 @@ def test_completion_openai_params(model): def test_completion_fireworks_ai(): - try: - litellm.set_verbose = True - messages = [ - {"role": "system", "content": "You're a good bot"}, + """ + Mocked so it does not depend on Fireworks' rotating serverless catalog + (no externally-verifiable model list exists). Asserts the request is + built correctly and the OpenAI-compatible response is parsed back. + """ + litellm.set_verbose = True + messages = [ + {"role": "system", "content": "You're a good bot"}, + {"role": "user", "content": "Hey"}, + ] + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "accounts/fireworks/models/deepseek-v3p1", + "choices": [ { - "role": "user", - "content": "Hey", - }, - ] + "index": 0, + "message": {"role": "assistant", "content": "Hello there!"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 10, "completion_tokens": 2, "total_tokens": 12}, + } + mock_response.text = json.dumps(mock_response.json.return_value) + + client = HTTPHandler() + with patch.object(client, "post", return_value=mock_response) as mock_post: response = completion( model="fireworks_ai/accounts/fireworks/models/deepseek-v3p1", messages=messages, + client=client, ) - print(response) - except Exception as e: - pytest.fail(f"Error occurred: {e}") + + mock_post.assert_called_once() + request_body = json.loads(mock_post.call_args.kwargs["data"]) + assert "deepseek-v3p1" in request_body["model"] + assert request_body["messages"] == messages + assert response.choices[0].message.content == "Hello there!" + assert response.usage.total_tokens == 12 @pytest.mark.parametrize( diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py index a9bbcffd8e..cf0c645615 100644 --- a/tests/local_testing/test_completion_cost.py +++ b/tests/local_testing/test_completion_cost.py @@ -1186,14 +1186,43 @@ def test_get_model_params_fireworks_ai(model, base_model): ], ) def test_completion_cost_fireworks_ai(model): + """ + Mocked so it does not depend on Fireworks' rotating serverless catalog. + Validates the Fireworks cost path: a parsed response with usage yields a + non-zero cost against the local cost map. + """ os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" litellm.model_cost = litellm.get_model_cost_map(url="") - messages = [{"role": "user", "content": "Hey, how's it going?"}] - resp = litellm.completion(model=model, messages=messages) # works fine + mock_response_data = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": model.split("fireworks_ai/")[-1], + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Going great, thanks!"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 8, "completion_tokens": 5, "total_tokens": 13}, + } + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = mock_response_data + mock_response.text = json.dumps(mock_response_data) + + sync_handler = HTTPHandler() + messages = [{"role": "user", "content": "Hey, how's it going?"}] + + with patch.object(HTTPHandler, "post", return_value=mock_response): + resp = litellm.completion(model=model, messages=messages, client=sync_handler) - print(resp) cost = completion_cost(completion_response=resp) + assert cost > 0 def test_cost_azure_openai_prompt_caching():