PR was blocked by .github/workflows/guard-fork-dependencies.yml: fork PRs
cannot modify uv.lock.

Reverting:
- uv.lock + pyproject.toml black bump (24.10.0 -> 26.3.1) and the 295 files
  of mechanical Black 26 reformat coupled to it
- pyproject.toml diskcache extra change (kept the runtime mitigation in
  litellm/caching/disk_cache.py via JSONDisk)

Kept:
- Dockerfile cache narrowing (drops ~660 MB of uv build cache that surfaced
  cached setuptools as CVE findings)
- litellm/caching/disk_cache.py: dc.JSONDisk to neutralize CVE-2025-69872
- ui/litellm-dashboard/package-lock.json +
  litellm-js/spend-logs/package-lock.json: next/postcss/hono/uuid CVE bumps
  (these are not blocked by the fork guard)
- tests/test_litellm/caching/test_disk_cache.py
- tests/code_coverage_tests/liccheck.ini: harmless black authorization

Black + gitpython + langchain dep upgrades will need a follow-up from a
maintainer pushing a branch in the canonical BerriAI/litellm repo.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
133 lines
4.2 KiB
Python
133 lines
4.2 KiB
Python
"""
Test that batch cost calculation uses custom deployment-level pricing
when model_info is provided.

Reproduces the bug where `input_cost_per_token_batches` /
`output_cost_per_token_batches` set on a proxy deployment's model_info
are ignored by the batch cost pipeline because they are never threaded
through to `batch_cost_calculator`.
"""
|
|
|
|
import pytest
|
|
|
|
from litellm.batches.batch_utils import (
|
|
_batch_cost_calculator,
|
|
_get_batch_job_cost_from_file_content,
|
|
calculate_batch_cost_and_usage,
|
|
)
|
|
from litellm.cost_calculator import batch_cost_calculator
|
|
from litellm.types.utils import Usage
|
|
|
|
|
|
# --- helpers ---
|
|
|
|
|
|
def _make_batch_output_line(prompt_tokens: int = 10, completion_tokens: int = 5):
|
|
"""Return a single successful batch output line (OpenAI JSONL format)."""
|
|
return {
|
|
"id": "batch_req_1",
|
|
"custom_id": "req-1",
|
|
"response": {
|
|
"status_code": 200,
|
|
"body": {
|
|
"id": "chatcmpl-test",
|
|
"object": "chat.completion",
|
|
"model": "fake-batch-model",
|
|
"usage": {
|
|
"prompt_tokens": prompt_tokens,
|
|
"completion_tokens": completion_tokens,
|
|
"total_tokens": prompt_tokens + completion_tokens,
|
|
},
|
|
"choices": [
|
|
{
|
|
"index": 0,
|
|
"message": {"role": "assistant", "content": "Hello"},
|
|
"finish_reason": "stop",
|
|
}
|
|
],
|
|
},
|
|
},
|
|
"error": None,
|
|
}
|
|
|
|
|
|
# Deployment-level batch pricing passed as ``model_info`` to every function
# under test. The tests below expect costs of tokens * these per-token rates.
CUSTOM_MODEL_INFO = {
    "input_cost_per_token_batches": 0.00125,
    "output_cost_per_token_batches": 0.005,
}
|
|
|
|
|
|
# --- tests ---
|
|
|
|
|
|
def test_batch_cost_calculator_uses_custom_model_info():
    """batch_cost_calculator should use model_info override when provided.

    Calls ``batch_cost_calculator`` with a 10-prompt / 5-completion token
    ``Usage`` and ``CUSTOM_MODEL_INFO``, then checks that the returned
    (prompt_cost, completion_cost) pair equals tokens * the custom batch rates.
    """
    usage = Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15)

    prompt_cost, completion_cost = batch_cost_calculator(
        usage=usage,
        model="fake-batch-model",
        custom_llm_provider="openai",
        model_info=CUSTOM_MODEL_INFO,
    )

    # Expected values mirror the per-token rates in CUSTOM_MODEL_INFO.
    expected_prompt = 10 * 0.00125
    expected_completion = 5 * 0.005
    assert prompt_cost == pytest.approx(
        expected_prompt
    ), f"Expected prompt cost {expected_prompt}, got {prompt_cost}"
    assert completion_cost == pytest.approx(
        expected_completion
    ), f"Expected completion cost {expected_completion}, got {completion_cost}"
|
|
|
|
|
|
def test_get_batch_job_cost_from_file_content_uses_custom_model_info():
    """_get_batch_job_cost_from_file_content should thread model_info to completion_cost.

    Feeds one synthetic batch output line (10 prompt / 5 completion tokens)
    together with ``CUSTOM_MODEL_INFO`` and checks that the returned total
    cost equals the sum of tokens * the custom batch rates.
    """
    file_content = [_make_batch_output_line(prompt_tokens=10, completion_tokens=5)]

    cost = _get_batch_job_cost_from_file_content(
        file_content_dictionary=file_content,
        custom_llm_provider="openai",
        model_info=CUSTOM_MODEL_INFO,
    )

    # Prompt and completion costs at the rates in CUSTOM_MODEL_INFO.
    expected = (10 * 0.00125) + (5 * 0.005)
    assert cost == pytest.approx(
        expected
    ), f"Expected total cost {expected}, got {cost}"
|
|
|
|
|
|
def test_batch_cost_calculator_func_uses_custom_model_info():
    """_batch_cost_calculator should thread model_info.

    Feeds one synthetic batch output line (10 prompt / 5 completion tokens)
    together with ``CUSTOM_MODEL_INFO`` and checks that the returned total
    cost equals the sum of tokens * the custom batch rates.
    """
    file_content = [_make_batch_output_line(prompt_tokens=10, completion_tokens=5)]

    cost = _batch_cost_calculator(
        file_content_dictionary=file_content,
        custom_llm_provider="openai",
        model_info=CUSTOM_MODEL_INFO,
    )

    # Prompt and completion costs at the rates in CUSTOM_MODEL_INFO.
    expected = (10 * 0.00125) + (5 * 0.005)
    assert cost == pytest.approx(
        expected
    ), f"Expected total cost {expected}, got {cost}"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_calculate_batch_cost_and_usage_uses_custom_model_info():
    """calculate_batch_cost_and_usage should thread model_info.

    Awaits ``calculate_batch_cost_and_usage`` on one synthetic batch output
    line (10 prompt / 5 completion tokens) with ``CUSTOM_MODEL_INFO`` and
    checks both the total cost and the token counts on the returned usage.
    """
    file_content = [_make_batch_output_line(prompt_tokens=10, completion_tokens=5)]

    # The third element of the result is not inspected by this test.
    batch_cost, batch_usage, _ = await calculate_batch_cost_and_usage(
        file_content_dictionary=file_content,
        custom_llm_provider="openai",
        model_info=CUSTOM_MODEL_INFO,
    )

    # Prompt and completion costs at the rates in CUSTOM_MODEL_INFO.
    expected = (10 * 0.00125) + (5 * 0.005)
    assert batch_cost == pytest.approx(
        expected
    ), f"Expected total cost {expected}, got {batch_cost}"
    assert batch_usage.prompt_tokens == 10
    assert batch_usage.completion_tokens == 5
|