Merge pull request #27409 from BerriAI/litellm_/inspiring-allen-ec64a4
[Fix] Tests: Reduce VCR cassette bloat and fix multipart caching
This commit is contained in:
commit
b9b315157b
@ -7,7 +7,9 @@ from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Iterable
|
||||
|
||||
@ -74,6 +76,17 @@ FILTERED_RESPONSE_HEADERS = (
|
||||
"date",
|
||||
)
|
||||
|
||||
# Tiny placeholder used to replace base64 image payloads in cassettes.
|
||||
# Decodes to b"test" — short, valid base64 so test code that decodes
|
||||
# the field still succeeds.
|
||||
VCR_IMAGE_B64_PLACEHOLDER = "dGVzdA=="
|
||||
|
||||
# Fixed boundary substituted into multipart request bodies so the
|
||||
# ``safe_body`` matcher sees the same bytes across record and replay.
|
||||
# httpx generates a fresh random boundary per request via os.urandom,
|
||||
# which otherwise turns every multipart cassette into a permanent miss.
|
||||
VCR_FIXED_MULTIPART_BOUNDARY = "vcr-static-boundary"
|
||||
|
||||
|
||||
def _scrub_response(response):
|
||||
if not isinstance(response, dict):
|
||||
@ -86,8 +99,88 @@ def _scrub_response(response):
|
||||
return response
|
||||
|
||||
|
||||
def _replace_b64_json_in_place(obj) -> bool:
    """Swap oversized ``b64_json`` strings for the placeholder, in place.

    Walks dicts and lists recursively; any other value is left alone.
    A ``b64_json`` string is only rewritten when it is longer than
    ``VCR_IMAGE_B64_PLACEHOLDER``, so a tree that was already processed
    is not touched again.

    Returns:
        ``True`` when at least one value in the tree was rewritten.
    """
    if isinstance(obj, dict):
        touched = False
        for field, current in obj.items():
            is_payload = (
                field == "b64_json"
                and isinstance(current, str)
                and len(current) > len(VCR_IMAGE_B64_PLACEHOLDER)
            )
            if is_payload:
                # Re-binding an existing key is safe while iterating.
                obj[field] = VCR_IMAGE_B64_PLACEHOLDER
                touched = True
            else:
                touched = _replace_b64_json_in_place(current) or touched
        return touched

    if isinstance(obj, list):
        # A list (not a generator) so that every element is visited even
        # after the first rewrite has been seen.
        return any([_replace_b64_json_in_place(entry) for entry in obj])

    return False
|
||||
|
||||
|
||||
def _strip_image_b64_payloads(response):
    """Shrink base64 image payloads in a recorded response before saving.

    The response body (``response["body"]["string"]``, either ``str`` or
    bytes-like) is parsed as JSON and every oversized ``b64_json`` value
    is replaced via ``_replace_b64_json_in_place``. When something was
    replaced, the body is re-serialized compactly and any
    ``content-length`` header is updated to the new byte length.

    The response is returned untouched when it is not a dict, has no
    dict body, the body is missing or of an unsupported type, is not
    valid UTF-8 / JSON, or contains nothing to replace. Otherwise the
    same dict is mutated and returned.
    """
    body = response.get("body") if isinstance(response, dict) else None
    if not isinstance(body, dict):
        return response

    raw = body.get("string")
    is_binary = isinstance(raw, (bytes, bytearray))
    if is_binary:
        try:
            text = bytes(raw).decode("utf-8")
        except UnicodeDecodeError:
            return response
    elif isinstance(raw, str):
        text = raw
    else:
        # Covers a missing body (None) as well as unexpected types.
        return response

    try:
        payload = json.loads(text)
    except (ValueError, TypeError):
        return response

    if not _replace_b64_json_in_place(payload):
        return response

    compact = json.dumps(payload, separators=(",", ":"))
    encoded = compact.encode("utf-8")
    # Keep the body in the same str/bytes flavour it arrived in.
    body["string"] = encoded if is_binary else compact

    headers = response.get("headers")
    if isinstance(headers, dict):
        length = str(len(encoded))
        length_keys = [k for k in headers if str(k).lower() == "content-length"]
        for name in length_keys:
            # Header values may be stored as a list or as a bare string.
            headers[name] = [length] if isinstance(headers[name], list) else length
    return response
|
||||
|
||||
|
||||
def _before_record_response(response):
    """Prepare a response before it is recorded.

    Pipeline, applied in this order:

    1. ``_strip_image_b64_payloads`` — swap large ``b64_json`` payloads
       for the small placeholder.
    2. ``_scrub_response`` — scrub the response.
    3. ``filter_non_2xx_response`` — final filter; its result is returned.

    The earlier version returned after steps 2-3 only, which left the
    image-stripping call unreachable.
    """
    slimmed = _strip_image_b64_payloads(response)
    return filter_non_2xx_response(_scrub_response(slimmed))
|
||||
|
||||
|
||||
def _safe_body_matcher(r1, r2) -> None:
|
||||
@ -172,8 +265,84 @@ def _strip_headers(headers, names: Iterable[str]) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def _normalize_multipart_boundary(request) -> None:
    """Rewrite a random multipart boundary to a fixed string, in place.

    httpx generates a fresh ``boundary=<random hex>`` for every
    multipart request. Without normalization the request body bytes
    differ across runs, the ``safe_body`` matcher misses, and new
    episodes keep being appended to the cassette. Replacing the
    boundary in both the Content-Type header and the body makes the
    request deterministic.

    Behaviour:

    * Requests without headers, without a multipart Content-Type, or
      without a ``boundary`` parameter are left untouched.
    * The ``boundary`` parameter name is matched case-insensitively,
      consistent with the case-insensitive ``multipart/`` check.
    * The new body is computed *before* the header is rewritten, so a
      body of an unsupported type (neither ``bytes``/``bytearray`` nor
      ``str``) leaves header and body in agreement instead of rewriting
      only the header. A ``None`` body still gets its header rewritten.
    * Idempotent: once the header already carries
      ``boundary=<VCR_FIXED_MULTIPART_BOUNDARY>`` the call is a no-op.

    Args:
        request: Object exposing ``headers`` (mapping-like) and ``body``.
    """
    headers = getattr(request, "headers", None)
    if headers is None:
        return

    # Locate the Content-Type header regardless of key casing.
    content_type_key = None
    content_type_value = None
    try:
        for key in list(headers.keys()):
            if str(key).lower() == "content-type":
                content_type_key = key
                value = headers[key]
                content_type_value = value if isinstance(value, str) else str(value)
                break
    except AttributeError:
        return

    if not content_type_value or "multipart/" not in content_type_value.lower():
        return

    fixed_param = f"boundary={VCR_FIXED_MULTIPART_BOUNDARY}"
    if fixed_param in content_type_value:
        return  # already normalized

    # MIME parameter names are case-insensitive, so accept "Boundary=" too.
    match = re.search(
        r"boundary=([^\s;]+)", content_type_value, flags=re.IGNORECASE
    )
    if not match:
        return
    current_boundary = match.group(1).strip('"')
    if current_boundary == VCR_FIXED_MULTIPART_BOUNDARY:
        return

    # Work out the replacement body first; nothing is mutated until we
    # know the body can be rewritten too.
    body = getattr(request, "body", None)
    if body is None:
        new_body = None
    elif isinstance(body, (bytes, bytearray)):
        try:
            new_body = bytes(body).replace(
                current_boundary.encode("utf-8"),
                VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8"),
            )
        except (TypeError, ValueError):
            # e.g. a boundary that cannot be encoded as UTF-8
            return
    elif isinstance(body, str):
        new_body = body.replace(current_boundary, VCR_FIXED_MULTIPART_BOUNDARY)
    else:
        return

    try:
        headers[content_type_key] = content_type_value.replace(
            match.group(0), fixed_param
        )
    except (TypeError, AttributeError):
        return

    if new_body is None:
        return

    try:
        request.body = new_body
    except (AttributeError, TypeError):
        # Best effort: some request objects expose a read-only body.
        pass
|
||||
|
||||
|
||||
def _before_record_request(request):
|
||||
"""Fingerprint API keys, then scrub them.
|
||||
"""Fingerprint API keys, scrub them, and normalize multipart boundaries.
|
||||
|
||||
Order matters in two ways:
|
||||
|
||||
@ -187,7 +356,8 @@ def _before_record_request(request):
|
||||
auth headers we already stripped, so re-hashing would yield
|
||||
``"no-key"`` and the stored vs. incoming fingerprints would
|
||||
diverge. Skip the recompute when the header is already set so
|
||||
this hook is idempotent.
|
||||
this hook is idempotent. The boundary normalizer is also
|
||||
idempotent for the same reason.
|
||||
"""
|
||||
headers = getattr(request, "headers", None)
|
||||
if headers is None:
|
||||
@ -199,6 +369,7 @@ def _before_record_request(request):
|
||||
except (TypeError, AttributeError):
|
||||
pass
|
||||
_strip_headers(headers, FILTERED_REQUEST_HEADERS)
|
||||
_normalize_multipart_boundary(request)
|
||||
return request
|
||||
|
||||
|
||||
|
||||
@ -853,7 +853,11 @@ class BaseLLMChatTest(ABC):
|
||||
@pytest.mark.parametrize(
|
||||
"image_url",
|
||||
[
|
||||
"http://img1.etsystatic.com/260/0/7813604/il_fullxfull.4226713999_q86e.jpg",
|
||||
# In-repo logo served via jsdelivr (sha-pinned, immutable).
|
||||
# Bedrock fetches the URL and base64-embeds it in the
|
||||
# Converse request body; using a multi-MB hosted product
|
||||
# photo here previously bloated cassettes to ~60 MB each.
|
||||
"https://cdn.jsdelivr.net/gh/BerriAI/litellm@d769e81c90d453240c61fc572cdb27fae06a89d0/ui/litellm-dashboard/public/assets/logos/litellm_logo.jpg",
|
||||
"https://awsmp-logos.s3.amazonaws.com/seller-xw5kijmvmzasy/c233c9ade2ccb5491072ae232c814942.png",
|
||||
],
|
||||
)
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
Tests for Evals API operations across providers
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import sys
|
||||
from abc import ABC, abstractmethod
|
||||
@ -20,6 +21,46 @@ from litellm.types.llms.openai_evals import (
|
||||
)
|
||||
|
||||
|
||||
def _stable_eval_name(test_node_name: str, suffix: str = "") -> str:
    """Build a deterministic eval name from a test's node name.

    The name is ``vcr-managed-`` followed by the first 12 hex characters
    of the SHA-1 digest of ``test_node_name`` and then ``suffix``. The
    same node name always yields the same eval name, so the request
    body is identical from run to run and a recorded cassette can match
    on replay.
    """
    digest = hashlib.sha1(test_node_name.encode()).hexdigest()
    return "vcr-managed-{}{}".format(digest[:12], suffix)
|
||||
|
||||
|
||||
_TESTING_CRITERIA = [
|
||||
{
|
||||
"type": "label_model",
|
||||
"model": "gpt-4o",
|
||||
"input": [
|
||||
{
|
||||
"role": "developer",
|
||||
"content": "Classify the sentiment as 'positive' or 'negative'",
|
||||
},
|
||||
{"role": "user", "content": "Statement: {{item.input}}"},
|
||||
],
|
||||
"passing_labels": ["positive"],
|
||||
"labels": ["positive", "negative"],
|
||||
"name": "Sentiment grader",
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
_PROVIDER_FLAKINESS = (
|
||||
litellm.InternalServerError,
|
||||
litellm.APIConnectionError,
|
||||
litellm.Timeout,
|
||||
litellm.ServiceUnavailableError,
|
||||
)
|
||||
|
||||
|
||||
class BaseEvalsAPITest(ABC):
|
||||
"""
|
||||
Base test class for Evals API operations.
|
||||
@ -41,13 +82,64 @@ class BaseEvalsAPITest(ABC):
|
||||
"""Return the API base URL for the provider"""
|
||||
pass
|
||||
|
||||
@pytest.fixture
def managed_eval(self, request):
    """Yield a freshly created eval with a stable name; delete it afterwards.

    The fixture uses pytest's default (function) scope, so each test
    that requests it gets its own create -> use -> delete cycle. The
    eval name is derived from ``request.node.name`` via
    ``_stable_eval_name`` so it does not change between runs.

    Skips the test when no API key is configured, when the provider is
    unavailable (any of ``_PROVIDER_FLAKINESS``), or when rate limited.
    """
    provider = self.get_custom_llm_provider()
    key = self.get_api_key()
    base = self.get_api_base()

    if not key:
        pytest.skip(f"No API key provided for {provider}")

    # Connection arguments shared by the create and delete calls.
    connection = {
        "custom_llm_provider": provider,
        "api_key": key,
        "api_base": base,
    }

    try:
        created = litellm.create_eval(
            name=_stable_eval_name(request.node.name),
            data_source_config={
                "type": "stored_completions",
                "metadata": {"usecase": "chatbot", "vcr": "managed"},
            },
            testing_criteria=_TESTING_CRITERIA,
            **connection,
        )
    except _PROVIDER_FLAKINESS:
        pytest.skip("Provider service unavailable")
    except litellm.RateLimitError:
        pytest.skip("Rate limit exceeded")

    yield created

    # Teardown is deliberately best-effort: a failed delete must not
    # fail the test that used the eval.
    try:
        litellm.delete_eval(eval_id=created.id, **connection)
    except Exception:
        pass
|
||||
|
||||
@pytest.mark.flaky(retries=3, delay=2)
|
||||
def test_create_eval(self):
|
||||
def test_create_eval(self, request):
|
||||
"""
|
||||
Test creating an evaluation.
|
||||
"""
|
||||
import time
|
||||
|
||||
custom_llm_provider = self.get_custom_llm_provider()
|
||||
api_key = self.get_api_key()
|
||||
api_base = self.get_api_base()
|
||||
@ -56,53 +148,45 @@ class BaseEvalsAPITest(ABC):
|
||||
pytest.skip(f"No API key provided for {custom_llm_provider}")
|
||||
|
||||
litellm.set_verbose = True
|
||||
unique_name = _stable_eval_name(request.node.name)
|
||||
|
||||
# Create eval with stored_completions data source
|
||||
unique_name = f"Test Eval {int(time.time())}"
|
||||
|
||||
created_id = None
|
||||
try:
|
||||
response = litellm.create_eval(
|
||||
name=unique_name,
|
||||
data_source_config={
|
||||
"type": "stored_completions",
|
||||
"metadata": {"usecase": "chatbot"},
|
||||
},
|
||||
testing_criteria=[
|
||||
{
|
||||
"type": "label_model",
|
||||
"model": "gpt-4o",
|
||||
"input": [
|
||||
{
|
||||
"role": "developer",
|
||||
"content": "Classify the sentiment as 'positive' or 'negative'",
|
||||
},
|
||||
{"role": "user", "content": "Statement: {{item.input}}"},
|
||||
],
|
||||
"passing_labels": ["positive"],
|
||||
"labels": ["positive", "negative"],
|
||||
"name": "Sentiment grader",
|
||||
}
|
||||
],
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
)
|
||||
except (
|
||||
litellm.InternalServerError,
|
||||
litellm.APIConnectionError,
|
||||
litellm.Timeout,
|
||||
litellm.ServiceUnavailableError,
|
||||
):
|
||||
pytest.skip("Provider service unavailable")
|
||||
except litellm.RateLimitError:
|
||||
pytest.skip("Rate limit exceeded")
|
||||
try:
|
||||
response = litellm.create_eval(
|
||||
name=unique_name,
|
||||
data_source_config={
|
||||
"type": "stored_completions",
|
||||
"metadata": {"usecase": "chatbot"},
|
||||
},
|
||||
testing_criteria=_TESTING_CRITERIA,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
)
|
||||
except _PROVIDER_FLAKINESS:
|
||||
pytest.skip("Provider service unavailable")
|
||||
except litellm.RateLimitError:
|
||||
pytest.skip("Rate limit exceeded")
|
||||
|
||||
assert response is not None
|
||||
assert isinstance(response, Eval)
|
||||
assert response.id is not None
|
||||
assert response.name == unique_name
|
||||
print(f"Created eval: {response}")
|
||||
print(f"Eval ID: {response.id}")
|
||||
assert response is not None
|
||||
assert isinstance(response, Eval)
|
||||
assert response.id is not None
|
||||
assert response.name == unique_name
|
||||
created_id = response.id
|
||||
print(f"Created eval: {response}")
|
||||
print(f"Eval ID: {response.id}")
|
||||
finally:
|
||||
if created_id is not None:
|
||||
try:
|
||||
litellm.delete_eval(
|
||||
eval_id=created_id,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def test_list_evals(self):
|
||||
"""
|
||||
@ -130,7 +214,7 @@ class BaseEvalsAPITest(ABC):
|
||||
assert hasattr(response, "has_more")
|
||||
print(f"Listed evals: {len(response.data)} evaluations")
|
||||
|
||||
def test_get_eval(self):
|
||||
def test_get_eval(self, managed_eval):
|
||||
"""
|
||||
Test getting a specific evaluation by ID.
|
||||
"""
|
||||
@ -138,89 +222,54 @@ class BaseEvalsAPITest(ABC):
|
||||
api_key = self.get_api_key()
|
||||
api_base = self.get_api_base()
|
||||
|
||||
if not api_key:
|
||||
pytest.skip(f"No API key provided for {custom_llm_provider}")
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
# First list existing evals to get an ID
|
||||
list_response = litellm.list_evals(
|
||||
limit=1,
|
||||
response = litellm.get_eval(
|
||||
eval_id=managed_eval.id,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
)
|
||||
|
||||
assert isinstance(list_response, ListEvalsResponse)
|
||||
assert response is not None
|
||||
assert isinstance(response, Eval)
|
||||
assert response.id == managed_eval.id
|
||||
print(f"Retrieved eval: {response}")
|
||||
|
||||
if list_response.data and len(list_response.data) > 0:
|
||||
eval_id = list_response.data[0].id
|
||||
print(f"Testing with eval ID: {eval_id}")
|
||||
|
||||
# Get the eval
|
||||
response = litellm.get_eval(
|
||||
eval_id=eval_id,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert isinstance(response, Eval)
|
||||
assert response.id == eval_id
|
||||
print(f"Retrieved eval: {response}")
|
||||
else:
|
||||
pytest.skip("No existing evals to test with")
|
||||
|
||||
@pytest.mark.flaky(retries=3, delay=2)
def test_update_eval(self, request, managed_eval):
    """
    Test updating an evaluation.

    Renames the eval supplied by the ``managed_eval`` fixture to a
    deterministic name (``_stable_eval_name`` with an ``-updated``
    suffix) and checks that the provider echoes back the same id and
    the new name. The stale ``list_evals`` / ``time.time()`` path is
    removed: it targeted whichever eval happened to be listed first
    and put a run-specific value into the request body.
    """
    custom_llm_provider = self.get_custom_llm_provider()
    api_key = self.get_api_key()
    api_base = self.get_api_base()

    if not api_key:
        pytest.skip(f"No API key provided for {custom_llm_provider}")

    litellm.set_verbose = True
    updated_name = _stable_eval_name(request.node.name, suffix="-updated")

    response = litellm.update_eval(
        eval_id=managed_eval.id,
        name=updated_name,
        custom_llm_provider=custom_llm_provider,
        api_key=api_key,
        api_base=api_base,
    )

    assert response is not None
    assert isinstance(response, Eval)
    assert response.id == managed_eval.id
    assert response.name == updated_name
    print(f"Updated eval: {response}")
|
||||
|
||||
def test_delete_eval(self):
|
||||
"""
|
||||
Test deleting an evaluation.
|
||||
|
||||
Real delete coverage now lives in the ``managed_eval`` fixture
|
||||
teardown and in ``test_create_eval``'s ``finally`` block, so
|
||||
this stays a no-op skip rather than creating a fresh resource
|
||||
just to delete it.
|
||||
"""
|
||||
custom_llm_provider = self.get_custom_llm_provider()
|
||||
api_key = self.get_api_key()
|
||||
@ -229,8 +278,7 @@ class BaseEvalsAPITest(ABC):
|
||||
if not api_key:
|
||||
pytest.skip(f"No API key provided for {custom_llm_provider}")
|
||||
|
||||
# Skip this test to avoid deleting production evals
|
||||
pytest.skip("Skipping delete test to preserve existing evals")
|
||||
pytest.skip("Delete is exercised via managed_eval fixture teardown.")
|
||||
|
||||
|
||||
class TestOpenAIEvalsAPI(BaseEvalsAPITest):
|
||||
|
||||
220
tests/llm_translation/test_vcr_filters.py
Normal file
220
tests/llm_translation/test_vcr_filters.py
Normal file
@ -0,0 +1,220 @@
|
||||
"""Unit tests for the VCR record-time filters that keep cassettes small.
|
||||
|
||||
Covers:
|
||||
- ``_strip_image_b64_payloads`` — replaces base64 image bodies in
|
||||
image-gen responses so cassettes don't carry MB-class PNG payloads.
|
||||
- ``_normalize_multipart_boundary`` — rewrites random multipart
|
||||
boundaries to a fixed string so audio-transcription request bodies
|
||||
match across record and replay.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
from vcr.request import Request
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
||||
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VCR_FIXED_MULTIPART_BOUNDARY,
|
||||
VCR_IMAGE_B64_PLACEHOLDER,
|
||||
_normalize_multipart_boundary,
|
||||
_strip_image_b64_payloads,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image b64 stripper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _image_response(b64_payload: str, body_type: str = "bytes") -> dict:
    """Build a recorded-response dict for an image-generation call.

    The JSON body is ``{"data": [{"b64_json": b64_payload}]}``. It is
    stored as UTF-8 bytes when ``body_type == "bytes"`` and as ``str``
    for any other value. ``content-length`` always reflects the UTF-8
    byte length of the body.
    """
    serialized = json.dumps({"data": [{"b64_json": b64_payload}]})
    as_bytes = serialized.encode("utf-8")
    status = {"code": 200, "message": "OK"}
    headers = {
        "content-type": ["application/json"],
        "content-length": [str(len(as_bytes))],
    }
    body = {"string": as_bytes if body_type == "bytes" else serialized}
    return {"status": status, "headers": headers, "body": body}
|
||||
|
||||
|
||||
def test_strip_image_b64_replaces_payload_when_body_is_bytes():
    """A bytes body has its large ``b64_json`` replaced by the placeholder."""
    stripped = _strip_image_b64_payloads(
        _image_response("A" * 5000, body_type="bytes")
    )
    decoded = json.loads(stripped["body"]["string"].decode("utf-8"))
    assert decoded["data"][0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
|
||||
|
||||
|
||||
def test_strip_image_b64_replaces_payload_when_body_is_str():
    """A ``str`` body has its large ``b64_json`` replaced by the placeholder."""
    stripped = _strip_image_b64_payloads(
        _image_response("A" * 5000, body_type="str")
    )
    decoded = json.loads(stripped["body"]["string"])
    assert decoded["data"][0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
|
||||
|
||||
|
||||
def test_strip_image_b64_updates_content_length():
    """``content-length`` is rewritten to match the shrunken body."""
    stripped = _strip_image_b64_payloads(_image_response("A" * 5000))
    body_size = len(stripped["body"]["string"])
    assert stripped["headers"]["content-length"] == [str(body_size)]
|
||||
|
||||
|
||||
def test_strip_image_b64_is_idempotent():
    """A second pass over an already-stripped response changes nothing.

    ``_strip_image_b64_payloads`` mutates and returns the dict it was
    given, so comparing the two return values would compare an object
    with itself. The body and ``content-length`` are therefore
    snapshotted after the first pass and compared with the state after
    the second pass.
    """
    response = _image_response("A" * 5000)

    once = _strip_image_b64_payloads(response)
    body_after_first = once["body"]["string"]
    # Copy the list so a later in-place change could not hide a diff.
    length_after_first = list(once["headers"]["content-length"])

    twice = _strip_image_b64_payloads(once)

    assert twice["body"]["string"] == body_after_first
    assert twice["headers"]["content-length"] == length_after_first
|
||||
|
||||
|
||||
def test_strip_image_b64_handles_nested_data():
    """``b64_json`` values nested below the top level are replaced too."""
    images = [
        {"b64_json": "X" * 4000, "label": "first"},
        {"b64_json": "Y" * 4000, "label": "second"},
    ]
    raw_body = json.dumps({"outer": {"data": images}}).encode("utf-8")
    recorded = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/json"]},
        "body": {"string": raw_body},
    }

    result = _strip_image_b64_payloads(recorded)

    parsed = json.loads(result["body"]["string"].decode("utf-8"))
    entries = parsed["outer"]["data"]
    assert entries[0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
    assert entries[1]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
    # Sibling fields must survive the rewrite.
    assert entries[0]["label"] == "first"
|
||||
|
||||
|
||||
def test_strip_image_b64_leaves_non_image_response_unchanged():
    """A JSON body without ``b64_json`` keeps the same parsed content."""
    original_json = json.dumps({"choices": [{"message": {"content": "hello"}}]})
    recorded = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/json"]},
        "body": {"string": original_json.encode("utf-8")},
    }

    result = _strip_image_b64_payloads(recorded)

    after = json.loads(result["body"]["string"].decode("utf-8"))
    assert after == json.loads(original_json)
|
||||
|
||||
|
||||
def test_strip_image_b64_leaves_invalid_json_unchanged():
    """A binary, non-JSON body passes through byte-for-byte."""
    binary_body = b"\x89PNG\r\n\x1a\n binary stuff not json"
    recorded = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/octet-stream"]},
        "body": {"string": binary_body},
    }

    result = _strip_image_b64_payloads(recorded)

    assert result["body"]["string"] == b"\x89PNG\r\n\x1a\n binary stuff not json"
|
||||
|
||||
|
||||
def test_strip_image_b64_skips_short_values():
    """A value that already equals the placeholder is left as it is."""
    raw_body = json.dumps(
        {"data": [{"b64_json": VCR_IMAGE_B64_PLACEHOLDER}]}
    ).encode("utf-8")
    recorded = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/json"]},
        "body": {"string": raw_body},
    }

    result = _strip_image_b64_payloads(recorded)

    parsed = json.loads(result["body"]["string"].decode("utf-8"))
    assert parsed["data"][0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multipart boundary normalizer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _multipart_request(boundary: str):
    """Build a one-part multipart POST ``Request`` using ``boundary``.

    The body is a single ``file`` form field (``audio.wav``) terminated
    by the closing delimiter; the Content-Type header advertises the
    same boundary, unquoted.
    """
    delimiter = f"--{boundary}"
    # Joined with CRLF; the trailing empty entry produces the final CRLF.
    lines = [
        delimiter,
        'Content-Disposition: form-data; name="file"; filename="audio.wav"',
        "Content-Type: audio/wav",
        "",
        "fake-audio-bytes",
        f"{delimiter}--",
        "",
    ]
    payload = "\r\n".join(lines).encode("utf-8")
    content_type = f"multipart/form-data; boundary={boundary}"
    return Request(
        method="POST",
        uri="https://api.openai.com/v1/audio/transcriptions",
        body=payload,
        headers={"content-type": content_type},
    )
|
||||
|
||||
|
||||
def test_normalize_multipart_rewrites_header_and_body():
    """Both the header parameter and the body delimiters get the fixed boundary."""
    request = _multipart_request("abc123random")

    _normalize_multipart_boundary(request)

    expected_header = f"multipart/form-data; boundary={VCR_FIXED_MULTIPART_BOUNDARY}"
    fixed_bytes = VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8")
    assert request.headers["content-type"] == expected_header
    assert b"abc123random" not in request.body
    assert fixed_bytes in request.body
|
||||
|
||||
|
||||
def test_normalize_multipart_is_idempotent():
    """Normalizing an already-normalized request leaves it unchanged."""
    request = _multipart_request("abc123random")

    _normalize_multipart_boundary(request)
    snapshot = (request.body, request.headers["content-type"])

    _normalize_multipart_boundary(request)

    assert request.body == snapshot[0]
    assert request.headers["content-type"] == snapshot[1]
|
||||
|
||||
|
||||
def test_normalize_multipart_two_distinct_boundaries_match_after_normalize():
    """Requests differing only in boundary become byte-identical.

    Mirrors the audio-transcription case: the same upload sent twice
    with different random boundaries must compare equal afterwards.
    """
    first = _multipart_request("boundaryAAA")
    second = _multipart_request("boundaryBBB")

    for req in (first, second):
        _normalize_multipart_boundary(req)

    assert first.body == second.body
    assert first.headers["content-type"] == second.headers["content-type"]
|
||||
|
||||
|
||||
def test_normalize_multipart_skips_non_multipart_requests():
    """A JSON request keeps its header and body exactly as sent."""
    json_request = Request(
        method="POST",
        uri="https://api.openai.com/v1/chat/completions",
        body=b'{"model":"gpt-4o"}',
        headers={"content-type": "application/json"},
    )

    _normalize_multipart_boundary(json_request)

    assert json_request.headers["content-type"] == "application/json"
    assert json_request.body == b'{"model":"gpt-4o"}'
|
||||
|
||||
|
||||
def test_normalize_multipart_skips_request_without_content_type():
    """A request with no Content-Type header is left alone."""
    bare_request = Request(
        method="POST",
        uri="https://api.openai.com/v1/chat/completions",
        body=b"unknown body",
        headers={},
    )

    _normalize_multipart_boundary(bare_request)

    assert bare_request.body == b"unknown body"
|
||||
|
||||
|
||||
def test_normalize_multipart_handles_quoted_boundary():
    """A boundary given in double quotes is still found and replaced."""
    quoted_header = 'multipart/form-data; boundary="quoted-boundary"'
    request = Request(
        method="POST",
        uri="https://api.openai.com/v1/audio/transcriptions",
        body=b"--quoted-boundary--body content--quoted-boundary--",
        headers={"content-type": quoted_header},
    )

    _normalize_multipart_boundary(request)

    fixed_bytes = VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8")
    assert b"quoted-boundary" not in request.body
    assert fixed_bytes in request.body
|
||||
@ -12,7 +12,15 @@ from abc import ABC, abstractmethod
|
||||
|
||||
# Test resources
|
||||
TEST_IMAGE_PATH = "test_image_edit.png"
|
||||
TEST_PDF_URL = "https://arxiv.org/pdf/2201.04234"
|
||||
# Tiny in-repo PDF served via jsdelivr (sha-pinned, immutable). The arxiv
|
||||
# PDF previously used here was several MB — once base64-encoded into the
|
||||
# Vertex OCR request it ballooned cassettes past 100 MB per test. Keep
|
||||
# the URL stable across runs so cassettes don't churn.
|
||||
TEST_PDF_URL = (
|
||||
"https://cdn.jsdelivr.net/gh/BerriAI/litellm"
|
||||
"@d769e81c90d453240c61fc572cdb27fae06a89d0"
|
||||
"/tests/llm_translation/fixtures/dummy.pdf"
|
||||
)
|
||||
|
||||
|
||||
class BaseOCRTest(ABC):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user