# Tiny placeholder used to replace base64 image payloads in cassettes.
# Decodes to b"test" — short, valid base64 so test code that decodes
# the field still succeeds.
VCR_IMAGE_B64_PLACEHOLDER = "dGVzdA=="

# Fixed boundary substituted into multipart request bodies so the
# ``safe_body`` matcher sees the same bytes across record and replay.
# httpx generates a fresh random boundary per request via os.urandom,
# which otherwise turns every multipart cassette into a permanent miss.
VCR_FIXED_MULTIPART_BOUNDARY = "vcr-static-boundary"

# ``boundary`` parameter of a Content-Type value. Parameter names are
# matched case-insensitively, and a quoted value is captured as a whole
# so a quoted boundary is not cut short at whitespace or ``;``.
_BOUNDARY_PARAM_RE = re.compile(r'boundary=("[^"]*"|[^\s;]+)', re.IGNORECASE)


def _replace_b64_json_in_place(obj) -> bool:
    """Recursively replace ``b64_json`` string values in a JSON tree.

    Walks dicts and lists; any other node type is left alone.

    Returns ``True`` if any value was rewritten. Only strings longer
    than the placeholder are swapped, which keeps the function
    idempotent: once a value equals the placeholder, later calls are
    no-ops.
    """
    changed = False
    if isinstance(obj, dict):
        # Only values of existing keys are reassigned, so the dict never
        # changes size while it is being iterated.
        for key, value in obj.items():
            if (
                key == "b64_json"
                and isinstance(value, str)
                and len(value) > len(VCR_IMAGE_B64_PLACEHOLDER)
            ):
                obj[key] = VCR_IMAGE_B64_PLACEHOLDER
                changed = True
            elif _replace_b64_json_in_place(value):
                changed = True
    elif isinstance(obj, list):
        for item in obj:
            if _replace_b64_json_in_place(item):
                changed = True
    return changed


def _strip_image_b64_payloads(response):
    """Replace ``b64_json`` payloads in a recorded response before save.

    ``response`` is expected to be a vcrpy-style response dict with the
    body under ``response["body"]["string"]`` (``bytes`` or ``str``).
    The body is parsed as JSON, every long ``b64_json`` string is
    swapped for ``VCR_IMAGE_B64_PLACEHOLDER``, and the body is
    re-serialized compactly in the same type it arrived in. Any
    ``Content-Length`` header is updated to the new byte length.

    The response is returned unchanged (same object, same body bytes)
    when it is not a dict, has no string body, is not valid UTF-8 JSON,
    or contains nothing to replace. The dict is mutated in place and
    also returned.
    """
    if not isinstance(response, dict):
        return response
    body = response.get("body")
    if not isinstance(body, dict):
        return response
    raw = body.get("string")

    if isinstance(raw, (bytes, bytearray)):
        try:
            text = bytes(raw).decode("utf-8")
        except UnicodeDecodeError:
            return response
        was_bytes = True
    elif isinstance(raw, str):
        text = raw
        was_bytes = False
    else:
        # Covers a missing body (None) and any non-textual body type.
        return response

    # Cheap pre-check: this hook sees every recorded response, so avoid
    # a full JSON parse of bodies that never mention the key.
    if "b64_json" not in text:
        return response

    try:
        payload = json.loads(text)
    except (ValueError, TypeError):
        return response

    if not _replace_b64_json_in_place(payload):
        return response

    new_text = json.dumps(payload, separators=(",", ":"))
    encoded = new_text.encode("utf-8")
    body["string"] = encoded if was_bytes else new_text

    headers = response.get("headers")
    if isinstance(headers, dict):
        new_len_value = str(len(encoded))
        for key in list(headers):
            if str(key).lower() == "content-length":
                # Preserve the header's container shape (list vs scalar).
                headers[key] = (
                    [new_len_value]
                    if isinstance(headers[key], list)
                    else new_len_value
                )
    return response


def _before_record_response(response):
    """Shrink image payloads, scrub the response, then apply the non-2xx filter."""
    return filter_non_2xx_response(_scrub_response(_strip_image_b64_payloads(response)))


def _normalize_multipart_boundary(request) -> None:
    """Rewrite a random multipart boundary to a fixed string in-place.

    httpx generates a fresh ``boundary=...`` value for every multipart
    request via ``os.urandom``. Without normalization the request body
    bytes differ across runs even when everything else is identical,
    the ``safe_body`` matcher misses, and the persister keeps appending
    new episodes until ``MAX_EPISODES_PER_CASSETTE`` refuses the save.
    Replacing the boundary in both the Content-Type header and the body
    makes the request deterministic.

    The request is left completely untouched when:

    * it has no headers or no Content-Type header,
    * the Content-Type value is neither ``str`` nor ``bytes`` (it is
      never stringified, which would parse and write back a repr),
    * the content type is not ``multipart/*`` or has no boundary,
    * the boundary is empty or already the fixed one (idempotence — the
      hook may run more than once per request),
    * the body is of a type that cannot be rewritten here, so header
      and body never end up disagreeing about the boundary.

    A request with no body (``None``) still gets its header normalized.
    """
    headers = getattr(request, "headers", None)
    if headers is None:
        return

    content_type_key = None
    raw_value = None
    try:
        for key in list(headers.keys()):
            if str(key).lower() == "content-type":
                content_type_key = key
                raw_value = headers[key]
                break
    except AttributeError:
        return

    if isinstance(raw_value, (bytes, bytearray)):
        # latin-1 maps every byte to one code point, so this cannot fail
        # and round-trips exactly when the header is written back.
        content_type_value = bytes(raw_value).decode("latin-1")
        header_is_bytes = True
    elif isinstance(raw_value, str):
        content_type_value = raw_value
        header_is_bytes = False
    else:
        # Missing header, list, or anything else: do not guess.
        return

    if "multipart/" not in content_type_value.lower():
        return

    match = _BOUNDARY_PARAM_RE.search(content_type_value)
    if not match:
        return
    current_boundary = match.group(1).strip('"')
    # An empty token would make ``replace`` splice the fixed boundary
    # between every byte of the body.
    if not current_boundary or current_boundary == VCR_FIXED_MULTIPART_BOUNDARY:
        return

    # Work out the new body before mutating anything.
    body = getattr(request, "body", None)
    if body is None:
        new_body = None
    elif isinstance(body, (bytes, bytearray)):
        try:
            new_body = bytes(body).replace(
                current_boundary.encode("utf-8"),
                VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8"),
            )
        except (TypeError, ValueError):
            return
    elif isinstance(body, str):
        new_body = body.replace(current_boundary, VCR_FIXED_MULTIPART_BOUNDARY)
    else:
        return

    fixed_param = f"boundary={VCR_FIXED_MULTIPART_BOUNDARY}"
    new_text = (
        content_type_value[: match.start()]
        + fixed_param
        + content_type_value[match.end():]
    )
    try:
        headers[content_type_key] = (
            new_text.encode("latin-1") if header_is_bytes else new_text
        )
    except (TypeError, AttributeError):
        return

    if new_body is None:
        return

    try:
        request.body = new_body
    except (AttributeError, TypeError):
        # Body is read-only: best-effort restore of the header so the
        # two stay consistent.
        try:
            headers[content_type_key] = raw_value
        except (TypeError, AttributeError):
            pass
+ + def _before_record_request(request): - """Fingerprint API keys, then scrub them. + """Fingerprint API keys, scrub them, and normalize multipart boundaries. Order matters in two ways: @@ -187,7 +356,8 @@ def _before_record_request(request): auth headers we already stripped, so re-hashing would yield ``"no-key"`` and the stored vs. incoming fingerprints would diverge. Skip the recompute when the header is already set so - this hook is idempotent. + this hook is idempotent. The boundary normalizer is also + idempotent for the same reason. """ headers = getattr(request, "headers", None) if headers is None: @@ -199,6 +369,7 @@ def _before_record_request(request): except (TypeError, AttributeError): pass _strip_headers(headers, FILTERED_REQUEST_HEADERS) + _normalize_multipart_boundary(request) return request diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index aedc4f810c..77850dac45 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -853,7 +853,11 @@ class BaseLLMChatTest(ABC): @pytest.mark.parametrize( "image_url", [ - "http://img1.etsystatic.com/260/0/7813604/il_fullxfull.4226713999_q86e.jpg", + # In-repo logo served via jsdelivr (sha-pinned, immutable). + # Bedrock fetches the URL and base64-embeds it in the + # Converse request body; using a multi-MB hosted product + # photo here previously bloated cassettes to ~60 MB each. 
def _stable_eval_name(test_node_name: str, suffix: str = "") -> str:
    """Build a deterministic eval name from a test's node name.

    The name is ``vcr-managed-`` followed by the first 12 hex digits of
    the SHA-1 of ``test_node_name``, then ``suffix``. The same inputs
    always give the same name, so the request body carrying it is
    byte-stable across runs and can be matched by VCR's ``safe_body``
    matcher. The hash is used purely as a fingerprint here.
    """
    digest = hashlib.sha1(test_node_name.encode()).hexdigest()
    fingerprint = digest[:12]
    return "vcr-managed-" + fingerprint + suffix
@@ -41,13 +82,64 @@ class BaseEvalsAPITest(ABC): """Return the API base URL for the provider""" pass + @pytest.fixture + def managed_eval(self, request): + """Create a stable-named eval for this test; delete on teardown. + + Function-scoped so each cassette captures the full + create→test→delete cycle. A class-scoped fixture would push + the create into whichever test ran first and the delete into + whichever ran last, which is fragile under reordering. + + Replaces the prior ``list_evals().data[0].id`` pattern, which + made the URL of ``get_eval`` / ``update_eval`` vary across + runs (the "first" eval depends on what other runs left + behind). + """ + custom_llm_provider = self.get_custom_llm_provider() + api_key = self.get_api_key() + api_base = self.get_api_base() + + if not api_key: + pytest.skip(f"No API key provided for {custom_llm_provider}") + + try: + created = litellm.create_eval( + name=_stable_eval_name(request.node.name), + data_source_config={ + "type": "stored_completions", + "metadata": {"usecase": "chatbot", "vcr": "managed"}, + }, + testing_criteria=_TESTING_CRITERIA, + custom_llm_provider=custom_llm_provider, + api_key=api_key, + api_base=api_base, + ) + except _PROVIDER_FLAKINESS: + pytest.skip("Provider service unavailable") + except litellm.RateLimitError: + pytest.skip("Rate limit exceeded") + + yield created + + # Best-effort cleanup. OpenAI eval names are not unique-keyed + # (only IDs are), so a failed delete doesn't block the next + # run's create. + try: + litellm.delete_eval( + eval_id=created.id, + custom_llm_provider=custom_llm_provider, + api_key=api_key, + api_base=api_base, + ) + except Exception: + pass + @pytest.mark.flaky(retries=3, delay=2) - def test_create_eval(self): + def test_create_eval(self, request): """ Test creating an evaluation. 
""" - import time - custom_llm_provider = self.get_custom_llm_provider() api_key = self.get_api_key() api_base = self.get_api_base() @@ -56,53 +148,45 @@ class BaseEvalsAPITest(ABC): pytest.skip(f"No API key provided for {custom_llm_provider}") litellm.set_verbose = True + unique_name = _stable_eval_name(request.node.name) - # Create eval with stored_completions data source - unique_name = f"Test Eval {int(time.time())}" - + created_id = None try: - response = litellm.create_eval( - name=unique_name, - data_source_config={ - "type": "stored_completions", - "metadata": {"usecase": "chatbot"}, - }, - testing_criteria=[ - { - "type": "label_model", - "model": "gpt-4o", - "input": [ - { - "role": "developer", - "content": "Classify the sentiment as 'positive' or 'negative'", - }, - {"role": "user", "content": "Statement: {{item.input}}"}, - ], - "passing_labels": ["positive"], - "labels": ["positive", "negative"], - "name": "Sentiment grader", - } - ], - custom_llm_provider=custom_llm_provider, - api_key=api_key, - api_base=api_base, - ) - except ( - litellm.InternalServerError, - litellm.APIConnectionError, - litellm.Timeout, - litellm.ServiceUnavailableError, - ): - pytest.skip("Provider service unavailable") - except litellm.RateLimitError: - pytest.skip("Rate limit exceeded") + try: + response = litellm.create_eval( + name=unique_name, + data_source_config={ + "type": "stored_completions", + "metadata": {"usecase": "chatbot"}, + }, + testing_criteria=_TESTING_CRITERIA, + custom_llm_provider=custom_llm_provider, + api_key=api_key, + api_base=api_base, + ) + except _PROVIDER_FLAKINESS: + pytest.skip("Provider service unavailable") + except litellm.RateLimitError: + pytest.skip("Rate limit exceeded") - assert response is not None - assert isinstance(response, Eval) - assert response.id is not None - assert response.name == unique_name - print(f"Created eval: {response}") - print(f"Eval ID: {response.id}") + assert response is not None + assert isinstance(response, 
Eval) + assert response.id is not None + assert response.name == unique_name + created_id = response.id + print(f"Created eval: {response}") + print(f"Eval ID: {response.id}") + finally: + if created_id is not None: + try: + litellm.delete_eval( + eval_id=created_id, + custom_llm_provider=custom_llm_provider, + api_key=api_key, + api_base=api_base, + ) + except Exception: + pass def test_list_evals(self): """ @@ -130,7 +214,7 @@ class BaseEvalsAPITest(ABC): assert hasattr(response, "has_more") print(f"Listed evals: {len(response.data)} evaluations") - def test_get_eval(self): + def test_get_eval(self, managed_eval): """ Test getting a specific evaluation by ID. """ @@ -138,89 +222,54 @@ class BaseEvalsAPITest(ABC): api_key = self.get_api_key() api_base = self.get_api_base() - if not api_key: - pytest.skip(f"No API key provided for {custom_llm_provider}") - litellm.set_verbose = True - # First list existing evals to get an ID - list_response = litellm.list_evals( - limit=1, + response = litellm.get_eval( + eval_id=managed_eval.id, custom_llm_provider=custom_llm_provider, api_key=api_key, api_base=api_base, ) - assert isinstance(list_response, ListEvalsResponse) + assert response is not None + assert isinstance(response, Eval) + assert response.id == managed_eval.id + print(f"Retrieved eval: {response}") - if list_response.data and len(list_response.data) > 0: - eval_id = list_response.data[0].id - print(f"Testing with eval ID: {eval_id}") - - # Get the eval - response = litellm.get_eval( - eval_id=eval_id, - custom_llm_provider=custom_llm_provider, - api_key=api_key, - api_base=api_base, - ) - - assert response is not None - assert isinstance(response, Eval) - assert response.id == eval_id - print(f"Retrieved eval: {response}") - else: - pytest.skip("No existing evals to test with") - - def test_update_eval(self): + @pytest.mark.flaky(retries=3, delay=2) + def test_update_eval(self, request, managed_eval): """ Test updating an evaluation. 
""" - import time - custom_llm_provider = self.get_custom_llm_provider() api_key = self.get_api_key() api_base = self.get_api_base() - if not api_key: - pytest.skip(f"No API key provided for {custom_llm_provider}") - litellm.set_verbose = True + updated_name = _stable_eval_name(request.node.name, suffix="-updated") - # First list existing evals - list_response = litellm.list_evals( - limit=1, + response = litellm.update_eval( + eval_id=managed_eval.id, + name=updated_name, custom_llm_provider=custom_llm_provider, api_key=api_key, api_base=api_base, ) - assert isinstance(list_response, ListEvalsResponse) - - if list_response.data and len(list_response.data) > 0: - eval_id = list_response.data[0].id - updated_name = f"Updated Eval {int(time.time())}" - - # Update the eval - response = litellm.update_eval( - eval_id=eval_id, - name=updated_name, - custom_llm_provider=custom_llm_provider, - api_key=api_key, - api_base=api_base, - ) - - assert response is not None - assert isinstance(response, Eval) - assert response.id == eval_id - assert response.name == updated_name - print(f"Updated eval: {response}") - else: - pytest.skip("No existing evals to test with") + assert response is not None + assert isinstance(response, Eval) + assert response.id == managed_eval.id + assert response.name == updated_name + print(f"Updated eval: {response}") def test_delete_eval(self): """ Test deleting an evaluation. + + Real delete coverage now lives in the ``managed_eval`` fixture + teardown and in ``test_create_eval``'s ``finally`` block, so + this stays a no-op skip rather than creating a fresh resource + just to delete it. 
""" custom_llm_provider = self.get_custom_llm_provider() api_key = self.get_api_key() @@ -229,8 +278,7 @@ class BaseEvalsAPITest(ABC): if not api_key: pytest.skip(f"No API key provided for {custom_llm_provider}") - # Skip this test to avoid deleting production evals - pytest.skip("Skipping delete test to preserve existing evals") + pytest.skip("Delete is exercised via managed_eval fixture teardown.") class TestOpenAIEvalsAPI(BaseEvalsAPITest): diff --git a/tests/llm_translation/test_vcr_filters.py b/tests/llm_translation/test_vcr_filters.py new file mode 100644 index 0000000000..0389168278 --- /dev/null +++ b/tests/llm_translation/test_vcr_filters.py @@ -0,0 +1,220 @@ +"""Unit tests for the VCR record-time filters that keep cassettes small. + +Covers: +- ``_strip_image_b64_payloads`` — replaces base64 image bodies in + image-gen responses so cassettes don't carry MB-class PNG payloads. +- ``_normalize_multipart_boundary`` — rewrites random multipart + boundaries to a fixed string so audio-transcription request bodies + match across record and replay. 
def test_strip_image_b64_is_idempotent():
    """A second pass over an already-stripped response changes nothing.

    ``_strip_image_b64_payloads`` mutates the response dict and returns
    that same dict, so comparing the two return values would compare an
    object with itself. The state after the first pass is therefore
    captured before the second pass runs.
    """
    response = _image_response("A" * 5000)

    once = _strip_image_b64_payloads(response)
    # bytes are immutable, so holding the reference is a real snapshot;
    # the header list is copied because it could be replaced or mutated.
    body_after_first = once["body"]["string"]
    content_length_after_first = list(once["headers"]["content-length"])

    # Guard against a vacuous pass: the first run must really have
    # swapped the payload.
    payload = json.loads(body_after_first.decode("utf-8"))
    assert payload["data"][0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER

    twice = _strip_image_b64_payloads(once)
    assert twice["body"]["string"] == body_after_first
    assert twice["headers"]["content-length"] == content_length_after_first
_strip_image_b64_payloads(response) + payload = json.loads(out["body"]["string"].decode("utf-8")) + assert payload["data"][0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER + + +# --------------------------------------------------------------------------- +# Multipart boundary normalizer +# --------------------------------------------------------------------------- + + +def _multipart_request(boundary: str): + body_text = ( + f"--{boundary}\r\n" + 'Content-Disposition: form-data; name="file"; filename="audio.wav"\r\n' + "Content-Type: audio/wav\r\n" + "\r\n" + "fake-audio-bytes\r\n" + f"--{boundary}--\r\n" + ) + return Request( + method="POST", + uri="https://api.openai.com/v1/audio/transcriptions", + body=body_text.encode("utf-8"), + headers={ + "content-type": f"multipart/form-data; boundary={boundary}", + }, + ) + + +def test_normalize_multipart_rewrites_header_and_body(): + req = _multipart_request("abc123random") + _normalize_multipart_boundary(req) + assert ( + req.headers["content-type"] + == f"multipart/form-data; boundary={VCR_FIXED_MULTIPART_BOUNDARY}" + ) + assert b"abc123random" not in req.body + assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in req.body + + +def test_normalize_multipart_is_idempotent(): + req = _multipart_request("abc123random") + _normalize_multipart_boundary(req) + body_first = req.body + header_first = req.headers["content-type"] + _normalize_multipart_boundary(req) + assert req.body == body_first + assert req.headers["content-type"] == header_first + + +def test_normalize_multipart_two_distinct_boundaries_match_after_normalize(): + """Whisper-style: two requests with different random boundaries should + end up with byte-identical bodies after normalization.""" + req1 = _multipart_request("boundaryAAA") + req2 = _multipart_request("boundaryBBB") + _normalize_multipart_boundary(req1) + _normalize_multipart_boundary(req2) + assert req1.body == req2.body + assert req1.headers["content-type"] == req2.headers["content-type"] + + +def 
test_normalize_multipart_skips_non_multipart_requests(): + req = Request( + method="POST", + uri="https://api.openai.com/v1/chat/completions", + body=b'{"model":"gpt-4o"}', + headers={"content-type": "application/json"}, + ) + _normalize_multipart_boundary(req) + assert req.headers["content-type"] == "application/json" + assert req.body == b'{"model":"gpt-4o"}' + + +def test_normalize_multipart_skips_request_without_content_type(): + req = Request( + method="POST", + uri="https://api.openai.com/v1/chat/completions", + body=b"unknown body", + headers={}, + ) + _normalize_multipart_boundary(req) + assert req.body == b"unknown body" + + +def test_normalize_multipart_handles_quoted_boundary(): + req = Request( + method="POST", + uri="https://api.openai.com/v1/audio/transcriptions", + body=b"--quoted-boundary--body content--quoted-boundary--", + headers={"content-type": 'multipart/form-data; boundary="quoted-boundary"'}, + ) + _normalize_multipart_boundary(req) + assert b"quoted-boundary" not in req.body + assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in req.body diff --git a/tests/ocr_tests/base_ocr_unit_tests.py b/tests/ocr_tests/base_ocr_unit_tests.py index 2120abc8a0..ae65efd952 100644 --- a/tests/ocr_tests/base_ocr_unit_tests.py +++ b/tests/ocr_tests/base_ocr_unit_tests.py @@ -12,7 +12,15 @@ from abc import ABC, abstractmethod # Test resources TEST_IMAGE_PATH = "test_image_edit.png" -TEST_PDF_URL = "https://arxiv.org/pdf/2201.04234" +# Tiny in-repo PDF served via jsdelivr (sha-pinned, immutable). The arxiv +# PDF previously used here was several MB — once base64-encoded into the +# Vertex OCR request it ballooned cassettes past 100 MB per test. Keep +# the URL stable across runs so cassettes don't churn. +TEST_PDF_URL = ( + "https://cdn.jsdelivr.net/gh/BerriAI/litellm" + "@d769e81c90d453240c61fc572cdb27fae06a89d0" + "/tests/llm_translation/fixtures/dummy.pdf" +) class BaseOCRTest(ABC):