Merge pull request #27409 from BerriAI/litellm_/inspiring-allen-ec64a4

[Fix] Tests: Reduce VCR cassette bloat and fix multipart caching
This commit is contained in:
yuneng-jiang 2026-05-07 12:39:58 -07:00 committed by GitHub
commit b9b315157b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 563 additions and 112 deletions

View File

@ -7,7 +7,9 @@ from __future__ import annotations
import atexit
import hashlib
import json
import os
import re
import sys
from typing import Iterable
@ -74,6 +76,17 @@ FILTERED_RESPONSE_HEADERS = (
"date",
)
# Tiny placeholder used to replace base64 image payloads in cassettes.
# Decodes to b"test" — short, valid base64 so test code that decodes
# the field still succeeds. Its length also serves as the "already
# replaced" threshold: only ``b64_json`` strings longer than this value
# are rewritten by ``_replace_b64_json_in_place``.
VCR_IMAGE_B64_PLACEHOLDER = "dGVzdA=="
# Fixed boundary substituted into multipart request bodies so the
# ``safe_body`` matcher sees the same bytes across record and replay.
# httpx generates a fresh random boundary per request via os.urandom,
# which otherwise turns every multipart cassette into a permanent miss.
# ``_normalize_multipart_boundary`` writes this value into both the
# Content-Type header and the request body.
VCR_FIXED_MULTIPART_BOUNDARY = "vcr-static-boundary"
def _scrub_response(response):
if not isinstance(response, dict):
@ -86,8 +99,88 @@ def _scrub_response(response):
return response
def _replace_b64_json_in_place(obj) -> bool:
    """Swap oversized ``b64_json`` strings in a decoded JSON tree, in place.

    Walks dicts and lists recursively. A ``b64_json`` entry is rewritten to
    ``VCR_IMAGE_B64_PLACEHOLDER`` only when it is a string longer than the
    placeholder itself, so a tree that was already processed is left alone
    on later calls. Any other value (including a short or non-string
    ``b64_json``) is descended into instead.

    Returns ``True`` when at least one value was rewritten, else ``False``.
    """
    if isinstance(obj, list):
        rewritten = False
        for element in obj:
            # Call first, then OR: every element must be visited even after
            # an earlier one has already reported a change.
            rewritten = _replace_b64_json_in_place(element) or rewritten
        return rewritten

    if not isinstance(obj, dict):
        # Scalars (and anything else) carry nothing to rewrite.
        return False

    rewritten = False
    threshold = len(VCR_IMAGE_B64_PLACEHOLDER)
    for field, current in obj.items():
        oversized_image = (
            field == "b64_json"
            and isinstance(current, str)
            and len(current) > threshold
        )
        if oversized_image:
            # Re-assigning an existing key does not resize the dict, so this
            # is safe while iterating ``items()``.
            obj[field] = VCR_IMAGE_B64_PLACEHOLDER
            rewritten = True
        elif _replace_b64_json_in_place(current):
            rewritten = True
    return rewritten
def _strip_image_b64_payloads(response):
    """Shrink ``b64_json`` payloads in a recorded response before it is saved.

    The response is expected in the dict form used by the record hooks:
    ``{"body": {"string": <bytes | str>}, "headers": {...}}``. When the body
    parses as JSON and contains at least one oversized ``b64_json`` string,
    the body is re-serialized compactly with those values replaced by
    ``VCR_IMAGE_B64_PLACEHOLDER`` and any ``content-length`` header is
    updated to the new byte length. The original body type (bytes vs. str)
    and header value shape (list vs. scalar) are preserved.

    Anything that does not fit this shape — non-dict response, missing or
    non-text body, undecodable bytes, invalid JSON, no payload to replace —
    is returned untouched. The response is mutated in place and the same
    object is returned.
    """
    if not isinstance(response, dict):
        return response

    body = response.get("body")
    raw = body.get("string") if isinstance(body, dict) else None
    if raw is None:
        return response

    body_is_bytes = isinstance(raw, (bytes, bytearray))
    if not body_is_bytes and not isinstance(raw, str):
        return response

    try:
        # UnicodeDecodeError is a ValueError subclass, so one handler covers
        # both the decode step and the JSON parse.
        text = bytes(raw).decode("utf-8") if body_is_bytes else raw
        payload = json.loads(text)
    except (ValueError, TypeError):
        return response

    if not _replace_b64_json_in_place(payload):
        return response

    compact = json.dumps(payload, separators=(",", ":"))
    compact_bytes = compact.encode("utf-8")
    body["string"] = compact_bytes if body_is_bytes else compact

    headers = response.get("headers")
    if isinstance(headers, dict):
        length_text = str(len(compact_bytes))
        # Snapshot the keys: header names are matched case-insensitively and
        # the mapping is written to inside the loop.
        for name in list(headers):
            if str(name).lower() != "content-length":
                continue
            if isinstance(headers[name], list):
                headers[name] = [length_text]
            else:
                headers[name] = length_text
    return response
def _before_record_response(response):
    """Run the response-side record filters, innermost first.

    Order of application:

    1. ``_strip_image_b64_payloads`` replaces oversized ``b64_json`` image
       payloads with a small placeholder.
    2. ``_scrub_response`` scrubs the response.
    3. ``filter_non_2xx_response`` makes the final keep/drop decision
       (presumably dropping non-2xx responses — defined outside this view).

    The previous single-``return`` form skipped step 1 entirely; with both
    ``return`` statements present the second one was unreachable, so image
    payloads were never stripped.
    """
    return filter_non_2xx_response(_scrub_response(_strip_image_b64_payloads(response)))
def _safe_body_matcher(r1, r2) -> None:
@ -172,8 +265,84 @@ def _strip_headers(headers, names: Iterable[str]) -> None:
pass
def _normalize_multipart_boundary(request) -> None:
    """Rewrite a random multipart boundary to a fixed string, in place.

    httpx generates a fresh ``boundary=<random hex>`` for every multipart
    request via ``os.urandom``. Without normalization, the request body
    bytes differ across runs even when everything else is identical, the
    ``safe_body`` matcher misses, and the persister keeps appending new
    episodes until ``MAX_EPISODES_PER_CASSETTE`` refuses the save — leaving
    audio-transcription tests effectively unmocked. Replacing the boundary
    in both the Content-Type header and the body makes the request
    deterministic.

    Idempotent: vcrpy invokes this hook multiple times per request, so a
    later invocation sees ``boundary=vcr-static-boundary`` already and
    short-circuits.

    The function is best-effort and never raises for an unexpected request
    shape: a request without headers, without a multipart Content-Type,
    without a usable boundary parameter, or with a body that is neither
    bytes nor str is left as it is (the header may already have been
    rewritten when only the body is of an unsupported type).

    Args:
        request: Object exposing ``headers`` (mapping) and ``body``
            attributes, e.g. a ``vcr.request.Request``.
    """
    headers = getattr(request, "headers", None)
    if headers is None:
        return

    # Header names are case-insensitive; find the actual key so the rewrite
    # below updates the existing entry instead of adding a second one.
    content_type_key = None
    content_type_value = None
    try:
        for key in list(headers.keys()):
            if str(key).lower() == "content-type":
                content_type_key = key
                value = headers[key]
                content_type_value = value if isinstance(value, str) else str(value)
                break
    except AttributeError:
        return

    if not content_type_value or "multipart/" not in content_type_value.lower():
        return

    fixed_param = f"boundary={VCR_FIXED_MULTIPART_BOUNDARY}"
    if fixed_param in content_type_value:
        return

    # MIME parameter names are case-insensitive, and a quoted boundary may
    # legally contain spaces, so accept ``Boundary=`` and take a quoted
    # value as a whole before falling back to the bare-token form.
    match = re.search(
        r'boundary=("[^"]*"|[^\s;]+)', content_type_value, re.IGNORECASE
    )
    if not match:
        return

    current_boundary = match.group(1).strip('"')
    # An empty boundary (``boundary=""``) must not reach ``replace``:
    # replacing the empty string inserts the new text between every byte.
    if not current_boundary or current_boundary == VCR_FIXED_MULTIPART_BOUNDARY:
        return

    try:
        headers[content_type_key] = content_type_value.replace(
            match.group(0), fixed_param
        )
    except (TypeError, AttributeError):
        return

    body = getattr(request, "body", None)
    if body is None:
        return

    if isinstance(body, (bytes, bytearray)):
        try:
            new_body = bytes(body).replace(
                current_boundary.encode("utf-8"),
                VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8"),
            )
        except (TypeError, ValueError):
            return
    elif isinstance(body, str):
        new_body = body.replace(current_boundary, VCR_FIXED_MULTIPART_BOUNDARY)
    else:
        return

    try:
        request.body = new_body
    except (AttributeError, TypeError):
        # Read-only or otherwise unassignable body: keep the request usable.
        pass
def _before_record_request(request):
"""Fingerprint API keys, then scrub them.
"""Fingerprint API keys, scrub them, and normalize multipart boundaries.
Order matters in two ways:
@ -187,7 +356,8 @@ def _before_record_request(request):
auth headers we already stripped, so re-hashing would yield
``"no-key"`` and the stored vs. incoming fingerprints would
diverge. Skip the recompute when the header is already set so
this hook is idempotent.
this hook is idempotent. The boundary normalizer is also
idempotent for the same reason.
"""
headers = getattr(request, "headers", None)
if headers is None:
@ -199,6 +369,7 @@ def _before_record_request(request):
except (TypeError, AttributeError):
pass
_strip_headers(headers, FILTERED_REQUEST_HEADERS)
_normalize_multipart_boundary(request)
return request

View File

@ -853,7 +853,11 @@ class BaseLLMChatTest(ABC):
@pytest.mark.parametrize(
"image_url",
[
"http://img1.etsystatic.com/260/0/7813604/il_fullxfull.4226713999_q86e.jpg",
# In-repo logo served via jsdelivr (sha-pinned, immutable).
# Bedrock fetches the URL and base64-embeds it in the
# Converse request body; using a multi-MB hosted product
# photo here previously bloated cassettes to ~60 MB each.
"https://cdn.jsdelivr.net/gh/BerriAI/litellm@d769e81c90d453240c61fc572cdb27fae06a89d0/ui/litellm-dashboard/public/assets/logos/litellm_logo.jpg",
"https://awsmp-logos.s3.amazonaws.com/seller-xw5kijmvmzasy/c233c9ade2ccb5491072ae232c814942.png",
],
)

View File

@ -2,6 +2,7 @@
Tests for Evals API operations across providers
"""
import hashlib
import os
import sys
from abc import ABC, abstractmethod
@ -20,6 +21,46 @@ from litellm.types.llms.openai_evals import (
)
def _stable_eval_name(test_node_name: str, suffix: str = "") -> str:
"""Deterministic eval name keyed off the test's node name.
The previous ``f"Test Eval {int(time.time())}"`` pattern embedded a
fresh value into the request body every run, defeating VCR's
``safe_body`` matcher and forcing a real OpenAI ``create`` call on
every CI run. With a stable per-test name the cassette matches on
replay, and provider-side resources stay bounded because each test
deletes the eval it owns on teardown.
"""
nonce = hashlib.sha1(test_node_name.encode()).hexdigest()[:12]
return f"vcr-managed-{nonce}{suffix}"
# Grader definition passed as ``testing_criteria`` to every
# ``litellm.create_eval`` call in this module (the ``managed_eval`` fixture
# and ``test_create_eval``). Kept in one place so the request body is
# identical wherever an eval is created.
_TESTING_CRITERIA = [
    {
        "type": "label_model",
        "model": "gpt-4o",
        "input": [
            {
                "role": "developer",
                "content": "Classify the sentiment as 'positive' or 'negative'",
            },
            {"role": "user", "content": "Statement: {{item.input}}"},
        ],
        "passing_labels": ["positive"],
        "labels": ["positive", "negative"],
        "name": "Sentiment grader",
    }
]
# Exception types caught around ``create_eval`` and turned into a
# ``pytest.skip("Provider service unavailable")`` instead of a failure.
# ``litellm.RateLimitError`` is handled separately with its own skip message.
_PROVIDER_FLAKINESS = (
    litellm.InternalServerError,
    litellm.APIConnectionError,
    litellm.Timeout,
    litellm.ServiceUnavailableError,
)
class BaseEvalsAPITest(ABC):
"""
Base test class for Evals API operations.
@ -41,13 +82,64 @@ class BaseEvalsAPITest(ABC):
"""Return the API base URL for the provider"""
pass
@pytest.fixture
def managed_eval(self, request):
    """Yield a freshly created, stable-named eval and delete it afterwards.

    Function-scoped so each cassette captures the full
    create -> test -> delete cycle. A class-scoped fixture would push the
    create into whichever test ran first and the delete into whichever ran
    last, which is fragile under reordering.

    Replaces the prior ``list_evals().data[0].id`` pattern, which made the
    URL of ``get_eval`` / ``update_eval`` vary across runs (the "first"
    eval depends on what other runs left behind).

    The test is skipped when no API key is configured, when the provider
    raises one of ``_PROVIDER_FLAKINESS``, or on a rate limit.
    """
    provider = self.get_custom_llm_provider()
    key = self.get_api_key()
    base = self.get_api_base()
    if not key:
        pytest.skip(f"No API key provided for {provider}")

    # Same routing/credential arguments for both the create and the delete.
    connection = {
        "custom_llm_provider": provider,
        "api_key": key,
        "api_base": base,
    }
    source_config = {
        "type": "stored_completions",
        "metadata": {"usecase": "chatbot", "vcr": "managed"},
    }

    try:
        created = litellm.create_eval(
            name=_stable_eval_name(request.node.name),
            data_source_config=source_config,
            testing_criteria=_TESTING_CRITERIA,
            **connection,
        )
    except _PROVIDER_FLAKINESS:
        pytest.skip("Provider service unavailable")
    except litellm.RateLimitError:
        pytest.skip("Rate limit exceeded")

    yield created

    # Best-effort cleanup. OpenAI eval names are not unique-keyed (only IDs
    # are), so a failed delete doesn't block the next run's create.
    try:
        litellm.delete_eval(eval_id=created.id, **connection)
    except Exception:
        pass
@pytest.mark.flaky(retries=3, delay=2)
def test_create_eval(self):
def test_create_eval(self, request):
"""
Test creating an evaluation.
"""
import time
custom_llm_provider = self.get_custom_llm_provider()
api_key = self.get_api_key()
api_base = self.get_api_base()
@ -56,53 +148,45 @@ class BaseEvalsAPITest(ABC):
pytest.skip(f"No API key provided for {custom_llm_provider}")
litellm.set_verbose = True
unique_name = _stable_eval_name(request.node.name)
# Create eval with stored_completions data source
unique_name = f"Test Eval {int(time.time())}"
created_id = None
try:
response = litellm.create_eval(
name=unique_name,
data_source_config={
"type": "stored_completions",
"metadata": {"usecase": "chatbot"},
},
testing_criteria=[
{
"type": "label_model",
"model": "gpt-4o",
"input": [
{
"role": "developer",
"content": "Classify the sentiment as 'positive' or 'negative'",
},
{"role": "user", "content": "Statement: {{item.input}}"},
],
"passing_labels": ["positive"],
"labels": ["positive", "negative"],
"name": "Sentiment grader",
}
],
custom_llm_provider=custom_llm_provider,
api_key=api_key,
api_base=api_base,
)
except (
litellm.InternalServerError,
litellm.APIConnectionError,
litellm.Timeout,
litellm.ServiceUnavailableError,
):
pytest.skip("Provider service unavailable")
except litellm.RateLimitError:
pytest.skip("Rate limit exceeded")
try:
response = litellm.create_eval(
name=unique_name,
data_source_config={
"type": "stored_completions",
"metadata": {"usecase": "chatbot"},
},
testing_criteria=_TESTING_CRITERIA,
custom_llm_provider=custom_llm_provider,
api_key=api_key,
api_base=api_base,
)
except _PROVIDER_FLAKINESS:
pytest.skip("Provider service unavailable")
except litellm.RateLimitError:
pytest.skip("Rate limit exceeded")
assert response is not None
assert isinstance(response, Eval)
assert response.id is not None
assert response.name == unique_name
print(f"Created eval: {response}")
print(f"Eval ID: {response.id}")
assert response is not None
assert isinstance(response, Eval)
assert response.id is not None
assert response.name == unique_name
created_id = response.id
print(f"Created eval: {response}")
print(f"Eval ID: {response.id}")
finally:
if created_id is not None:
try:
litellm.delete_eval(
eval_id=created_id,
custom_llm_provider=custom_llm_provider,
api_key=api_key,
api_base=api_base,
)
except Exception:
pass
def test_list_evals(self):
"""
@ -130,7 +214,7 @@ class BaseEvalsAPITest(ABC):
assert hasattr(response, "has_more")
print(f"Listed evals: {len(response.data)} evaluations")
def test_get_eval(self):
def test_get_eval(self, managed_eval):
"""
Test getting a specific evaluation by ID.
"""
@ -138,89 +222,54 @@ class BaseEvalsAPITest(ABC):
api_key = self.get_api_key()
api_base = self.get_api_base()
if not api_key:
pytest.skip(f"No API key provided for {custom_llm_provider}")
litellm.set_verbose = True
# First list existing evals to get an ID
list_response = litellm.list_evals(
limit=1,
response = litellm.get_eval(
eval_id=managed_eval.id,
custom_llm_provider=custom_llm_provider,
api_key=api_key,
api_base=api_base,
)
assert isinstance(list_response, ListEvalsResponse)
assert response is not None
assert isinstance(response, Eval)
assert response.id == managed_eval.id
print(f"Retrieved eval: {response}")
if list_response.data and len(list_response.data) > 0:
eval_id = list_response.data[0].id
print(f"Testing with eval ID: {eval_id}")
# Get the eval
response = litellm.get_eval(
eval_id=eval_id,
custom_llm_provider=custom_llm_provider,
api_key=api_key,
api_base=api_base,
)
assert response is not None
assert isinstance(response, Eval)
assert response.id == eval_id
print(f"Retrieved eval: {response}")
else:
pytest.skip("No existing evals to test with")
def test_update_eval(self):
@pytest.mark.flaky(retries=3, delay=2)
def test_update_eval(self, request, managed_eval):
"""
Test updating an evaluation.
"""
import time
custom_llm_provider = self.get_custom_llm_provider()
api_key = self.get_api_key()
api_base = self.get_api_base()
if not api_key:
pytest.skip(f"No API key provided for {custom_llm_provider}")
litellm.set_verbose = True
updated_name = _stable_eval_name(request.node.name, suffix="-updated")
# First list existing evals
list_response = litellm.list_evals(
limit=1,
response = litellm.update_eval(
eval_id=managed_eval.id,
name=updated_name,
custom_llm_provider=custom_llm_provider,
api_key=api_key,
api_base=api_base,
)
assert isinstance(list_response, ListEvalsResponse)
if list_response.data and len(list_response.data) > 0:
eval_id = list_response.data[0].id
updated_name = f"Updated Eval {int(time.time())}"
# Update the eval
response = litellm.update_eval(
eval_id=eval_id,
name=updated_name,
custom_llm_provider=custom_llm_provider,
api_key=api_key,
api_base=api_base,
)
assert response is not None
assert isinstance(response, Eval)
assert response.id == eval_id
assert response.name == updated_name
print(f"Updated eval: {response}")
else:
pytest.skip("No existing evals to test with")
assert response is not None
assert isinstance(response, Eval)
assert response.id == managed_eval.id
assert response.name == updated_name
print(f"Updated eval: {response}")
def test_delete_eval(self):
"""
Test deleting an evaluation.
Real delete coverage now lives in the ``managed_eval`` fixture
teardown and in ``test_create_eval``'s ``finally`` block, so
this stays a no-op skip rather than creating a fresh resource
just to delete it.
"""
custom_llm_provider = self.get_custom_llm_provider()
api_key = self.get_api_key()
@ -229,8 +278,7 @@ class BaseEvalsAPITest(ABC):
if not api_key:
pytest.skip(f"No API key provided for {custom_llm_provider}")
# Skip this test to avoid deleting production evals
pytest.skip("Skipping delete test to preserve existing evals")
pytest.skip("Delete is exercised via managed_eval fixture teardown.")
class TestOpenAIEvalsAPI(BaseEvalsAPITest):

View File

@ -0,0 +1,220 @@
"""Unit tests for the VCR record-time filters that keep cassettes small.
Covers:
- ``_strip_image_b64_payloads`` replaces base64 image bodies in
image-gen responses so cassettes don't carry MB-class PNG payloads.
- ``_normalize_multipart_boundary`` rewrites random multipart
boundaries to a fixed string so audio-transcription request bodies
match across record and replay.
"""
from __future__ import annotations
import json
import os
import sys
from vcr.request import Request
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from tests._vcr_conftest_common import ( # noqa: E402
VCR_FIXED_MULTIPART_BOUNDARY,
VCR_IMAGE_B64_PLACEHOLDER,
_normalize_multipart_boundary,
_strip_image_b64_payloads,
)
# ---------------------------------------------------------------------------
# Image b64 stripper
# ---------------------------------------------------------------------------
def _image_response(b64_payload: str, body_type: str = "bytes") -> dict:
body_text = json.dumps({"data": [{"b64_json": b64_payload}]})
body_string = body_text.encode("utf-8") if body_type == "bytes" else body_text
return {
"status": {"code": 200, "message": "OK"},
"headers": {
"content-type": ["application/json"],
"content-length": [str(len(body_text.encode("utf-8")))],
},
"body": {"string": body_string},
}
def test_strip_image_b64_replaces_payload_when_body_is_bytes():
    """A bytes body has its oversized ``b64_json`` swapped for the placeholder."""
    stripped = _strip_image_b64_payloads(
        _image_response("A" * 5000, body_type="bytes")
    )
    raw_body = stripped["body"]["string"]
    first_image = json.loads(raw_body.decode("utf-8"))["data"][0]
    assert first_image["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
def test_strip_image_b64_replaces_payload_when_body_is_str():
    """A str body has its oversized ``b64_json`` swapped for the placeholder."""
    stripped = _strip_image_b64_payloads(
        _image_response("A" * 5000, body_type="str")
    )
    first_image = json.loads(stripped["body"]["string"])["data"][0]
    assert first_image["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
def test_strip_image_b64_updates_content_length():
    """``content-length`` is rewritten to the length of the shrunken body."""
    stripped = _strip_image_b64_payloads(_image_response("A" * 5000))
    new_body = stripped["body"]["string"]
    reported = stripped["headers"]["content-length"]
    assert reported == [str(len(new_body))]
def test_strip_image_b64_is_idempotent():
    """A second pass over an already-stripped response changes nothing.

    ``_strip_image_b64_payloads`` mutates the response in place and returns
    the same dict, so comparing the two return values against each other
    would compare an object with itself. Snapshot the body and the
    ``content-length`` header after the first pass and compare the second
    pass against those snapshots instead.
    """
    response = _image_response("A" * 5000)

    once = _strip_image_b64_payloads(response)
    body_after_first = once["body"]["string"]  # bytes: immutable snapshot
    length_after_first = list(once["headers"]["content-length"])

    twice = _strip_image_b64_payloads(once)

    assert twice["body"]["string"] == body_after_first
    assert twice["headers"]["content-length"] == length_after_first
def test_strip_image_b64_handles_nested_data():
    """Payloads nested under extra levels are replaced; siblings are kept."""
    images = [
        {"b64_json": "X" * 4000, "label": "first"},
        {"b64_json": "Y" * 4000, "label": "second"},
    ]
    encoded_body = json.dumps({"outer": {"data": images}}).encode("utf-8")
    response = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/json"]},
        "body": {"string": encoded_body},
    }

    stripped = _strip_image_b64_payloads(response)

    decoded = json.loads(stripped["body"]["string"].decode("utf-8"))
    entries = decoded["outer"]["data"]
    assert entries[0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
    assert entries[1]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
    assert entries[0]["label"] == "first"
def test_strip_image_b64_leaves_non_image_response_unchanged():
    """A JSON body without ``b64_json`` keeps the same decoded content."""
    original_text = json.dumps({"choices": [{"message": {"content": "hello"}}]})
    response = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/json"]},
        "body": {"string": original_text.encode("utf-8")},
    }

    result = _strip_image_b64_payloads(response)

    after = json.loads(result["body"]["string"].decode("utf-8"))
    before = json.loads(original_text)
    assert after == before
def test_strip_image_b64_leaves_invalid_json_unchanged():
    """A binary, non-JSON body passes through byte-for-byte."""
    binary_body = b"\x89PNG\r\n\x1a\n binary stuff not json"
    response = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/octet-stream"]},
        "body": {"string": binary_body},
    }

    result = _strip_image_b64_payloads(response)

    assert result["body"]["string"] == b"\x89PNG\r\n\x1a\n binary stuff not json"
def test_strip_image_b64_skips_short_values():
    """A value that already equals the placeholder stays as it is."""
    already_stripped = {"data": [{"b64_json": VCR_IMAGE_B64_PLACEHOLDER}]}
    response = {
        "status": {"code": 200, "message": "OK"},
        "headers": {"content-type": ["application/json"]},
        "body": {"string": json.dumps(already_stripped).encode("utf-8")},
    }

    result = _strip_image_b64_payloads(response)

    decoded = json.loads(result["body"]["string"].decode("utf-8"))
    assert decoded["data"][0]["b64_json"] == VCR_IMAGE_B64_PLACEHOLDER
# ---------------------------------------------------------------------------
# Multipart boundary normalizer
# ---------------------------------------------------------------------------
def _multipart_request(boundary: str):
    """Build a one-part multipart POST to the transcription endpoint.

    The body holds a single ``file`` part delimited by ``boundary`` and the
    Content-Type header advertises that same boundary unquoted.
    """
    # Joined with CRLF; the trailing empty element yields the final CRLF
    # after the closing delimiter.
    body_lines = [
        f"--{boundary}",
        'Content-Disposition: form-data; name="file"; filename="audio.wav"',
        "Content-Type: audio/wav",
        "",
        "fake-audio-bytes",
        f"--{boundary}--",
        "",
    ]
    payload = "\r\n".join(body_lines).encode("utf-8")
    request_headers = {
        "content-type": f"multipart/form-data; boundary={boundary}",
    }
    return Request(
        method="POST",
        uri="https://api.openai.com/v1/audio/transcriptions",
        body=payload,
        headers=request_headers,
    )
def test_normalize_multipart_rewrites_header_and_body():
    """Both the header parameter and the body delimiters use the fixed boundary."""
    request = _multipart_request("abc123random")

    _normalize_multipart_boundary(request)

    expected_header = f"multipart/form-data; boundary={VCR_FIXED_MULTIPART_BOUNDARY}"
    assert request.headers["content-type"] == expected_header
    assert b"abc123random" not in request.body
    assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in request.body
def test_normalize_multipart_is_idempotent():
    """Normalizing an already-normalized request leaves header and body alone."""
    request = _multipart_request("abc123random")

    _normalize_multipart_boundary(request)
    snapshot = (request.body, request.headers["content-type"])

    _normalize_multipart_boundary(request)

    assert request.body == snapshot[0]
    assert request.headers["content-type"] == snapshot[1]
def test_normalize_multipart_two_distinct_boundaries_match_after_normalize():
    """Whisper-style: requests that differ only in their random boundary
    become byte-identical once normalized."""
    first, second = (
        _multipart_request(boundary) for boundary in ("boundaryAAA", "boundaryBBB")
    )

    for request in (first, second):
        _normalize_multipart_boundary(request)

    assert first.body == second.body
    assert first.headers["content-type"] == second.headers["content-type"]
def test_normalize_multipart_skips_non_multipart_requests():
    """A JSON request keeps its Content-Type and body untouched."""
    json_request = Request(
        method="POST",
        uri="https://api.openai.com/v1/chat/completions",
        body=b'{"model":"gpt-4o"}',
        headers={"content-type": "application/json"},
    )

    _normalize_multipart_boundary(json_request)

    content_type = json_request.headers["content-type"]
    assert content_type == "application/json"
    assert json_request.body == b'{"model":"gpt-4o"}'
def test_normalize_multipart_skips_request_without_content_type():
    """With no Content-Type header at all, the body is left as it was."""
    bare_request = Request(
        method="POST",
        uri="https://api.openai.com/v1/chat/completions",
        body=b"unknown body",
        headers={},
    )

    _normalize_multipart_boundary(bare_request)

    assert bare_request.body == b"unknown body"
def test_normalize_multipart_handles_quoted_boundary():
    """A boundary given in double quotes is still found and replaced in the body."""
    quoted_header = 'multipart/form-data; boundary="quoted-boundary"'
    request = Request(
        method="POST",
        uri="https://api.openai.com/v1/audio/transcriptions",
        body=b"--quoted-boundary--body content--quoted-boundary--",
        headers={"content-type": quoted_header},
    )

    _normalize_multipart_boundary(request)

    normalized_body = request.body
    assert b"quoted-boundary" not in normalized_body
    assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in normalized_body

View File

@ -12,7 +12,15 @@ from abc import ABC, abstractmethod
# Test resources
TEST_IMAGE_PATH = "test_image_edit.png"
TEST_PDF_URL = "https://arxiv.org/pdf/2201.04234"
# Tiny in-repo PDF served via jsdelivr (sha-pinned, immutable). The arxiv
# PDF previously used here was several MB — once base64-encoded into the
# Vertex OCR request it ballooned cassettes past 100 MB per test. Keep
# the URL stable across runs so cassettes don't churn.
TEST_PDF_URL = (
"https://cdn.jsdelivr.net/gh/BerriAI/litellm"
"@d769e81c90d453240c61fc572cdb27fae06a89d0"
"/tests/llm_translation/fixtures/dummy.pdf"
)
class BaseOCRTest(ABC):