From a64716ed5b59946aa5360bb545cf930e8ca24a7c Mon Sep 17 00:00:00 2001
From: Yuneng Jiang <yuneng@berri.ai>
Date: Thu, 7 May 2026 14:06:04 -0700
Subject: [PATCH] [Fix] Fine-Tuning Test: Wait for OpenAI File Propagation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenAI file uploads are eventually consistent — a freshly uploaded file
may briefly 404 from `retrieve` and is rejected by the fine-tuning
endpoint with `'file-... does not exist'` until processing finishes.
The async fine-tuning test called `acreate_fine_tuning_job` immediately
after `acreate_file` and flaked on this race.

Add a polling helper that waits up to ~30s for `status=processed` (and
short-circuits on `error`), called between upload and FT job creation.
This addresses the same propagation lag that the `await asyncio.sleep(1)`
in the sister batches test works around, but is more robust against longer
delays.
---
 tests/batches_tests/test_fine_tuning_api.py | 30 +++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/batches_tests/test_fine_tuning_api.py b/tests/batches_tests/test_fine_tuning_api.py
index 1c1af308df..a1df53cc0f 100644
--- a/tests/batches_tests/test_fine_tuning_api.py
+++ b/tests/batches_tests/test_fine_tuning_api.py
@@ -47,6 +47,34 @@ class TestCustomLogger(CustomLogger):
         self.standard_logging_object = kwargs["standard_logging_object"]
 
 
+async def _wait_for_openai_file_ready(file_id: str, max_attempts: int = 30) -> None:
+    """
+    Poll OpenAI's files API until the uploaded file is in `processed` state.
+
+    Uploads are eventually consistent: a fresh file may briefly 404 from
+    `retrieve` and be rejected by fine-tuning/batches until processed.
+    Makes up to `max_attempts` retrieve calls, sleeping 1s after each one.
+    Raises RuntimeError on `status == "error"`, TimeoutError when exhausted.
+    """
+    last_status: Optional[str] = None  # reported in the timeout message
+    for _ in range(max_attempts):
+        try:
+            file_obj = await litellm.afile_retrieve(
+                file_id=file_id, custom_llm_provider="openai"
+            )
+            last_status = getattr(file_obj, "status", None)  # None when absent
+            if last_status == "processed":
+                return
+            if last_status == "error":  # fail fast instead of polling on
+                raise RuntimeError(f"File {file_id} failed processing (status=error)")
+        except openai.NotFoundError:  # file not visible yet; retry
+            last_status = "not_found"
+        await asyncio.sleep(1)  # NOTE(review): also runs after the final attempt
+    raise TimeoutError(
+        f"File {file_id} not ready after {max_attempts}s (last_status={last_status})"
+    )  # NOTE(review): the "s" assumes ~1s per attempt; request latency is extra
+
+
 @pytest.mark.asyncio
 async def test_create_fine_tune_jobs_async():
     try:
@@ -64,6 +92,8 @@ async def test_create_fine_tune_jobs_async():
         )
         print("Response from creating file=", file_obj)
 
+        await _wait_for_openai_file_ready(file_obj.id)
+
         create_fine_tuning_response = await litellm.acreate_fine_tuning_job(
             model="gpt-3.5-turbo-0125",
             training_file=file_obj.id,