Feature/add audio support for scaleway (#26110)

* feat(scaleway): add SCALEWAY to LlmProviders enum * feat(scaleway): add audio transcription config and dispatch wiring Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * test(scaleway): add behavior tests for audio transcription config Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * chore(scaleway): advertise audio_transcriptions in endpoint-support JSON * docs(scaleway): document audio transcription support * fix(scaleway): address PR review — plain-text response_format + missing-key fail-fast Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * test(scaleway): cover new response paths, drop gettysburg.wav coupling Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 23:49:41 +02:00 · 2026-04-20 23:49:41 +02:00 · a19bff4ca6
commit a19bff4ca6
parent ea12bae9a3
8 changed files with 457 additions and 2 deletions
--- a/docs/my-website/docs/providers/scaleway.md
+++ b/docs/my-website/docs/providers/scaleway.md
@ -60,3 +60,44 @@ curl http://localhost:4000/chat/completions \
 ## Supported features

 Scaleway provider supports all features in [Generative APIs reference documentation ↗](https://www.scaleway.com/en/developers/api/generative-apis/), such as streaming, structured outputs and tool calling.
+
+## Audio transcription
+
+Scaleway's `/audio/transcriptions` endpoint is OpenAI-compatible and works with Whisper models.
+
+### Python SDK
+
+```python
+import os
+from litellm import transcription
+
+os.environ["SCW_SECRET_KEY"] = "your-scaleway-secret-key"
+
+with open("speech.mp3", "rb") as audio_file:
+    response = transcription(
+        model="scaleway/whisper-large-v3",
+        file=audio_file,
+    )
+print(response.text)
+```
+
+### Proxy config
+
+```yaml
+model_list:
+  - model_name: scaleway-whisper
+    litellm_params:
+      model: scaleway/whisper-large-v3
+      api_key: "os.environ/SCW_SECRET_KEY"
+```
+
+### Proxy request
+
+```bash
+curl http://localhost:4000/v1/audio/transcriptions \
+  -H "Authorization: Bearer YOUR_LITELLM_MASTER_KEY" \
+  -F model="scaleway-whisper" \
+  -F file="@speech.mp3"
+```
+
+Supported optional params: `language`, `prompt`, `response_format`, `temperature`, `timestamp_granularities`.
--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@ -296,6 +296,15 @@ def get_supported_openai_params(  # noqa: PLR0915
            return OVHCloudAudioTranscriptionConfig().get_supported_openai_params(
                model=model
            )
+    elif custom_llm_provider == "scaleway":
+        if request_type == "transcription":
+            from litellm.llms.scaleway.audio_transcription.transformation import (
+                ScalewayAudioTranscriptionConfig,
+            )
+
+            return ScalewayAudioTranscriptionConfig().get_supported_openai_params(
+                model=model
+            )
    elif custom_llm_provider == "elevenlabs":
        if request_type == "transcription":
            from litellm.llms.elevenlabs.audio_transcription.transformation import (
--- a/litellm/llms/scaleway/audio_transcription/transformation.py
+++ b/litellm/llms/scaleway/audio_transcription/transformation.py
@ -0,0 +1,158 @@
+"""
+Support for Scaleway's OpenAI-compatible `/v1/audio/transcriptions` endpoint.
+
+API reference: https://www.scaleway.com/en/developers/api/generative-apis/#path-audio-create-an-audio-transcription
+"""
+
+from typing import List, Optional, Union
+
+import httpx
+
+from litellm.litellm_core_utils.audio_utils.utils import process_audio_file
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    AudioTranscriptionRequestData,
+    BaseAudioTranscriptionConfig,
+)
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    OpenAIAudioTranscriptionOptionalParams,
+)
+from litellm.types.utils import FileTypes, TranscriptionResponse
+
+
+class ScalewayAudioTranscriptionException(BaseLLMException):
+    """Typed error for failures in the Scaleway audio transcription flow."""
+
+    pass
+
+
+class ScalewayAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
+    """Request/response transformation for Scaleway's `/audio/transcriptions` API."""
+
+    def get_supported_openai_params(
+        self, model: str
+    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
+        """Return the OpenAI transcription params that are forwarded to Scaleway.
+
+        The returned list is the same for every `model`.
+        """
+        return [
+            "language",
+            "prompt",
+            "response_format",
+            "temperature",
+            "timestamp_granularities",
+        ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """Copy the supported entries of `non_default_params` into `optional_params`.
+
+        `optional_params` is mutated in place and also returned. Keys that are
+        not in `get_supported_openai_params()` are skipped silently;
+        `drop_params` is not consulted.
+        """
+        supported_params = self.get_supported_openai_params(model)
+        for k, v in non_default_params.items():
+            if k in supported_params:
+                optional_params[k] = v
+        return optional_params
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        api_key: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: dict,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """Build the full URL of the transcription endpoint.
+
+        Uses `https://api.scaleway.ai/v1` when `api_base` is None; otherwise the
+        caller's `api_base` with trailing slashes removed. The remaining
+        arguments are accepted but not used.
+        """
+        api_base = (
+            "https://api.scaleway.ai/v1" if api_base is None else api_base.rstrip("/")
+        )
+        return f"{api_base}/audio/transcriptions"
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        """Wrap an error message, status code and headers in the Scaleway exception."""
+        return ScalewayAudioTranscriptionException(
+            message=error_message,
+            status_code=status_code,
+            headers=headers,
+        )
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        """Resolve the API key and return the request headers.
+
+        The key is taken from `api_key`, falling back to the `SCW_SECRET_KEY`
+        secret only when `api_key` is None.
+
+        Returns:
+            A new dict with `Authorization` and `accept` defaults, overridden
+            by any entries in the caller-supplied `headers`.
+
+        Raises:
+            ScalewayAudioTranscriptionException: status 401 when no non-empty
+                key is available.
+        """
+        if api_key is None:
+            api_key = get_secret_str("SCW_SECRET_KEY")
+
+        # Fail fast instead of sending a request with "Bearer None".
+        if not api_key:
+            raise ScalewayAudioTranscriptionException(
+                message=(
+                    "Scaleway API key not found. Pass `api_key=...` or set the "
+                    "SCW_SECRET_KEY environment variable."
+                ),
+                status_code=401,
+                headers={},
+            )
+
+        default_headers = {
+            "Authorization": f"Bearer {api_key}",
+            "accept": "application/json",
+        }
+        # Caller-supplied headers are applied last, so they win over the defaults.
+        default_headers.update(headers or {})
+        return default_headers
+
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> AudioTranscriptionRequestData:
+        """Build the multipart form payload for a transcription request.
+
+        The form data contains `model` plus every supported param present in
+        `optional_params` with a non-None value. The audio goes into `files`
+        under the `file` key as a (filename, content, content_type) tuple.
+        """
+        processed_audio = process_audio_file(audio_file)
+
+        form_fields: dict = {"model": model}
+        for key in self.get_supported_openai_params(model):
+            value = optional_params.get(key)
+            if value is not None:
+                form_fields[key] = value
+
+        files = {
+            "file": (
+                processed_audio.filename,
+                processed_audio.file_content,
+                processed_audio.content_type,
+            )
+        }
+
+        return AudioTranscriptionRequestData(data=form_fields, files=files)
+
+    def transform_audio_transcription_response(
+        self,
+        raw_response: httpx.Response,
+    ) -> TranscriptionResponse:
+        """Convert Scaleway's HTTP response into a `TranscriptionResponse`.
+
+        A response whose content-type is not JSON is returned as plain text.
+        A JSON response must be an object: its `text`, `segments` and
+        `language` fields are copied over and the whole parsed body is stored
+        in `_hidden_params`.
+
+        Raises:
+            ScalewayAudioTranscriptionException: when the body is declared as
+                JSON but cannot be parsed, or parses to something other than
+                a JSON object.
+        """
+        content_type = (raw_response.headers.get("content-type") or "").lower()
+        if "application/json" not in content_type:
+            return TranscriptionResponse(text=raw_response.text)
+
+        try:
+            response_json = raw_response.json()
+        except Exception as err:
+            # Keep the original parse error attached as the cause.
+            raise ScalewayAudioTranscriptionException(
+                message=raw_response.text,
+                status_code=raw_response.status_code,
+                headers=raw_response.headers,
+            ) from err
+
+        # A bare string, list or null is valid JSON but has no fields to read;
+        # surface it as a typed error rather than an AttributeError/TypeError.
+        if not isinstance(response_json, dict):
+            raise ScalewayAudioTranscriptionException(
+                message=raw_response.text,
+                status_code=raw_response.status_code,
+                headers=raw_response.headers,
+            )
+
+        text = response_json.get("text") or ""
+        response = TranscriptionResponse(text=text)
+
+        if "segments" in response_json:
+            response["segments"] = response_json["segments"]
+        if "language" in response_json:
+            response["language"] = response_json["language"]
+
+        response._hidden_params = response_json
+        return response
--- a/litellm/provider_endpoints_support_backup.json
+++ b/litellm/provider_endpoints_support_backup.json
@ -1950,7 +1950,7 @@
        "responses": true,
        "embeddings": false,
        "image_generations": false,
-        "audio_transcriptions": false,
+        "audio_transcriptions": true,
        "audio_speech": false,
        "moderations": false,
        "batches": false,
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@ -3290,6 +3290,7 @@ class LlmProviders(str, Enum):
    MANUS = "manus"
    WANDB = "wandb"
    OVHCLOUD = "ovhcloud"
+    SCALEWAY = "scaleway"
    LEMONADE = "lemonade"
    AMAZON_NOVA = "amazon_nova"
    A2A_AGENT = "a2a_agent"
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -8472,6 +8472,12 @@ class ProviderConfigManager:
            )

            return OVHCloudAudioTranscriptionConfig()
+        elif litellm.LlmProviders.SCALEWAY == provider:
+            from litellm.llms.scaleway.audio_transcription.transformation import (
+                ScalewayAudioTranscriptionConfig,
+            )
+
+            return ScalewayAudioTranscriptionConfig()
        elif litellm.LlmProviders.MISTRAL == provider:
            from litellm.llms.mistral.audio_transcription.transformation import (
                MistralAudioTranscriptionConfig,
--- a/provider_endpoints_support.json
+++ b/provider_endpoints_support.json
@ -1968,7 +1968,7 @@
        "responses": true,
        "embeddings": false,
        "image_generations": false,
-        "audio_transcriptions": false,
+        "audio_transcriptions": true,
        "audio_speech": false,
        "moderations": false,
        "batches": false,
--- a/tests/test_litellm/llms/scaleway/test_scaleway_audio_transcription_transformation.py
+++ b/tests/test_litellm/llms/scaleway/test_scaleway_audio_transcription_transformation.py
@ -0,0 +1,240 @@
+import os
+from unittest.mock import MagicMock
+
+import httpx
+import pytest
+
+from litellm.llms.scaleway.audio_transcription.transformation import (
+    ScalewayAudioTranscriptionConfig,
+    ScalewayAudioTranscriptionException,
+)
+from litellm.types.utils import TranscriptionResponse
+
+
+# ---------------------------------------------------------------------------
+# get_complete_url
+# ---------------------------------------------------------------------------
+
+
+def test_scaleway_get_complete_url_default_base():
+    """With no api_base supplied, Scaleway's Generative API endpoint is used."""
+    # Only api_base influences the URL; the other arguments are placeholders.
+    url = ScalewayAudioTranscriptionConfig().get_complete_url(
+        api_base=None,
+        api_key="fake",
+        model="whisper-large-v3",
+        optional_params={},
+        litellm_params={},
+    )
+    assert url == "https://api.scaleway.ai/v1/audio/transcriptions"
+
+
+def test_scaleway_get_complete_url_custom_base_strips_trailing_slash():
+    """Caller-supplied api_base is respected; trailing slash is normalized."""
+    url = ScalewayAudioTranscriptionConfig().get_complete_url(
+        api_base="https://custom.example.com/v1/",
+        api_key="fake",
+        model="whisper-large-v3",
+        optional_params={},
+        litellm_params={},
+    )
+    # A doubled slash before "audio" would mean the trailing "/" was kept.
+    assert url == "https://custom.example.com/v1/audio/transcriptions"
+
+
+# ---------------------------------------------------------------------------
+# validate_environment
+# ---------------------------------------------------------------------------
+
+
+def test_scaleway_validate_environment_explicit_api_key():
+    """An explicit api_key becomes the Bearer token; the accept default is set."""
+    headers = ScalewayAudioTranscriptionConfig().validate_environment(
+        headers={},
+        model="whisper-large-v3",
+        messages=[],
+        optional_params={},
+        litellm_params={},
+        api_key="explicit-key",
+    )
+    assert headers["Authorization"] == "Bearer explicit-key"
+    assert headers["accept"] == "application/json"
+
+
+def test_scaleway_validate_environment_reads_scw_secret_key(monkeypatch):
+    """Without an api_key argument, the key is expected to come from SCW_SECRET_KEY."""
+    monkeypatch.setenv("SCW_SECRET_KEY", "env-secret")
+    headers = ScalewayAudioTranscriptionConfig().validate_environment(
+        headers={},
+        model="whisper-large-v3",
+        messages=[],
+        optional_params={},
+        litellm_params={},
+    )
+    assert headers["Authorization"] == "Bearer env-secret"
+
+
+def test_scaleway_validate_environment_explicit_api_key_wins_over_env(monkeypatch):
+    """Caller-supplied api_key must win over the SCW_SECRET_KEY env var."""
+    # Set the env var to a different value so a wrong precedence is detectable.
+    monkeypatch.setenv("SCW_SECRET_KEY", "env-secret")
+    headers = ScalewayAudioTranscriptionConfig().validate_environment(
+        headers={},
+        model="whisper-large-v3",
+        messages=[],
+        optional_params={},
+        litellm_params={},
+        api_key="explicit-wins",
+    )
+    assert headers["Authorization"] == "Bearer explicit-wins"
+
+
+# ---------------------------------------------------------------------------
+# transform_audio_transcription_request
+# ---------------------------------------------------------------------------
+
+
+def _open_test_audio():
+    """Shared helper: open the repo's canonical speech fixture."""
+    # Path is resolved relative to this test file, up to the repo root and then
+    # into tests/llm_translation. The caller is responsible for closing the file.
+    # NOTE(review): the commit message mentions dropping the gettysburg.wav
+    # coupling, but this helper still depends on that file — confirm intent.
+    wav_path = os.path.join(
+        os.path.dirname(__file__),
+        "../../../..",
+        "tests",
+        "llm_translation",
+        "gettysburg.wav",
+    )
+    return open(wav_path, "rb")
+
+
+def test_scaleway_transform_request_builds_multipart_with_supported_params():
+    """Supported params land in the form data and the audio is attached as `file`."""
+    with _open_test_audio() as audio_file:
+        result = (
+            ScalewayAudioTranscriptionConfig().transform_audio_transcription_request(
+                model="whisper-large-v3",
+                audio_file=audio_file,
+                optional_params={
+                    "language": "en",
+                    "temperature": 0.0,
+                    "response_format": "verbose_json",
+                },
+                litellm_params={},
+            )
+        )
+
+    assert isinstance(result.data, dict)
+    assert result.data["model"] == "whisper-large-v3"
+    assert result.data["language"] == "en"
+    # temperature=0.0 is falsy but not None, so it must still be forwarded.
+    assert result.data["temperature"] == 0.0
+    assert result.data["response_format"] == "verbose_json"
+    assert result.files is not None
+    assert "file" in result.files
+    assert len(result.files["file"]) == 3  # (filename, content, content_type)
+
+
+def test_scaleway_transform_request_drops_unsupported_params():
+    """Only params in get_supported_openai_params() should land in the form."""
+    with _open_test_audio() as audio_file:
+        result = (
+            ScalewayAudioTranscriptionConfig().transform_audio_transcription_request(
+                model="whisper-large-v3",
+                audio_file=audio_file,
+                optional_params={
+                    "language": "en",
+                    "stream": True,  # not supported
+                    "diarize": True,  # not supported
+                },
+                litellm_params={},
+            )
+        )
+
+    # Unsupported keys are filtered out while the supported one is kept.
+    assert "stream" not in result.data
+    assert "diarize" not in result.data
+    assert result.data["language"] == "en"
+
+
+# ---------------------------------------------------------------------------
+# transform_audio_transcription_response
+# ---------------------------------------------------------------------------
+
+
+def test_scaleway_transform_response_parses_text():
+    """The `text` field of a JSON body becomes the response text."""
+    mock_response = MagicMock(spec=httpx.Response)
+    mock_response.headers = {"content-type": "application/json"}
+    mock_response.json.return_value = {"text": "Four score and seven years ago"}
+
+    response = (
+        ScalewayAudioTranscriptionConfig().transform_audio_transcription_response(
+            mock_response
+        )
+    )
+
+    assert isinstance(response, TranscriptionResponse)
+    assert response.text == "Four score and seven years ago"
+
+
+def test_scaleway_transform_response_preserves_segments_and_language():
+    """`segments` and `language` from the JSON body are carried onto the response."""
+    mock_response = MagicMock(spec=httpx.Response)
+    mock_response.headers = {"content-type": "application/json"}
+    mock_response.json.return_value = {
+        "text": "hello world",
+        "language": "en",
+        "segments": [
+            {"text": "hello", "start": 0.0, "end": 0.5},
+            {"text": "world", "start": 0.6, "end": 1.1},
+        ],
+    }
+
+    response = (
+        ScalewayAudioTranscriptionConfig().transform_audio_transcription_response(
+            mock_response
+        )
+    )
+
+    assert response.text == "hello world"
+    assert response["language"] == "en"
+    assert len(response["segments"]) == 2
+
+
+def test_scaleway_transform_response_raises_typed_exception_on_non_json():
+    """Malformed upstream body must raise the Scaleway-typed exception so
+    error handlers downstream can classify it as a Scaleway failure."""
+    mock_response = MagicMock(spec=httpx.Response)
+    mock_response.json.side_effect = ValueError("not json")
+    # The content-type claims JSON, so the parsing path (and its failure) is taken.
+    mock_response.headers = {"content-type": "application/json"}
+    mock_response.text = "upstream 502 bad gateway"
+    mock_response.status_code = 502
+
+    with pytest.raises(ScalewayAudioTranscriptionException):
+        ScalewayAudioTranscriptionConfig().transform_audio_transcription_response(
+            mock_response
+        )
+
+
+def test_scaleway_transform_response_returns_plain_text_for_non_json_content_type():
+    """When Scaleway responds with text/srt/vtt (response_format="text" etc.),
+    the content-type is not application/json — return the body as plain text
+    rather than exploding on .json()."""
+    mock_response = MagicMock(spec=httpx.Response)
+    mock_response.headers = {"content-type": "text/plain; charset=utf-8"}
+    # .json is left unconfigured: the non-JSON content-type returns before it is used.
+    mock_response.text = "Four score and seven years ago"
+
+    response = (
+        ScalewayAudioTranscriptionConfig().transform_audio_transcription_response(
+            mock_response
+        )
+    )
+
+    assert isinstance(response, TranscriptionResponse)
+    assert response.text == "Four score and seven years ago"
+
+
+def test_scaleway_validate_environment_raises_when_no_key(monkeypatch):
+    """Missing credential should fail fast with a typed exception rather than
+    silently emitting 'Bearer None'."""
+    # Remove any ambient key so the test does not depend on the host environment.
+    monkeypatch.delenv("SCW_SECRET_KEY", raising=False)
+
+    with pytest.raises(ScalewayAudioTranscriptionException) as excinfo:
+        ScalewayAudioTranscriptionConfig().validate_environment(
+            headers={},
+            model="whisper-large-v3",
+            messages=[],
+            optional_params={},
+            litellm_params={},
+        )
+
+    # The message should tell the user which variable to set.
+    assert "SCW_SECRET_KEY" in str(excinfo.value)