Feature/add audio support for scaleway (#26110)

* feat(scaleway): add SCALEWAY to LlmProviders enum

* feat(scaleway): add audio transcription config and dispatch wiring

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* test(scaleway): add behavior tests for audio transcription config

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore(scaleway): advertise audio_transcriptions in endpoint-support JSON

* docs(scaleway): document audio transcription support

* fix(scaleway): address PR review — plain-text response_format + missing-key fail-fast

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* test(scaleway): cover new response paths, drop gettysburg.wav coupling

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
nhyy244 2026-04-20 23:49:41 +02:00 committed by GitHub
parent ea12bae9a3
commit a19bff4ca6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 457 additions and 2 deletions

View File

@ -60,3 +60,44 @@ curl http://localhost:4000/chat/completions \
## Supported features
Scaleway provider supports all features in [Generative APIs reference documentation ↗](https://www.scaleway.com/en/developers/api/generative-apis/), such as streaming, structured outputs and tool calling.
## Audio transcription
Scaleway's `/audio/transcriptions` endpoint is OpenAI-compatible and works with Whisper models.
### Python SDK
```python
import os

from litellm import transcription

os.environ["SCW_SECRET_KEY"] = "your-scaleway-secret-key"

with open("speech.mp3", "rb") as audio_file:
    response = transcription(
        model="scaleway/whisper-large-v3",
        file=audio_file,
    )

print(response.text)
```
### Proxy config
```yaml
model_list:
  - model_name: scaleway-whisper
    litellm_params:
      model: scaleway/whisper-large-v3
      api_key: "os.environ/SCW_SECRET_KEY"
```
### Proxy request
```bash
curl http://localhost:4000/v1/audio/transcriptions \
-H "Authorization: Bearer YOUR_LITELLM_MASTER_KEY" \
-F model="scaleway-whisper" \
-F file="@speech.mp3"
```
Supported optional params: `language`, `prompt`, `response_format`, `temperature`, `timestamp_granularities`.

View File

@ -296,6 +296,15 @@ def get_supported_openai_params( # noqa: PLR0915
return OVHCloudAudioTranscriptionConfig().get_supported_openai_params(
model=model
)
elif custom_llm_provider == "scaleway":
if request_type == "transcription":
from litellm.llms.scaleway.audio_transcription.transformation import (
ScalewayAudioTranscriptionConfig,
)
return ScalewayAudioTranscriptionConfig().get_supported_openai_params(
model=model
)
elif custom_llm_provider == "elevenlabs":
if request_type == "transcription":
from litellm.llms.elevenlabs.audio_transcription.transformation import (

View File

@ -0,0 +1,158 @@
"""
Support for Scaleway's OpenAI-compatible `/v1/audio/transcriptions` endpoint.
API reference: https://www.scaleway.com/en/developers/api/generative-apis/#path-audio-create-an-audio-transcription
"""
from typing import List, Optional, Union
import httpx
from litellm.litellm_core_utils.audio_utils.utils import process_audio_file
from litellm.llms.base_llm.audio_transcription.transformation import (
AudioTranscriptionRequestData,
BaseAudioTranscriptionConfig,
)
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
OpenAIAudioTranscriptionOptionalParams,
)
from litellm.types.utils import FileTypes, TranscriptionResponse
class ScalewayAudioTranscriptionException(BaseLLMException):
    """Error type used for failures on the Scaleway audio transcription path."""
class ScalewayAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    """Request/response transformation for Scaleway audio transcription.

    Builds the URL, headers and multipart form for Scaleway's
    `/audio/transcriptions` endpoint and converts the HTTP response into a
    `TranscriptionResponse`.
    """

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the OpenAI transcription params that are forwarded upstream."""
        return [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "timestamp_granularities",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """Copy supported entries of `non_default_params` into `optional_params`.

        Unsupported keys are ignored. `optional_params` is mutated in place and
        also returned. `drop_params` is accepted but not consulted here.
        """
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """Return the full transcription URL.

        A caller-supplied `api_base` is used with trailing slashes removed.
        When it is missing or empty, the hosted Scaleway base URL is used so
        that the result is never a relative path.
        """
        normalized_base = api_base.rstrip("/") if api_base else ""
        if not normalized_base:
            normalized_base = "https://api.scaleway.ai/v1"
        return f"{normalized_base}/audio/transcriptions"

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """Wrap an upstream error in the Scaleway-specific exception type."""
        return ScalewayAudioTranscriptionException(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Resolve the API key and build the request headers.

        An explicit non-empty `api_key` wins; otherwise `SCW_SECRET_KEY` is
        looked up via `get_secret_str`.

        Raises:
            ScalewayAudioTranscriptionException: with status 401 when no key
                can be resolved, instead of sending `Bearer None`.
        """
        # Treat an empty explicit key like a missing one so the documented
        # SCW_SECRET_KEY fallback still applies.
        api_key = api_key or get_secret_str("SCW_SECRET_KEY")
        if not api_key:
            raise ScalewayAudioTranscriptionException(
                message=(
                    "Scaleway API key not found. Pass `api_key=...` or set the "
                    "SCW_SECRET_KEY environment variable."
                ),
                status_code=401,
                headers={},
            )
        default_headers = {
            "Authorization": f"Bearer {api_key}",
            "accept": "application/json",
        }
        # Caller-supplied headers take precedence over the defaults above.
        default_headers.update(headers or {})
        return default_headers

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """Build the multipart payload: form fields plus the `file` part.

        Only keys listed by `get_supported_openai_params` with a non-None value
        are added to the form next to `model`.
        """
        processed_audio = process_audio_file(audio_file)
        form_fields: dict = {"model": model}
        # NOTE(review): list values (e.g. timestamp_granularities) are passed
        # through unchanged; confirm the HTTP layer encodes them the way the
        # Scaleway endpoint expects.
        for key in self.get_supported_openai_params(model):
            value = optional_params.get(key)
            if value is not None:
                form_fields[key] = value
        files = {
            "file": (
                processed_audio.filename,
                processed_audio.file_content,
                processed_audio.content_type,
            )
        }
        return AudioTranscriptionRequestData(data=form_fields, files=files)

    def transform_audio_transcription_response(
        self,
        raw_response: httpx.Response,
    ) -> TranscriptionResponse:
        """Convert the HTTP response into a `TranscriptionResponse`.

        Responses whose content type is not JSON are returned as plain text.
        JSON responses must be an object; `text`, `segments` and `language`
        are copied over when present.

        Raises:
            ScalewayAudioTranscriptionException: when a response labelled as
                JSON cannot be parsed or is not a JSON object.
        """
        content_type = (raw_response.headers.get("content-type") or "").lower()
        if "application/json" not in content_type:
            return TranscriptionResponse(text=raw_response.text)
        try:
            response_json = raw_response.json()
        except Exception as err:
            # Keep the original parse error attached as the cause.
            raise ScalewayAudioTranscriptionException(
                message=raw_response.text,
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            ) from err
        if not isinstance(response_json, dict):
            # A JSON list/string/null has no fields to read; surface it as a
            # typed provider error rather than an AttributeError on `.get`.
            raise ScalewayAudioTranscriptionException(
                message=raw_response.text,
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            )
        text = response_json.get("text") or ""
        response = TranscriptionResponse(text=text)
        if "segments" in response_json:
            response["segments"] = response_json["segments"]
        if "language" in response_json:
            response["language"] = response_json["language"]
        response._hidden_params = response_json
        return response

View File

@ -1950,7 +1950,7 @@
"responses": true,
"embeddings": false,
"image_generations": false,
"audio_transcriptions": false,
"audio_transcriptions": true,
"audio_speech": false,
"moderations": false,
"batches": false,

View File

@ -3290,6 +3290,7 @@ class LlmProviders(str, Enum):
MANUS = "manus"
WANDB = "wandb"
OVHCLOUD = "ovhcloud"
SCALEWAY = "scaleway"
LEMONADE = "lemonade"
AMAZON_NOVA = "amazon_nova"
A2A_AGENT = "a2a_agent"

View File

@ -8472,6 +8472,12 @@ class ProviderConfigManager:
)
return OVHCloudAudioTranscriptionConfig()
elif litellm.LlmProviders.SCALEWAY == provider:
from litellm.llms.scaleway.audio_transcription.transformation import (
ScalewayAudioTranscriptionConfig,
)
return ScalewayAudioTranscriptionConfig()
elif litellm.LlmProviders.MISTRAL == provider:
from litellm.llms.mistral.audio_transcription.transformation import (
MistralAudioTranscriptionConfig,

View File

@ -1968,7 +1968,7 @@
"responses": true,
"embeddings": false,
"image_generations": false,
"audio_transcriptions": false,
"audio_transcriptions": true,
"audio_speech": false,
"moderations": false,
"batches": false,

View File

@ -0,0 +1,240 @@
import os
from unittest.mock import MagicMock
import httpx
import pytest
from litellm.llms.scaleway.audio_transcription.transformation import (
ScalewayAudioTranscriptionConfig,
ScalewayAudioTranscriptionException,
)
from litellm.types.utils import TranscriptionResponse
# ---------------------------------------------------------------------------
# get_complete_url
# ---------------------------------------------------------------------------
def test_scaleway_get_complete_url_default_base():
    """Omitting api_base falls back to Scaleway's hosted endpoint."""
    config = ScalewayAudioTranscriptionConfig()
    resolved = config.get_complete_url(
        api_base=None,
        api_key="fake",
        model="whisper-large-v3",
        optional_params={},
        litellm_params={},
    )
    expected = "https://api.scaleway.ai/v1/audio/transcriptions"
    assert resolved == expected
def test_scaleway_get_complete_url_custom_base_strips_trailing_slash():
    """A custom api_base is honoured and its trailing slash is removed."""
    config = ScalewayAudioTranscriptionConfig()
    resolved = config.get_complete_url(
        api_base="https://custom.example.com/v1/",
        api_key="fake",
        model="whisper-large-v3",
        optional_params={},
        litellm_params={},
    )
    expected = "https://custom.example.com/v1/audio/transcriptions"
    assert resolved == expected
# ---------------------------------------------------------------------------
# validate_environment
# ---------------------------------------------------------------------------
def test_scaleway_validate_environment_explicit_api_key():
    """An explicit api_key ends up in the Authorization header."""
    config = ScalewayAudioTranscriptionConfig()
    built = config.validate_environment(
        headers={},
        model="whisper-large-v3",
        messages=[],
        optional_params={},
        litellm_params={},
        api_key="explicit-key",
    )
    assert built["Authorization"] == "Bearer explicit-key"
    assert built["accept"] == "application/json"
def test_scaleway_validate_environment_reads_scw_secret_key(monkeypatch):
    """Without an explicit key, SCW_SECRET_KEY from the environment is used."""
    monkeypatch.setenv("SCW_SECRET_KEY", "env-secret")
    config = ScalewayAudioTranscriptionConfig()
    built = config.validate_environment(
        headers={},
        model="whisper-large-v3",
        messages=[],
        optional_params={},
        litellm_params={},
    )
    assert built["Authorization"] == "Bearer env-secret"
def test_scaleway_validate_environment_explicit_api_key_wins_over_env(monkeypatch):
    """An explicit api_key takes precedence over the SCW_SECRET_KEY env var."""
    monkeypatch.setenv("SCW_SECRET_KEY", "env-secret")
    config = ScalewayAudioTranscriptionConfig()
    built = config.validate_environment(
        headers={},
        model="whisper-large-v3",
        messages=[],
        optional_params={},
        litellm_params={},
        api_key="explicit-wins",
    )
    assert built["Authorization"] == "Bearer explicit-wins"
# ---------------------------------------------------------------------------
# transform_audio_transcription_request
# ---------------------------------------------------------------------------
def _open_test_audio():
    """Open the shared ``gettysburg.wav`` speech fixture for binary reading.

    The caller owns the returned handle; the tests use it as a context
    manager so it is closed afterwards.
    """
    here = os.path.dirname(__file__)
    fixture_parts = ("../../../..", "tests", "llm_translation", "gettysburg.wav")
    return open(os.path.join(here, *fixture_parts), "rb")
def test_scaleway_transform_request_builds_multipart_with_supported_params():
    """Supported params land in the form data and the audio becomes a file part."""
    config = ScalewayAudioTranscriptionConfig()
    params = {
        "language": "en",
        "temperature": 0.0,
        "response_format": "verbose_json",
    }
    with _open_test_audio() as audio_file:
        request = config.transform_audio_transcription_request(
            model="whisper-large-v3",
            audio_file=audio_file,
            optional_params=params,
            litellm_params={},
        )

    form = request.data
    assert isinstance(form, dict)
    assert form["model"] == "whisper-large-v3"
    assert form["language"] == "en"
    assert form["temperature"] == 0.0
    assert form["response_format"] == "verbose_json"

    assert request.files is not None
    assert "file" in request.files
    assert len(request.files["file"]) == 3  # (filename, content, content_type)
def test_scaleway_transform_request_drops_unsupported_params():
    """Keys outside get_supported_openai_params() never reach the form data."""
    config = ScalewayAudioTranscriptionConfig()
    params = {
        "language": "en",
        "stream": True,  # not supported
        "diarize": True,  # not supported
    }
    with _open_test_audio() as audio_file:
        request = config.transform_audio_transcription_request(
            model="whisper-large-v3",
            audio_file=audio_file,
            optional_params=params,
            litellm_params={},
        )

    for unsupported in ("stream", "diarize"):
        assert unsupported not in request.data
    assert request.data["language"] == "en"
# ---------------------------------------------------------------------------
# transform_audio_transcription_response
# ---------------------------------------------------------------------------
def test_scaleway_transform_response_parses_text():
    """A JSON body with a `text` field is surfaced as the transcription text."""
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.headers = {"content-type": "application/json"}
    fake_http.json.return_value = {"text": "Four score and seven years ago"}

    config = ScalewayAudioTranscriptionConfig()
    parsed = config.transform_audio_transcription_response(fake_http)

    assert isinstance(parsed, TranscriptionResponse)
    assert parsed.text == "Four score and seven years ago"
def test_scaleway_transform_response_preserves_segments_and_language():
    """`segments` and `language` from the JSON body are kept on the response."""
    payload = {
        "text": "hello world",
        "language": "en",
        "segments": [
            {"text": "hello", "start": 0.0, "end": 0.5},
            {"text": "world", "start": 0.6, "end": 1.1},
        ],
    }
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.headers = {"content-type": "application/json"}
    fake_http.json.return_value = payload

    config = ScalewayAudioTranscriptionConfig()
    parsed = config.transform_audio_transcription_response(fake_http)

    assert parsed.text == "hello world"
    assert parsed["language"] == "en"
    assert len(parsed["segments"]) == 2
def test_scaleway_transform_response_raises_typed_exception_on_non_json():
    """A body labelled as JSON that fails to parse raises the Scaleway-typed
    exception, so downstream handlers can attribute the failure to Scaleway."""
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.status_code = 502
    fake_http.text = "upstream 502 bad gateway"
    fake_http.headers = {"content-type": "application/json"}
    fake_http.json.side_effect = ValueError("not json")

    config = ScalewayAudioTranscriptionConfig()
    with pytest.raises(ScalewayAudioTranscriptionException):
        config.transform_audio_transcription_response(fake_http)
def test_scaleway_transform_response_returns_plain_text_for_non_json_content_type():
    """When the content type is not application/json (for example a text, srt
    or vtt response format), the body is returned as plain text and .json()
    is not relied upon."""
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.headers = {"content-type": "text/plain; charset=utf-8"}
    fake_http.text = "Four score and seven years ago"

    config = ScalewayAudioTranscriptionConfig()
    parsed = config.transform_audio_transcription_response(fake_http)

    assert isinstance(parsed, TranscriptionResponse)
    assert parsed.text == "Four score and seven years ago"
def test_scaleway_validate_environment_raises_when_no_key(monkeypatch):
    """With no credential available, validation fails fast with the typed
    exception instead of producing a 'Bearer None' header."""
    monkeypatch.delenv("SCW_SECRET_KEY", raising=False)
    config = ScalewayAudioTranscriptionConfig()
    with pytest.raises(ScalewayAudioTranscriptionException) as excinfo:
        config.validate_environment(
            headers={},
            model="whisper-large-v3",
            messages=[],
            optional_params={},
            litellm_params={},
        )
    error_text = str(excinfo.value)
    assert "SCW_SECRET_KEY" in error_text