Feature/add audio support for scaleway (#26110)
* feat(scaleway): add SCALEWAY to LlmProviders enum * feat(scaleway): add audio transcription config and dispatch wiring Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * test(scaleway): add behavior tests for audio transcription config Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * chore(scaleway): advertise audio_transcriptions in endpoint-support JSON * docs(scaleway): document audio transcription support * fix(scaleway): address PR review — plain-text response_format + missing-key fail-fast Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * test(scaleway): cover new response paths, drop gettysburg.wav coupling Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ea12bae9a3
commit
a19bff4ca6
@ -60,3 +60,44 @@ curl http://localhost:4000/chat/completions \
|
||||
## Supported features
|
||||
|
||||
The Scaleway provider supports all features described in the [Generative APIs reference documentation ↗](https://www.scaleway.com/en/developers/api/generative-apis/), such as streaming, structured outputs, and tool calling.
|
||||
|
||||
## Audio transcription
|
||||
|
||||
Scaleway's `/audio/transcriptions` endpoint is OpenAI-compatible and works with Whisper models.
|
||||
|
||||
### Python SDK
|
||||
|
||||
```python
|
||||
import os
|
||||
from litellm import transcription
|
||||
|
||||
os.environ["SCW_SECRET_KEY"] = "your-scaleway-secret-key"
|
||||
|
||||
with open("speech.mp3", "rb") as audio_file:
|
||||
response = transcription(
|
||||
model="scaleway/whisper-large-v3",
|
||||
file=audio_file,
|
||||
)
|
||||
print(response.text)
|
||||
```
|
||||
|
||||
### Proxy config
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: scaleway-whisper
|
||||
litellm_params:
|
||||
model: scaleway/whisper-large-v3
|
||||
api_key: "os.environ/SCW_SECRET_KEY"
|
||||
```
|
||||
|
||||
### Proxy request
|
||||
|
||||
```bash
|
||||
curl http://localhost:4000/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer YOUR_LITELLM_MASTER_KEY" \
|
||||
-F model="scaleway-whisper" \
|
||||
-F file="@speech.mp3"
|
||||
```
|
||||
|
||||
Supported optional params: `language`, `prompt`, `response_format`, `temperature`, `timestamp_granularities`.
|
||||
|
||||
@ -296,6 +296,15 @@ def get_supported_openai_params( # noqa: PLR0915
|
||||
return OVHCloudAudioTranscriptionConfig().get_supported_openai_params(
|
||||
model=model
|
||||
)
|
||||
elif custom_llm_provider == "scaleway":
|
||||
if request_type == "transcription":
|
||||
from litellm.llms.scaleway.audio_transcription.transformation import (
|
||||
ScalewayAudioTranscriptionConfig,
|
||||
)
|
||||
|
||||
return ScalewayAudioTranscriptionConfig().get_supported_openai_params(
|
||||
model=model
|
||||
)
|
||||
elif custom_llm_provider == "elevenlabs":
|
||||
if request_type == "transcription":
|
||||
from litellm.llms.elevenlabs.audio_transcription.transformation import (
|
||||
|
||||
158
litellm/llms/scaleway/audio_transcription/transformation.py
Normal file
158
litellm/llms/scaleway/audio_transcription/transformation.py
Normal file
@ -0,0 +1,158 @@
|
||||
"""
|
||||
Support for Scaleway's OpenAI-compatible `/v1/audio/transcriptions` endpoint.
|
||||
|
||||
API reference: https://www.scaleway.com/en/developers/api/generative-apis/#path-audio-create-an-audio-transcription
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.litellm_core_utils.audio_utils.utils import process_audio_file
|
||||
from litellm.llms.base_llm.audio_transcription.transformation import (
|
||||
AudioTranscriptionRequestData,
|
||||
BaseAudioTranscriptionConfig,
|
||||
)
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
OpenAIAudioTranscriptionOptionalParams,
|
||||
)
|
||||
from litellm.types.utils import FileTypes, TranscriptionResponse
|
||||
|
||||
|
||||
class ScalewayAudioTranscriptionException(BaseLLMException):
    """Exception type used for Scaleway audio transcription failures."""
|
||||
|
||||
|
||||
class ScalewayAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    """Request/response transformation for Scaleway's `/audio/transcriptions` endpoint."""

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the optional OpenAI transcription params this config forwards.

        The list is the same for every ``model``.
        """
        return [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "timestamp_granularities",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """Copy the supported entries of ``non_default_params`` into ``optional_params``.

        Keys outside ``get_supported_openai_params(model)`` are skipped;
        ``drop_params`` is not consulted. ``optional_params`` is mutated in
        place and also returned.
        """
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """Return the full transcription URL.

        Trailing slashes on ``api_base`` are stripped. A missing, empty, or
        slash-only ``api_base`` falls back to ``https://api.scaleway.ai/v1``
        so the result is always an absolute URL.
        """
        base = (api_base or "").rstrip("/") or "https://api.scaleway.ai/v1"
        return f"{base}/audio/transcriptions"

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """Wrap an error message, status code and headers in the Scaleway exception type."""
        return ScalewayAudioTranscriptionException(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Build the request headers, resolving the API key.

        When ``api_key`` is ``None`` the ``SCW_SECRET_KEY`` secret is read
        instead.

        Raises:
            ScalewayAudioTranscriptionException: with status 401 when no
                non-empty key is available, so a request is never sent with
                a ``Bearer None`` header.
        """
        if api_key is None:
            api_key = get_secret_str("SCW_SECRET_KEY")

        if not api_key:
            raise ScalewayAudioTranscriptionException(
                message=(
                    "Scaleway API key not found. Pass `api_key=...` or set the "
                    "SCW_SECRET_KEY environment variable."
                ),
                status_code=401,
                headers={},
            )

        default_headers = {
            "Authorization": f"Bearer {api_key}",
            "accept": "application/json",
        }
        # Caller-supplied headers are applied last and therefore win.
        default_headers.update(headers or {})
        return default_headers

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """Build the multipart payload: form fields plus the audio file part.

        Only supported params with a non-``None`` value are added to the form
        next to ``model``; everything else in ``optional_params`` is ignored.
        """
        processed_audio = process_audio_file(audio_file)

        form_fields: dict = {"model": model}
        for key in self.get_supported_openai_params(model):
            value = optional_params.get(key)
            if value is not None:
                form_fields[key] = value

        files = {
            "file": (
                processed_audio.filename,
                processed_audio.file_content,
                processed_audio.content_type,
            )
        }

        return AudioTranscriptionRequestData(data=form_fields, files=files)

    def transform_audio_transcription_response(
        self,
        raw_response: httpx.Response,
    ) -> TranscriptionResponse:
        """Convert the HTTP response into a ``TranscriptionResponse``.

        A body whose content-type is not ``application/json`` is returned
        verbatim as the transcription text. A JSON body must be an object;
        its ``text``, ``segments`` and ``language`` entries are copied onto
        the response when present.

        Raises:
            ScalewayAudioTranscriptionException: when the body is declared as
                JSON but cannot be parsed, or parses to something other than
                a JSON object.
        """
        content_type = (raw_response.headers.get("content-type") or "").lower()
        if "application/json" not in content_type:
            return TranscriptionResponse(text=raw_response.text)

        try:
            response_json = raw_response.json()
        except Exception as err:
            # Keep the original parse error attached as the cause.
            raise ScalewayAudioTranscriptionException(
                message=raw_response.text,
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            ) from err

        # A JSON list/string/null would otherwise fail below with an untyped
        # AttributeError on `.get`.
        if not isinstance(response_json, dict):
            raise ScalewayAudioTranscriptionException(
                message=(
                    "Expected a JSON object in the transcription response, got "
                    f"{type(response_json).__name__}: {raw_response.text}"
                ),
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            )

        text = response_json.get("text") or ""
        response = TranscriptionResponse(text=text)

        if "segments" in response_json:
            response["segments"] = response_json["segments"]
        if "language" in response_json:
            response["language"] = response_json["language"]

        response._hidden_params = response_json
        return response
|
||||
@ -1950,7 +1950,7 @@
|
||||
"responses": true,
|
||||
"embeddings": false,
|
||||
"image_generations": false,
|
||||
"audio_transcriptions": false,
|
||||
"audio_transcriptions": true,
|
||||
"audio_speech": false,
|
||||
"moderations": false,
|
||||
"batches": false,
|
||||
|
||||
@ -3290,6 +3290,7 @@ class LlmProviders(str, Enum):
|
||||
MANUS = "manus"
|
||||
WANDB = "wandb"
|
||||
OVHCLOUD = "ovhcloud"
|
||||
SCALEWAY = "scaleway"
|
||||
LEMONADE = "lemonade"
|
||||
AMAZON_NOVA = "amazon_nova"
|
||||
A2A_AGENT = "a2a_agent"
|
||||
|
||||
@ -8472,6 +8472,12 @@ class ProviderConfigManager:
|
||||
)
|
||||
|
||||
return OVHCloudAudioTranscriptionConfig()
|
||||
elif litellm.LlmProviders.SCALEWAY == provider:
|
||||
from litellm.llms.scaleway.audio_transcription.transformation import (
|
||||
ScalewayAudioTranscriptionConfig,
|
||||
)
|
||||
|
||||
return ScalewayAudioTranscriptionConfig()
|
||||
elif litellm.LlmProviders.MISTRAL == provider:
|
||||
from litellm.llms.mistral.audio_transcription.transformation import (
|
||||
MistralAudioTranscriptionConfig,
|
||||
|
||||
@ -1968,7 +1968,7 @@
|
||||
"responses": true,
|
||||
"embeddings": false,
|
||||
"image_generations": false,
|
||||
"audio_transcriptions": false,
|
||||
"audio_transcriptions": true,
|
||||
"audio_speech": false,
|
||||
"moderations": false,
|
||||
"batches": false,
|
||||
|
||||
@ -0,0 +1,240 @@
|
||||
import os
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from litellm.llms.scaleway.audio_transcription.transformation import (
|
||||
ScalewayAudioTranscriptionConfig,
|
||||
ScalewayAudioTranscriptionException,
|
||||
)
|
||||
from litellm.types.utils import TranscriptionResponse
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_complete_url
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_scaleway_get_complete_url_default_base():
    """Omitting api_base falls back to Scaleway's Generative API endpoint."""
    config = ScalewayAudioTranscriptionConfig()
    call_kwargs = {
        "api_base": None,
        "api_key": "fake",
        "model": "whisper-large-v3",
        "optional_params": {},
        "litellm_params": {},
    }

    resolved = config.get_complete_url(**call_kwargs)

    assert resolved == "https://api.scaleway.ai/v1/audio/transcriptions"
|
||||
|
||||
|
||||
def test_scaleway_get_complete_url_custom_base_strips_trailing_slash():
    """A caller-provided api_base is kept, minus its trailing slash."""
    config = ScalewayAudioTranscriptionConfig()
    call_kwargs = {
        "api_base": "https://custom.example.com/v1/",
        "api_key": "fake",
        "model": "whisper-large-v3",
        "optional_params": {},
        "litellm_params": {},
    }

    resolved = config.get_complete_url(**call_kwargs)

    assert resolved == "https://custom.example.com/v1/audio/transcriptions"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# validate_environment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_scaleway_validate_environment_explicit_api_key():
    """An explicit api_key becomes a Bearer token next to a JSON accept header."""
    config = ScalewayAudioTranscriptionConfig()

    built = config.validate_environment(
        headers={},
        model="whisper-large-v3",
        messages=[],
        optional_params={},
        litellm_params={},
        api_key="explicit-key",
    )

    assert built["Authorization"] == "Bearer explicit-key"
    assert built["accept"] == "application/json"
|
||||
|
||||
|
||||
def test_scaleway_validate_environment_reads_scw_secret_key(monkeypatch):
    """With no api_key argument, the key is taken from SCW_SECRET_KEY."""
    monkeypatch.setenv("SCW_SECRET_KEY", "env-secret")
    config = ScalewayAudioTranscriptionConfig()

    built = config.validate_environment(
        headers={},
        model="whisper-large-v3",
        messages=[],
        optional_params={},
        litellm_params={},
    )

    assert built["Authorization"] == "Bearer env-secret"
|
||||
|
||||
|
||||
def test_scaleway_validate_environment_explicit_api_key_wins_over_env(monkeypatch):
    """An api_key argument takes precedence over the SCW_SECRET_KEY env var."""
    monkeypatch.setenv("SCW_SECRET_KEY", "env-secret")
    config = ScalewayAudioTranscriptionConfig()

    built = config.validate_environment(
        headers={},
        model="whisper-large-v3",
        messages=[],
        optional_params={},
        litellm_params={},
        api_key="explicit-wins",
    )

    assert built["Authorization"] == "Bearer explicit-wins"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# transform_audio_transcription_request
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _open_test_audio():
    """Open the shared gettysburg.wav speech fixture in binary read mode.

    The path is resolved relative to this test module; the caller is
    responsible for closing the returned file object.
    """
    module_dir = os.path.dirname(__file__)
    relative_parts = ("../../../..", "tests", "llm_translation", "gettysburg.wav")
    return open(os.path.join(module_dir, *relative_parts), "rb")
|
||||
|
||||
|
||||
def test_scaleway_transform_request_builds_multipart_with_supported_params():
    """Supported params land in the form data and the audio is attached as a file part."""
    requested = {
        "language": "en",
        "temperature": 0.0,
        "response_format": "verbose_json",
    }

    with _open_test_audio() as audio_file:
        config = ScalewayAudioTranscriptionConfig()
        result = config.transform_audio_transcription_request(
            model="whisper-large-v3",
            audio_file=audio_file,
            optional_params=requested,
            litellm_params={},
        )

    assert isinstance(result.data, dict)
    expected_form = {
        "model": "whisper-large-v3",
        "language": "en",
        "temperature": 0.0,
        "response_format": "verbose_json",
    }
    for field, expected in expected_form.items():
        assert result.data[field] == expected

    assert result.files is not None
    assert "file" in result.files
    # The file part is a (filename, content, content_type) triple.
    assert len(result.files["file"]) == 3
|
||||
|
||||
|
||||
def test_scaleway_transform_request_drops_unsupported_params():
    """Params missing from get_supported_openai_params() never reach the form."""
    requested = {
        "language": "en",
        "stream": True,  # not supported
        "diarize": True,  # not supported
    }

    with _open_test_audio() as audio_file:
        config = ScalewayAudioTranscriptionConfig()
        result = config.transform_audio_transcription_request(
            model="whisper-large-v3",
            audio_file=audio_file,
            optional_params=requested,
            litellm_params={},
        )

    for unsupported in ("stream", "diarize"):
        assert unsupported not in result.data
    assert result.data["language"] == "en"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# transform_audio_transcription_response
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_scaleway_transform_response_parses_text():
    """A JSON body with a `text` field yields a TranscriptionResponse carrying it."""
    transcript = "Four score and seven years ago"
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.headers = {"content-type": "application/json"}
    fake_http.json.return_value = {"text": "Four score and seven years ago"}

    config = ScalewayAudioTranscriptionConfig()
    parsed = config.transform_audio_transcription_response(fake_http)

    assert isinstance(parsed, TranscriptionResponse)
    assert parsed.text == transcript
|
||||
|
||||
|
||||
def test_scaleway_transform_response_preserves_segments_and_language():
    """`segments` and `language` from the JSON body are carried onto the response."""
    payload = {
        "text": "hello world",
        "language": "en",
        "segments": [
            {"text": "hello", "start": 0.0, "end": 0.5},
            {"text": "world", "start": 0.6, "end": 1.1},
        ],
    }
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.headers = {"content-type": "application/json"}
    fake_http.json.return_value = payload

    config = ScalewayAudioTranscriptionConfig()
    parsed = config.transform_audio_transcription_response(fake_http)

    assert parsed.text == "hello world"
    assert parsed["language"] == "en"
    assert len(parsed["segments"]) == 2
|
||||
|
||||
|
||||
def test_scaleway_transform_response_raises_typed_exception_on_non_json():
    """A body declared as JSON that fails to parse raises the Scaleway-typed
    exception, so downstream handlers can attribute the failure to Scaleway."""
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.headers = {"content-type": "application/json"}
    fake_http.status_code = 502
    fake_http.text = "upstream 502 bad gateway"
    fake_http.json.side_effect = ValueError("not json")

    config = ScalewayAudioTranscriptionConfig()

    with pytest.raises(ScalewayAudioTranscriptionException):
        config.transform_audio_transcription_response(fake_http)
|
||||
|
||||
|
||||
def test_scaleway_transform_response_returns_plain_text_for_non_json_content_type():
    """A non-JSON content-type (as with response_format="text", srt or vtt)
    must be returned as plain text instead of being passed to .json()."""
    body = "Four score and seven years ago"
    fake_http = MagicMock(spec=httpx.Response)
    fake_http.headers = {"content-type": "text/plain; charset=utf-8"}
    fake_http.text = "Four score and seven years ago"

    config = ScalewayAudioTranscriptionConfig()
    parsed = config.transform_audio_transcription_response(fake_http)

    assert isinstance(parsed, TranscriptionResponse)
    assert parsed.text == body
|
||||
|
||||
|
||||
def test_scaleway_validate_environment_raises_when_no_key(monkeypatch):
    """With no api_key and no SCW_SECRET_KEY, a typed exception is raised
    up front instead of building a 'Bearer None' header."""
    monkeypatch.delenv("SCW_SECRET_KEY", raising=False)
    config = ScalewayAudioTranscriptionConfig()

    with pytest.raises(ScalewayAudioTranscriptionException) as excinfo:
        config.validate_environment(
            headers={},
            model="whisper-large-v3",
            messages=[],
            optional_params={},
            litellm_params={},
        )

    error_text = str(excinfo.value)
    assert "SCW_SECRET_KEY" in error_text
|
||||
Loading…
Reference in New Issue
Block a user