"""
Translates from OpenAI's `/v1/audio/transcriptions` to Deepgram's `/v1/listen`
"""
import io
from typing import List, Optional, Union
from urllib.parse import urlencode

from httpx import Headers, Response

from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
    AllMessageValues,
    OpenAIAudioTranscriptionOptionalParams,
)
from litellm.types.utils import FileTypes, TranscriptionResponse

from ...base_llm.audio_transcription.transformation import (
    BaseAudioTranscriptionConfig,
    LiteLLMLoggingObj,
)
from ..common_utils import DeepgramException
class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    """
    Adapts OpenAI's `/v1/audio/transcriptions` interface to Deepgram's
    `/v1/listen` API: maps request params, prepares the audio payload, and
    converts Deepgram's JSON response into an OpenAI-style
    `TranscriptionResponse`.
    """

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the OpenAI transcription params Deepgram accepts for `model`."""
        return ["language"]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy supported OpenAI params into `optional_params`.

        NOTE: unsupported params are silently dropped regardless of
        `drop_params`, matching the original behavior.
        """
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        """Wrap an HTTP error in the Deepgram-specific exception type."""
        return DeepgramException(
            message=error_message, status_code=status_code, headers=headers
        )

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> Union[dict, bytes]:
        """
        Processes the audio file input based on its type and returns the binary data.

        Args:
            audio_file: Can be a file path (str), a tuple
                (filename, file_content), binary data (bytes), or a file-like
                object (io.BufferedReader / io.BytesIO).

        Returns:
            The binary data of the audio file.

        Raises:
            TypeError: If `audio_file` (or the tuple's file content) has an
                unsupported type.
        """
        binary_data: bytes  # Explicitly declare the type
        # Handle the audio file based on type
        if isinstance(audio_file, str):
            # It's a file path on disk
            with open(audio_file, "rb") as f:
                binary_data = f.read()  # `f.read()` always returns `bytes`
        elif isinstance(audio_file, tuple):
            # (filename, file_content[, content_type]); filename is unused here
            _, file_content = audio_file[:2]
            if isinstance(file_content, str):
                # Tuple carried a file path rather than in-memory content
                with open(file_content, "rb") as f:
                    binary_data = f.read()  # `f.read()` always returns `bytes`
            elif isinstance(file_content, bytes):
                binary_data = file_content
            else:
                raise TypeError(
                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
                )
        elif isinstance(audio_file, bytes):
            # Assume it's already binary data
            binary_data = audio_file
        elif isinstance(audio_file, (io.BufferedReader, io.BytesIO)):
            # Handle file-like objects
            binary_data = audio_file.read()
        else:
            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")

        return binary_data

    def transform_audio_transcription_response(
        self,
        model: str,
        raw_response: Response,
        model_response: TranscriptionResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
    ) -> TranscriptionResponse:
        """
        Transforms the raw response from Deepgram to the TranscriptionResponse
        format (OpenAI-compatible `text` / `task` / `language` / `duration` /
        `words` fields).

        Raises:
            ValueError: If the response JSON does not have the expected shape.
        """
        try:
            response_json = raw_response.json()

            # Get the first alternative from the first channel
            first_channel = response_json["results"]["channels"][0]
            first_alternative = first_channel["alternatives"][0]

            # Extract the full transcript
            text = first_alternative["transcript"]

            # Create TranscriptionResponse object
            response = TranscriptionResponse(text=text)

            # Add additional metadata matching OpenAI format
            response["task"] = "transcribe"
            response[
                "language"
            ] = "english"  # Deepgram auto-detects but doesn't return language
            response["duration"] = response_json["metadata"]["duration"]

            # Transform words to match OpenAI format
            if "words" in first_alternative:
                response["words"] = [
                    {"word": word["word"], "start": word["start"], "end": word["end"]}
                    for word in first_alternative["words"]
                ]

            # Store full response in hidden params
            response._hidden_params = response_json

            return response
        except Exception as e:
            # Chain the original exception so the root cause isn't lost
            raise ValueError(
                f"Error transforming Deepgram response: {str(e)}\nResponse: {raw_response.text}"
            ) from e

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Build the Deepgram `/listen` URL, forwarding the mapped optional
        params (e.g. `language`) as query parameters.
        """
        if api_base is None:
            api_base = (
                get_secret_str("DEEPGRAM_API_BASE") or "https://api.deepgram.com/v1"
            )
        api_base = api_base.rstrip("/")  # Remove trailing slash if present

        # Fix: previously only `model` was sent, so supported params such as
        # `language` were mapped by map_openai_params but silently dropped
        # from the actual request URL.
        query_params = {"model": model, **optional_params}
        return f"{api_base}/listen?{urlencode(query_params)}"

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """
        Return the auth headers for Deepgram.

        Raises:
            ValueError: If no API key is supplied and `DEEPGRAM_API_KEY` is
                not set (previously this produced an `Authorization: Token
                None` header and an opaque 401 from the API).
        """
        api_key = api_key or get_secret_str("DEEPGRAM_API_KEY")
        if api_key is None:
            raise ValueError(
                "Deepgram API key is required. Set the `DEEPGRAM_API_KEY` "
                "environment variable or pass `api_key`."
            )
        return {
            "Authorization": f"Token {api_key}",
        }