Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -498,6 +498,69 @@ async def presentation_chat(query: PresentationChatModel, background_tasks: Back
|
|
498 |
})
|
499 |
|
500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
# ============================================================================
|
502 |
# Main Execution
|
503 |
# ============================================================================
|
|
|
498 |
})
|
499 |
|
500 |
|
501 |
+
# ============================================================================
|
502 |
+
# AUDIO ENDPOINTS
|
503 |
+
# ============================================================================
|
504 |
+
|
505 |
+
|
506 |
+
|
507 |
+
from enum import Enum
|
508 |
+
import io
|
509 |
+
|
510 |
+
|
511 |
+
# Module-level OpenAI SDK client shared by the audio endpoints below.
# OpenAI() with no arguments takes its configuration (API key, base URL)
# from the environment — NOTE(review): confirm OPENAI_API_KEY is set in deployment.
openai_client = OpenAI()
|
512 |
+
|
513 |
+
class OpenaiTTSModels:
    """Namespace grouping the closed sets of values accepted by OpenAI's TTS API.

    Each nested enum mixes in ``str`` so members compare equal to (and
    serialize as) their literal API string.
    """

    class ModelType(str, Enum):
        """Speech-synthesis model identifiers."""

        tts_1_hd = "tts-1-hd"
        tts_1 = "tts-1"

    class VoiceType(str, Enum):
        """Preset voices available for synthesis."""

        alloy = "alloy"
        echo = "echo"
        fable = "fable"
        onyx = "onyx"
        nova = "nova"
        shimmer = "shimmer"

    class OutputFormat(str, Enum):
        """Audio container/codec formats the API can return."""

        mp3 = "mp3"
        opus = "opus"
        aac = "aac"
        flac = "flac"
        wav = "wav"
        pcm = "pcm"
|
533 |
+
|
534 |
+
class AudioAPI:
    """Request schemas for the audio endpoints."""

    # NOTE: no class docstring on the model itself — pydantic would surface
    # it as the schema description in the generated OpenAPI docs.
    class TTSRequest(BaseModel):
        # Which TTS model to synthesize with.
        model: OpenaiTTSModels.ModelType = Field(
            ..., description="The TTS model to use"
        )
        # Which preset voice to use.
        voice: OpenaiTTSModels.VoiceType = Field(
            ..., description="The voice type for speech synthesis"
        )
        # Raw text to be spoken.
        input: str = Field(
            ..., description="The text to convert to speech"
        )
        # Audio container/codec of the response; mp3 when omitted.
        output_format: OpenaiTTSModels.OutputFormat = Field(
            default=OpenaiTTSModels.OutputFormat.mp3,
            description="The audio output format",
        )
|
540 |
+
|
541 |
+
@app.post("v2/tts")
|
542 |
+
async def text_to_speech(request: AudioAPI.TTSRequest, api_key: str = Depends(get_api_key)):
|
543 |
+
"""
|
544 |
+
Convert text to speech using OpenAI's TTS API with real-time audio streaming.
|
545 |
+
Requires API Key authentication via X-API-Key header.
|
546 |
+
"""
|
547 |
+
try:
|
548 |
+
response = openai_client.audio.speech.create(
|
549 |
+
model=request.model,
|
550 |
+
voice=request.voice,
|
551 |
+
input=request.input,
|
552 |
+
response_format=request.output_format
|
553 |
+
)
|
554 |
+
|
555 |
+
content_type = f"audio/{request.output_format.value}"
|
556 |
+
if request.output_format == OpenaiTTSModels.OutputFormat.pcm:
|
557 |
+
content_type = "audio/pcm"
|
558 |
+
|
559 |
+
return StreamingResponse(io.BytesIO(response.content), media_type=content_type)
|
560 |
+
|
561 |
+
except Exception as e:
|
562 |
+
raise HTTPException(status_code=500, detail=str(e))
|
563 |
+
|
564 |
# ============================================================================
|
565 |
# Main Execution
|
566 |
# ============================================================================
|