Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -498,6 +498,69 @@ async def presentation_chat(query: PresentationChatModel, background_tasks: Back
|
|
498 |
})
|
499 |
|
500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
# ============================================================================
|
502 |
# Main Execution
|
503 |
# ============================================================================
|
|
|
498 |
})
|
499 |
|
500 |
|
501 |
+
# ============================================================================
|
502 |
+
# AUDIO ENDPOINTS
|
503 |
+
# ============================================================================
|
504 |
+
|
505 |
+
|
506 |
+
|
507 |
+
from enum import Enum
|
508 |
+
import io
|
509 |
+
|
510 |
+
|
511 |
+
# Module-level OpenAI SDK client shared by the audio endpoints below.
# OpenAI() with no arguments takes its configuration (API key, base URL)
# from the environment — NOTE(review): confirm OPENAI_API_KEY is set in deployment.
openai_client = OpenAI()
|
512 |
+
|
513 |
+
class OpenaiTTSModels:
    """Namespace grouping the closed sets of values accepted by OpenAI's TTS API.

    Each nested enum mixes in ``str`` so members compare equal to (and
    serialize as) their literal API string.
    """

    class ModelType(str, Enum):
        """Speech-synthesis model identifiers."""

        tts_1_hd = "tts-1-hd"
        tts_1 = "tts-1"

    class VoiceType(str, Enum):
        """Preset voices available for synthesis."""

        alloy = "alloy"
        echo = "echo"
        fable = "fable"
        onyx = "onyx"
        nova = "nova"
        shimmer = "shimmer"

    class OutputFormat(str, Enum):
        """Audio container/codec formats the API can return."""

        mp3 = "mp3"
        opus = "opus"
        aac = "aac"
        flac = "flac"
        wav = "wav"
        pcm = "pcm"
|
533 |
+
|
534 |
+
class AudioAPI:
    """Request schemas for the audio endpoints."""

    # NOTE: no class docstring on the model itself — pydantic would surface
    # it as the schema description in the generated OpenAPI docs.
    class TTSRequest(BaseModel):
        # Which TTS model to synthesize with.
        model: OpenaiTTSModels.ModelType = Field(
            ..., description="The TTS model to use"
        )
        # Which preset voice to use.
        voice: OpenaiTTSModels.VoiceType = Field(
            ..., description="The voice type for speech synthesis"
        )
        # Raw text to be spoken.
        input: str = Field(
            ..., description="The text to convert to speech"
        )
        # Audio container/codec of the response; mp3 when omitted.
        output_format: OpenaiTTSModels.OutputFormat = Field(
            default=OpenaiTTSModels.OutputFormat.mp3,
            description="The audio output format",
        )
|
540 |
+
|
541 |
+
@app.post("v2/tts")
|
542 |
+
async def text_to_speech(request: AudioAPI.TTSRequest, api_key: str = Depends(get_api_key)):
|
543 |
+
"""
|
544 |
+
Convert text to speech using OpenAI's TTS API with real-time audio streaming.
|
545 |
+
Requires API Key authentication via X-API-Key header.
|
546 |
+
"""
|
547 |
+
try:
|
548 |
+
response = openai_client.audio.speech.create(
|
549 |
+
model=request.model,
|
550 |
+
voice=request.voice,
|
551 |
+
input=request.input,
|
552 |
+
response_format=request.output_format
|
553 |
+
)
|
554 |
+
|
555 |
+
content_type = f"audio/{request.output_format.value}"
|
556 |
+
if request.output_format == OpenaiTTSModels.OutputFormat.pcm:
|
557 |
+
content_type = "audio/pcm"
|
558 |
+
|
559 |
+
return StreamingResponse(io.BytesIO(response.content), media_type=content_type)
|
560 |
+
|
561 |
+
except Exception as e:
|
562 |
+
raise HTTPException(status_code=500, detail=str(e))
|
563 |
+
|
564 |
# ============================================================================
|
565 |
# Main Execution
|
566 |
# ============================================================================
|