pvanand committed on
Commit
fbae44c
1 Parent(s): 65df059

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +63 -0
main.py CHANGED
@@ -498,6 +498,69 @@ async def presentation_chat(query: PresentationChatModel, background_tasks: Back
498
  })
499
 
500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  # ============================================================================
502
  # Main Execution
503
  # ============================================================================
 
498
  })
499
 
500
 
501
+ # ============================================================================
502
+ # AUDIO ENDPOINTS
503
+ # ============================================================================
504
+
505
+
506
+
507
+ from enum import Enum
508
+ import io
509
+
510
+
511
+ # Module-level OpenAI client, constructed once when this module is imported
+ # and reused by the audio endpoints below. It is created with no explicit
+ # arguments -- presumably credentials come from the environment; TODO confirm.
+ openai_client = OpenAI()
512
+
513
class OpenaiTTSModels:
    """Namespace grouping the string enums accepted by the TTS request schema."""

    # NOTE: the nested enums are documented with comments rather than
    # docstrings so that their ``__doc__`` (which schema generators may
    # surface) stays exactly as it was.

    # Identifiers of the selectable TTS models.
    class ModelType(str, Enum):
        tts_1_hd = "tts-1-hd"
        tts_1 = "tts-1"

    # Names of the selectable voices.
    class VoiceType(str, Enum):
        alloy = "alloy"
        echo = "echo"
        fable = "fable"
        onyx = "onyx"
        nova = "nova"
        shimmer = "shimmer"

    # Audio formats that can be requested for the synthesized output.
    class OutputFormat(str, Enum):
        mp3 = "mp3"
        opus = "opus"
        aac = "aac"
        flac = "flac"
        wav = "wav"
        pcm = "pcm"
533
+
534
class AudioAPI:
    """Namespace for the request schemas used by the audio endpoints."""

    # Request body for text-to-speech: which model and voice to use, the text
    # to speak, and the audio format to return (mp3 when omitted).
    # Documented with a comment rather than a docstring so the model's
    # generated schema description is left untouched.
    class TTSRequest(BaseModel):
        model: OpenaiTTSModels.ModelType = Field(
            ...,
            description="The TTS model to use",
        )
        voice: OpenaiTTSModels.VoiceType = Field(
            ...,
            description="The voice type for speech synthesis",
        )
        input: str = Field(
            ...,
            description="The text to convert to speech",
        )
        output_format: OpenaiTTSModels.OutputFormat = Field(
            default=OpenaiTTSModels.OutputFormat.mp3,
            description="The audio output format",
        )
540
+
541
@app.post("/v2/tts")  # leading "/" is required: request paths always start with one
async def text_to_speech(request: AudioAPI.TTSRequest, api_key: str = Depends(get_api_key)):
    """
    Convert text to speech using OpenAI's TTS API and return the audio.

    Authentication is delegated to the ``get_api_key`` dependency (documented
    by the original author as an ``X-API-Key`` header check).

    The synthesized audio is fetched in full from OpenAI and then returned
    wrapped in a ``StreamingResponse``; it is not relayed chunk-by-chunk while
    it is being generated.

    Args:
        request: Model, voice, input text and output format for the synthesis.
        api_key: Value produced by the ``get_api_key`` dependency; not used in
            the body, it only enforces that the dependency runs.

    Returns:
        StreamingResponse whose media type is ``audio/<output_format>``.

    Raises:
        HTTPException: status 500 carrying the error text if the synthesis or
            response construction fails.
    """
    # Function-scope imports so this block does not rely on the file's
    # top-level import list.
    import asyncio
    import functools

    try:
        synthesize = functools.partial(
            openai_client.audio.speech.create,
            model=request.model,
            voice=request.voice,
            input=request.input,
            response_format=request.output_format,
        )
        # The client call is synchronous; running it in the default executor
        # keeps the event loop free to serve other requests meanwhile.
        response = await asyncio.get_running_loop().run_in_executor(None, synthesize)

        # Every format, including pcm, maps to "audio/<format>", so no
        # special case is needed.
        # NOTE(review): "audio/mp3" is kept for backward compatibility; the
        # registered media type for MP3 is "audio/mpeg".
        content_type = f"audio/{request.output_format.value}"

        return StreamingResponse(io.BytesIO(response.content), media_type=content_type)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
563
+
564
  # ============================================================================
565
  # Main Execution
566
  # ============================================================================