shriniket73 committed on
Commit
3f9aa51
1 Parent(s): 7ac11e7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ import torch
5
+ import base64
6
+ import io
7
+ from typing import Dict
8
+ from pydantic import BaseModel
9
+ import numpy as np
10
+ import logging
11
+ from pathlib import Path
12
+ import os
13
+ from tts_service import OptimizedTTSService
14
+
15
# Module-level logger named after this module; the root logger is configured
# at INFO so the service's informational messages are emitted.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
18
+
19
# Request body accepted by the POST /generate endpoint.
class TTSRequest(BaseModel):
    # The text to be synthesized into speech.
    text: str
21
+
22
app = FastAPI()

# Allow browser clients on any origin to call the API.
#
# Credentials (cookies / Authorization headers) are deliberately NOT allowed:
# credentialed CORS requests may not be combined with wildcard origins,
# methods or headers, and no cookie- or session-based authentication is
# visible in this module. If credentialed requests are ever required, replace
# the wildcards with explicit lists before turning allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
32
+
33
+ # Initialize service with embedded latents
34
class TTSServiceConfig:
    """Filesystem locations handed to ``OptimizedTTSService``.

    Both paths default to the values this module has always used, so
    ``TTSServiceConfig()`` behaves exactly as before; they can be overridden
    (e.g. in tests or alternative deployments) without editing this file.

    Args:
        latents_path: Path to the latents file (``.pt``).
            NOTE(review): presumably precomputed speaker latents loaded by
            the service -- confirm against ``tts_service``.
        model_path: Path to the model directory.
            NOTE(review): presumably the XTTS v2 checkpoint -- confirm
            against ``tts_service``.
    """

    def __init__(
        self,
        latents_path: str = "models/goggins_latents.pt",
        model_path: str = "models/xtts_v2",
    ) -> None:
        self.latents_path = latents_path
        self.model_path = model_path
38
+
39
# Create the working directories relative to the current working directory;
# exist_ok=True makes this a no-op when they are already present.
for _dirname in ("models", "cache"):
    Path(_dirname).mkdir(exist_ok=True)

# Build the configuration and the single module-level TTS service at import
# time; the request handler below reuses this instance.
config = TTSServiceConfig()
service = OptimizedTTSService(config)
45
+
46
@app.post("/generate")
async def generate_speech(request: TTSRequest):
    """Synthesize speech for ``request.text`` and return it base64-encoded.

    The waveform returned by ``service.generate_speech`` is converted to
    float32, serialized with ``numpy.save`` (``.npy`` format) into an
    in-memory buffer, and base64-encoded so it can be carried in a JSON body.

    Args:
        request: Parsed request body containing the text to synthesize.

    Returns:
        A dict with ``"status": "success"`` and ``"audio"`` holding the
        base64 text of the ``.npy``-serialized float32 waveform.

    Raises:
        HTTPException: 400 when the text is empty or whitespace-only;
            500 when synthesis or serialization fails (the original
            exception message is passed through as ``detail``).
    """
    # Reject blank input up front so the client gets a clear 400 instead of
    # whatever error the synthesis backend would raise (surfaced as a 500).
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="text must not be empty")

    try:
        logger.info("Generating speech for text: %s...", request.text[:50])
        # NOTE(review): this is a synchronous call inside an async handler,
        # so the event loop is blocked while synthesis runs. Consider
        # offloading it to a worker thread once the service is confirmed
        # to be thread-safe.
        wav = service.generate_speech(request.text)

        buffer = io.BytesIO()
        # np.asarray accepts any array-like (ndarray, list, ...) and avoids
        # an extra copy when the data is already float32.
        np.save(buffer, np.asarray(wav, dtype=np.float32))
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Error generating speech: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "status": "success",
        "audio": base64.b64encode(buffer.getvalue()).decode(),
    }