apitts-v2-2

Sleeping

App Files Files Community

hivecorp committed on Feb 14

Commit

a66f077

verified ·

1 Parent(s): 366652b

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -11

app.py CHANGED Viewed

@@ -4,11 +4,13 @@ import asyncio
 from fastapi import FastAPI
 import edge_tts
 from fastapi.responses import FileResponse
 app = FastAPI()
 def split_text(text, max_chunk_size=500):
-    """Split text into smaller chunks."""
     sentences = text.replace('।', '.').replace('؟', '?').split('.')
     chunks = []
     current_chunk = []
@@ -31,41 +33,64 @@ def split_text(text, max_chunk_size=500):
     return chunks
 async def process_chunk(text, voice, temp_dir, chunk_index):
-    """Process a single chunk of text."""
     tmp_path = os.path.join(temp_dir, f"chunk_{chunk_index}.mp3")
-    print(f"🎤 Processing chunk {chunk_index}: {text[:50]}...")  # Logging
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(tmp_path)
     return tmp_path
 async def combine_audio_files(chunk_files, output_path):
-    """Combine multiple MP3 files into one."""
-    from pydub import AudioSegment
     combined = AudioSegment.empty()
     for file in chunk_files:
-        print(f"🔹 Adding {file} to final output")  # Logging
         combined += AudioSegment.from_mp3(file)
     combined.export(output_path, format="mp3")
     for file in chunk_files:
         os.remove(file)
 @app.get("/tts")
 async def tts(text: str, voice: str = "en-US-AriaNeural"):
-    """Main API function to process TTS."""
     temp_dir = "temp_audio"
     os.makedirs(temp_dir, exist_ok=True)
     chunks = split_text(text)
     if len(chunks) == 1:
-        return await FileResponse(await process_chunk(text, voice, temp_dir, 0), media_type="audio/mpeg", filename="speech.mp3")
-    chunk_files = await asyncio.gather(*[process_chunk(ch, voice, temp_dir, i) for i, ch in enumerate(chunks)])
     output_file = "final_output.mp3"
     await combine_audio_files(chunk_files, output_file)
     return FileResponse(output_file, media_type="audio/mpeg", filename="speech.mp3")

 from fastapi import FastAPI
 import edge_tts
 from fastapi.responses import FileResponse
+from pydub import AudioSegment
 app = FastAPI()
+# 🔹 Function to split text into manageable chunks
 def split_text(text, max_chunk_size=500):
+    """Split text into smaller chunks at sentence boundaries."""
     sentences = text.replace('।', '.').replace('؟', '?').split('.')
     chunks = []
     current_chunk = []
     return chunks
+# 🔹 Function to process a single chunk asynchronously
 async def process_chunk(text, voice, temp_dir, chunk_index):
+    """Generate speech for a single chunk and save as MP3."""
     tmp_path = os.path.join(temp_dir, f"chunk_{chunk_index}.mp3")
+    print(f"🎤 Processing chunk {chunk_index}: {text[:50]}...")  # Logging for debugging
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(tmp_path)
     return tmp_path
+# 🔹 Function to merge all chunked MP3 files into a single audio file
 async def combine_audio_files(chunk_files, output_path):
+    """Combine multiple MP3 files into one final MP3."""
     combined = AudioSegment.empty()
     for file in chunk_files:
+        print(f"🔹 Adding {file} to final output")  # Logging for debugging
         combined += AudioSegment.from_mp3(file)
     combined.export(output_path, format="mp3")
+    # Remove temporary files
     for file in chunk_files:
         os.remove(file)
+@app.get("/")
+def home():
+    return {"message": "✅ EdgeTTS FastAPI is running!"}
+# 🔹 Main TTS API
 @app.get("/tts")
 async def tts(text: str, voice: str = "en-US-AriaNeural"):
+    """Main API function to handle text-to-speech conversion."""
     temp_dir = "temp_audio"
     os.makedirs(temp_dir, exist_ok=True)
     chunks = split_text(text)
+    # If text is short, process directly
     if len(chunks) == 1:
+        print("📢 Processing without chunking...")
+        output_file = await process_chunk(text, voice, temp_dir, 0)
+        return FileResponse(output_file, media_type="audio/mpeg", filename="speech.mp3")
+    print(f"🚀 Splitting into {len(chunks)} chunks and processing concurrently...")
+    # 🔹 Concurrently process all chunks
+    chunk_files = await asyncio.gather(*[
+        process_chunk(ch, voice, temp_dir, i) for i, ch in enumerate(chunks)
+    ])
+    # 🔹 Merge all MP3 files
     output_file = "final_output.mp3"
     await combine_audio_files(chunk_files, output_file)
+    print("✅ TTS Generation Complete. Sending response...")
     return FileResponse(output_file, media_type="audio/mpeg", filename="speech.mp3")
+# 🔹 Ensure app starts in Hugging Face Spaces
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)