Spaces:
Sleeping
Sleeping
"""
WitFoo Training TTS — HuggingFace Space

Multilingual text-to-speech using Microsoft Edge TTS neural voices.
Supported language codes: en, es, fr, de, ja, ar
"""
| import gradio as gr | |
| import asyncio | |
| import tempfile | |
| import os | |
| import edge_tts | |
# Voice used for the "Male" option, keyed by language code.
VOICES = {
    "en": "en-US-GuyNeural",
    "es": "es-MX-JorgeNeural",
    "fr": "fr-FR-HenriNeural",
    "de": "de-DE-ConradNeural",
    "ja": "ja-JP-KeitaNeural",
    "ar": "ar-SA-HamedNeural",
}

# Voice used for the "Female" option, keyed by the same language codes.
VOICES_FEMALE = {
    "en": "en-US-JennyNeural",
    "es": "es-MX-DaliaNeural",
    "fr": "fr-FR-DeniseNeural",
    "de": "de-DE-KatjaNeural",
    "ja": "ja-JP-NanamiNeural",
    "ar": "ar-SA-ZariyahNeural",
}

# Language codes offered by the app, in the insertion order of VOICES.
SUPPORTED_LANGUAGES = list(VOICES)

# Display name for each language code.
LANG_NAMES = {
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "ja": "Japanese",
    "ar": "Arabic",
}
async def _generate_async(text: str, voice: str, output_path: str) -> None:
    """Synthesize ``text`` with Edge TTS and write the audio to ``output_path``.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name (for example ``"en-US-GuyNeural"``).
        output_path: Destination file path for the generated audio.
    """
    # Every request is sent with the same fixed rate setting of "-5%".
    tts = edge_tts.Communicate(text, voice, rate="-5%")
    await tts.save(output_path)
| def _run_async(coro): | |
| """Run async coroutine, handling both fresh and existing event loops.""" | |
| try: | |
| loop = asyncio.get_running_loop() | |
| import concurrent.futures | |
| with concurrent.futures.ThreadPoolExecutor() as pool: | |
| future = pool.submit(asyncio.run, coro) | |
| return future.result() | |
| except RuntimeError: | |
| return asyncio.run(coro) | |
def generate_speech(text: str, language: str, voice_type: str) -> str:
    """Generate speech for ``text`` and return the path of the MP3 file.

    Args:
        text: Narration text; must contain non-whitespace characters.
        language: Language code; must be one of ``SUPPORTED_LANGUAGES``.
        voice_type: ``"Female"`` selects ``VOICES_FEMALE``; any other value
            selects ``VOICES``.

    Returns:
        Path to a newly created temporary ``.mp3`` file.

    Raises:
        gr.Error: If the text is empty or the language is not supported.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        raise gr.Error("Text cannot be empty")
    if language not in SUPPORTED_LANGUAGES:
        raise gr.Error(f"Unsupported language: {language}")

    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
    voice = voices[language]

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name and leaves a window for another process to claim it.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        _run_async(_generate_async(text, voice, output_path))
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        try:
            os.remove(output_path)
        except OSError:
            pass  # best-effort cleanup; the original error is what matters
        raise
    return output_path
def batch_generate(texts: str, language: str, voice_type: str):
    """Generate one MP3 file per ``|||``-separated segment of ``texts``.

    Args:
        texts: Input text; segments are separated by ``|||`` and blank
            segments are ignored.
        language: Language code. An unknown code falls back to the English
            voice of the selected voice type.
        voice_type: ``"Female"`` selects ``VOICES_FEMALE``; any other value
            selects ``VOICES``.

    Returns:
        List of paths to the generated temporary ``.mp3`` files, in segment
        order.

    Raises:
        gr.Error: If no non-empty segment is found.
    """
    # `texts or ""` also covers None, which would otherwise crash on .split().
    segments = [t.strip() for t in (texts or "").split("|||") if t.strip()]
    if not segments:
        raise gr.Error("No text segments found. Separate with |||")

    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
    # Fall back within the selected table so a "Female" request never ends up
    # with the male English voice.
    voice = voices.get(language, voices["en"])

    results = []
    total = len(segments)
    try:
        for index, segment in enumerate(segments, start=1):
            print(f"[{index}/{total}] {segment[:60]}...")
            # mkstemp creates the file atomically, unlike the deprecated mktemp.
            fd, output_path = tempfile.mkstemp(suffix=f"_seg{index:03d}.mp3")
            os.close(fd)
            results.append(output_path)
            _run_async(_generate_async(segment, voice, output_path))
    except Exception:
        # A failed batch returns nothing, so remove every file created so far.
        for path in results:
            try:
                os.remove(path)
            except OSError:
                pass  # best-effort cleanup; the original error is what matters
        raise
    return results
# Gradio interface.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# the component order — confirm against the deployed layout.
with gr.Blocks(title="WitFoo Training TTS", theme=gr.themes.Base()) as demo:
    gr.Markdown("# WitFoo Training TTS")
    gr.Markdown("Generate multilingual voiceover for training courses using neural TTS voices.")

    # Tab 1: one text in, one audio clip out.
    with gr.Tab("Single Generation"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Narration Text",
                    lines=8,
                    placeholder="Enter narration text to convert to speech...",
                )
                with gr.Row():
                    lang_input = gr.Dropdown(
                        choices=SUPPORTED_LANGUAGES,
                        value="en",
                        label="Language",
                    )
                    voice_input = gr.Radio(
                        choices=["Male", "Female"],
                        value="Male",
                        label="Voice",
                    )
                generate_btn = gr.Button("Generate Speech", variant="primary", size="lg")
            with gr.Column():
                audio_output = gr.Audio(label="Generated Speech", type="filepath")
        generate_btn.click(
            fn=generate_speech,
            inputs=[text_input, lang_input, voice_input],
            outputs=audio_output,
        )

    # Tab 2: several `|||`-separated texts in, one file per segment out.
    with gr.Tab("Batch Generation"):
        gr.Markdown("Separate text segments with `|||` for batch processing.")
        batch_text = gr.Textbox(
            label="Texts (||| separated)",
            lines=12,
            placeholder="First paragraph...\n|||\nSecond paragraph...\n|||\nThird paragraph...",
        )
        with gr.Row():
            batch_lang = gr.Dropdown(
                choices=SUPPORTED_LANGUAGES,
                value="en",
                label="Language",
            )
            batch_voice = gr.Radio(
                choices=["Male", "Female"],
                value="Male",
                label="Voice",
            )
        batch_btn = gr.Button("Generate All Segments", variant="primary", size="lg")
        batch_output = gr.File(label="Generated Audio Files", file_count="multiple")
        batch_btn.click(
            fn=batch_generate,
            inputs=[batch_text, batch_lang, batch_voice],
            outputs=batch_output,
        )

    # Footer: lists the entries of VOICES by language display name.
    gr.Markdown("---")
    gr.Markdown(
        "**Voices:** "
        + " | ".join(f"{LANG_NAMES[k]}: {v}" for k, v in VOICES.items())
    )
    gr.Markdown("*WitFoo Training Program — 9 certification courses, 6 languages, 178 lessons*")
if __name__ == "__main__":
    # Start the Gradio server when run as a script: all interfaces, port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )