Spaces:

witfoo
/

training-tts

Sleeping

App Files Files Community

training-tts / app.py

f15hb0wn

Upload app.py with huggingface_hub

f8fcbb0 verified 8 days ago

raw

history blame contribute delete

4.84 kB

	"""
	WitFoo Training TTS — HuggingFace Space
	Multilingual text-to-speech using Microsoft Edge TTS neural voices.
	Supports: en, es, fr, de, ja, ar
	"""

	import gradio as gr
	import asyncio
	import tempfile
	import os
	import edge_tts

	# Default (male) neural voice used for each supported language code.
	VOICES = {
	    "en": "en-US-GuyNeural",
	    "es": "es-MX-JorgeNeural",
	    "fr": "fr-FR-HenriNeural",
	    "de": "de-DE-ConradNeural",
	    "ja": "ja-JP-KeitaNeural",
	    "ar": "ar-SA-HamedNeural",
	}

	# Female counterpart for every language listed in VOICES.
	VOICES_FEMALE = {
	    "en": "en-US-JennyNeural",
	    "es": "es-MX-DaliaNeural",
	    "fr": "fr-FR-DeniseNeural",
	    "de": "de-DE-KatjaNeural",
	    "ja": "ja-JP-NanamiNeural",
	    "ar": "ar-SA-ZariyahNeural",
	}

	# Language codes in declaration order (a dict iterates over its keys).
	SUPPORTED_LANGUAGES = list(VOICES)

	# Human-readable name for each language code.
	LANG_NAMES = {
	    "en": "English",
	    "es": "Spanish",
	    "fr": "French",
	    "de": "German",
	    "ja": "Japanese",
	    "ar": "Arabic",
	}


	async def _generate_async(
	    text: str, voice: str, output_path: str, *, rate: str = "-5%"
	) -> None:
	    """Synthesise *text* with Edge TTS and save the audio to *output_path*.

	    Args:
	        text: The text to speak.
	        voice: Edge TTS voice name, e.g. ``"en-US-GuyNeural"``.
	        output_path: Destination file path handed to ``Communicate.save``.
	        rate: Speaking-rate adjustment passed through to
	            ``edge_tts.Communicate``. Defaults to ``"-5%"``, the value
	            that used to be hard-coded here, so existing callers are
	            unaffected.
	    """
	    communicate = edge_tts.Communicate(text, voice, rate=rate)
	    await communicate.save(output_path)


	def _run_async(coro):
	    """Run *coro* to completion from synchronous code and return its result.

	    If no event loop is running in the calling thread, the coroutine is
	    executed directly with ``asyncio.run``. If a loop is already running
	    (``asyncio.run`` refuses to start in that case), the coroutine is run
	    via ``asyncio.run`` on a short-lived worker thread while the calling
	    thread blocks on the result.

	    Args:
	        coro: The coroutine object to execute.

	    Returns:
	        Whatever the coroutine returns.

	    Raises:
	        Any exception raised by the coroutine, unchanged.
	    """
	    # Keep the try body to the probe alone: a RuntimeError raised by the
	    # coroutine itself must propagate, not be mistaken for "no running
	    # loop" and trigger a second asyncio.run() on an already-awaited
	    # coroutine.
	    try:
	        asyncio.get_running_loop()
	    except RuntimeError:
	        return asyncio.run(coro)

	    import concurrent.futures

	    # A single task is submitted, so one worker is enough.
	    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
	        return pool.submit(asyncio.run, coro).result()


	def generate_speech(text: str, language: str, voice_type: str) -> str:
	    """Generate speech for *text* in the requested language.

	    Args:
	        text: Narration text; must contain non-whitespace characters.
	        language: Language code; must be one of ``SUPPORTED_LANGUAGES``.
	        voice_type: ``"Female"`` selects ``VOICES_FEMALE``; any other
	            value selects ``VOICES``.

	    Returns:
	        Path of a newly created temporary file (``.mp3`` suffix) holding
	        the generated audio.

	    Raises:
	        gr.Error: If *text* is empty/blank or *language* is unsupported.
	    """
	    # Guard against None as well as blank strings.
	    if not text or not text.strip():
	        raise gr.Error("Text cannot be empty")
	    if language not in SUPPORTED_LANGUAGES:
	        raise gr.Error(f"Unsupported language: {language}")

	    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
	    voice = voices[language]

	    # mkstemp creates the file atomically; the deprecated mktemp only
	    # returns a name, which another process could claim before it is used.
	    fd, output_path = tempfile.mkstemp(suffix=".mp3")
	    os.close(fd)  # the file is reopened by path when the audio is saved
	    try:
	        _run_async(_generate_async(text, voice, output_path))
	    except Exception:
	        # Do not leave an empty placeholder behind when synthesis fails.
	        try:
	            os.remove(output_path)
	        except OSError:
	            pass  # best-effort cleanup; the original error matters more
	        raise

	    return output_path


	def batch_generate(texts: str, language: str, voice_type: str):
	    """Generate one audio file per ``|||``-separated segment of *texts*.

	    Args:
	        texts: Input text; segments are separated by ``|||``. Blank
	            segments are ignored.
	        language: Language code. Codes missing from the voice table fall
	            back to the English male voice.
	        voice_type: ``"Female"`` selects ``VOICES_FEMALE``; any other
	            value selects ``VOICES``.

	    Returns:
	        List of temporary file paths, one per segment, in input order.

	    Raises:
	        gr.Error: If *texts* contains no non-blank segment.
	    """
	    # The separator used to be spelled with backslash-escaped pipes, which
	    # are not valid Python escapes and produced a delimiter containing
	    # literal backslashes. Split on plain "|||" and keep accepting the
	    # escaped spelling for backward compatibility.
	    normalized = (texts or "").replace("\\|\\|\\|", "|||")
	    segments = [part.strip() for part in normalized.split("|||") if part.strip()]
	    if not segments:
	        raise gr.Error("No text segments found. Separate with |||")

	    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
	    voice = voices.get(language, VOICES["en"])

	    total = len(segments)
	    results = []
	    try:
	        for index, segment in enumerate(segments, start=1):
	            print(f"[{index}/{total}] {segment[:60]}...")
	            # mkstemp creates the file atomically; the deprecated mktemp
	            # only returns a name that may be taken before it is used.
	            fd, output_path = tempfile.mkstemp(suffix=f"_seg{index:03d}.mp3")
	            os.close(fd)  # the file is reopened by path when saved
	            results.append(output_path)
	            _run_async(_generate_async(segment, voice, output_path))
	    except Exception:
	        # A failed batch returns nothing, so remove the files created so far.
	        for path in results:
	            try:
	                os.remove(path)
	            except OSError:
	                pass  # best-effort cleanup; the original error matters more
	        raise

	    return results


	# Gradio interface
	# Two tabs: one clip from a single text, or one file per segment of a batch.
	# NOTE(review): the nesting below was reconstructed from a copy of the file
	# whose indentation had been lost — confirm it matches the intended layout.
	with gr.Blocks(title="WitFoo Training TTS", theme=gr.themes.Base()) as demo:
	    gr.Markdown("# WitFoo Training TTS")
	    gr.Markdown("Generate multilingual voiceover for training courses using neural TTS voices.")

	    with gr.Tab("Single Generation"):
	        with gr.Row():
	            # Left column: text, language/voice selection and the trigger button.
	            with gr.Column():
	                text_input = gr.Textbox(label="Narration Text", lines=8,
	                                        placeholder="Enter narration text to convert to speech...")
	                with gr.Row():
	                    lang_input = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="en", label="Language")
	                    voice_input = gr.Radio(choices=["Male", "Female"], value="Male", label="Voice")
	                generate_btn = gr.Button("Generate Speech", variant="primary", size="lg")
	            # Right column: audio output fed by the file path generate_speech returns.
	            with gr.Column():
	                audio_output = gr.Audio(label="Generated Speech", type="filepath")

	        # Input order matches generate_speech(text, language, voice_type).
	        generate_btn.click(fn=generate_speech, inputs=[text_input, lang_input, voice_input], outputs=audio_output)

	    with gr.Tab("Batch Generation"):
	        # NOTE(review): `\|` is not a recognised Python escape sequence, so the
	        # backslashes stay in these strings — confirm whether a plain `|||`
	        # was intended.
	        gr.Markdown("Separate text segments with `\|\|\|` for batch processing.")
	        batch_text = gr.Textbox(label="Texts (\|\|\| separated)", lines=12,
	                                placeholder="First paragraph...\n\|\|\|\nSecond paragraph...\n\|\|\|\nThird paragraph...")
	        with gr.Row():
	            batch_lang = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="en", label="Language")
	            batch_voice = gr.Radio(choices=["Male", "Female"], value="Male", label="Voice")
	        batch_btn = gr.Button("Generate All Segments", variant="primary", size="lg")
	        batch_output = gr.File(label="Generated Audio Files", file_count="multiple")

	        # Input order matches batch_generate(texts, language, voice_type).
	        batch_btn.click(fn=batch_generate, inputs=[batch_text, batch_lang, batch_voice], outputs=batch_output)

	    # Footer: list the default (VOICES) voice for every language.
	    gr.Markdown("---")
	    gr.Markdown("Voices: " + " \| ".join([f"{LANG_NAMES[k]}: {v}" for k, v in VOICES.items()]))
	    gr.Markdown("WitFoo Training Program — 9 certification courses, 6 languages, 178 lessons")

	if __name__ == "__main__":
	    # Listen on all network interfaces, port 7860.
	    demo.launch(server_name="0.0.0.0", server_port=7860)