Spaces:

Edmon02
/

SpeechT5_hy

Runtime error

App Files Files Community

SpeechT5_hy / archive /app_final.py

Edmon02

feat: Implement project organization plan and optimize TTS deployment

3f1840e 25 days ago

raw

history blame contribute delete

5.7 kB

	"""
	Armenian TTS - HuggingFace Spaces Compatible
	===========================================

	Final version optimized for HF Spaces with Gradio 3.x compatibility.
	"""

	import gradio as gr
	import numpy as np
	import logging
	import os
	import sys

	# Logging: configure the root logger at INFO and grab a logger named after
	# this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Shared module state, filled in by initialize_tts():
	#   pipeline_ready - True only once the pipeline has been fully set up
	#   tts_pipeline   - the pipeline object, or None while unavailable
	pipeline_ready = False
	tts_pipeline = None

	def initialize_tts():
	    """Build the global TTS pipeline and report whether it succeeded.

	    Adds the ``src`` directory located next to this file to ``sys.path``,
	    imports ``TTSPipeline`` from ``src.pipeline``, constructs it for the
	    ``Edmon02/TTS_NB_2`` checkpoint and calls ``optimize_for_production()``
	    on it.

	    Side effects:
	        Sets the module globals ``tts_pipeline`` and ``pipeline_ready``.
	        They are only published after every step has succeeded; on any
	        failure they are reset to ``None`` / ``False`` so a partially
	        configured pipeline is never left behind.

	    Returns:
	        bool: ``True`` when the pipeline was created and optimized,
	        ``False`` when any step raised (the error is logged with its
	        traceback and not re-raised).
	    """
	    global tts_pipeline, pipeline_ready

	    try:
	        # Make the sibling ``src`` directory importable.
	        # NOTE(review): ``from src.pipeline`` additionally needs the
	        # directory that contains ``src`` on sys.path, which holds when
	        # this file is run as a script - confirm for other entry points.
	        current_dir = os.path.dirname(os.path.abspath(__file__))
	        src_path = os.path.join(current_dir, 'src')
	        if src_path not in sys.path:
	            sys.path.insert(0, src_path)

	        # Imported here (not at module top) so that an import failure is
	        # caught below and reported as "not ready" instead of aborting.
	        from src.pipeline import TTSPipeline

	        logger.info("Initializing TTS pipeline...")
	        pipeline = TTSPipeline(
	            model_checkpoint="Edmon02/TTS_NB_2",
	            max_chunk_length=200,
	            crossfade_duration=0.1,
	            use_mixed_precision=True
	        )

	        # Apply optimizations before exposing the object globally.
	        pipeline.optimize_for_production()
	    except Exception as exc:
	        # Top-level boundary: keep the app alive, but keep the traceback.
	        logger.exception("Failed to initialize TTS pipeline: %s", exc)
	        tts_pipeline = None
	        pipeline_ready = False
	        return False

	    tts_pipeline = pipeline
	    pipeline_ready = True
	    logger.info("TTS pipeline ready!")
	    return True

	def synthesize_speech(text):
	    """Turn input text into audio, falling back to placeholder audio.

	    Behaviour, in order:
	      1. Input that is not a string, or is empty/whitespace-only, yields
	         one second of silence via ``create_silence``.
	      2. If the global pipeline is not ready, a placeholder tone from
	         ``create_fallback_audio`` is returned.
	      3. Otherwise the stripped text is passed to
	         ``tts_pipeline.synthesize``; any exception raised there is logged
	         with its traceback and the placeholder tone is returned instead.

	    Args:
	        text (str): Armenian text to synthesize.

	    Returns:
	        tuple: ``(sample_rate, audio_array)``.
	    """
	    # Validate input (None, non-strings, "" and blank strings all land here).
	    if not isinstance(text, str) or not text.strip():
	        return create_silence(1.0)

	    # Check pipeline status.
	    if not pipeline_ready or tts_pipeline is None:
	        logger.warning("Pipeline not ready, generating fallback audio")
	        return create_fallback_audio(text)

	    try:
	        logger.info("Synthesizing: %s...", text[:50])

	        # Generate speech using the pipeline.
	        sample_rate, audio = tts_pipeline.synthesize(
	            text=text.strip(),
	            speaker="BDL",
	            enable_chunking=True,
	            apply_audio_processing=True
	        )

	        logger.info("Generated %s samples at %sHz", len(audio), sample_rate)
	        return sample_rate, audio

	    except Exception as exc:
	        # UI boundary: never surface a crash to the user, but record the
	        # full traceback so the failure can be diagnosed from the logs.
	        logger.exception("Synthesis error: %s", exc)
	        return create_fallback_audio(text)

	def create_silence(duration_seconds=1.0):
	    """Return ``(sample_rate, samples)`` for a stretch of silence.

	    The sample rate is fixed at 16000 Hz and the samples are an all-zero
	    one-dimensional int16 array of ``int(duration_seconds * 16000)`` items.
	    """
	    rate_hz = 16000
	    frame_count = int(duration_seconds * rate_hz)
	    silent_frames = np.zeros(frame_count, dtype=np.int16)
	    return rate_hz, silent_frames

	def create_fallback_audio(text):
	    """Generate a placeholder tone whose length scales with ``len(text)``.

	    The duration is 0.1 s per character, clamped to the range 0.5-5.0 s,
	    at a fixed 16000 Hz sample rate. The signal is a 440 Hz sine at
	    amplitude 0.3; for texts longer than 20 characters a 660 Hz sine at
	    amplitude 0.2 is added. A linear fade of at most 1000 samples (or a
	    tenth of the signal, whichever is shorter) is applied at both ends.

	    Args:
	        text (str): Source text; only its length is used.

	    Returns:
	        tuple: ``(sample_rate, audio)`` where ``audio`` is a
	        one-dimensional int16 array.
	    """
	    # Calculate duration based on text length.
	    sample_rate = 16000
	    duration = min(max(len(text) * 0.1, 0.5), 5.0)
	    samples = int(duration * sample_rate)

	    # Exact per-sample timestamps k / sample_rate. An endpoint-inclusive
	    # linspace(0, duration, samples) would space samples by
	    # duration / (samples - 1) and slightly detune the tone.
	    t = np.arange(samples) / sample_rate

	    # Base tone.
	    base_freq = 440  # A4
	    audio = np.sin(2 * np.pi * base_freq * t) * 0.3

	    # Add a second partial for longer texts (peak stays <= 0.5, no clipping).
	    if len(text) > 20:
	        audio += np.sin(2 * np.pi * (base_freq * 1.5) * t) * 0.2

	    # Apply fade in/out to avoid clicks at the edges.
	    fade_samples = min(samples // 10, 1000)
	    if fade_samples > 0:
	        audio[:fade_samples] *= np.linspace(0, 1, fade_samples)
	        audio[-fade_samples:] *= np.linspace(1, 0, fade_samples)

	    # Convert from float in [-1, 1] to int16.
	    audio_int16 = (audio * 32767).astype(np.int16)

	    return sample_rate, audio_int16

	# Start-up: try to build the pipeline once, then pick the status badge and
	# description text shown in the UI according to the outcome.
	logger.info("Starting Armenian TTS application...")
	init_success = initialize_tts()

	if not init_success:
	    # Pipeline unavailable: requests are answered with placeholder tones.
	    app_status = "🟡 Test Mode (Limited Functionality)"
	    app_description = """
	🎤 Armenian Text-to-Speech System - Test Mode

	The TTS system is running in test mode with limited functionality.
	Text input will generate simple audio tones as placeholders.
	"""
	else:
	    # Pipeline ready: show the regular usage instructions.
	    app_status = "🟢 TTS System Ready"
	    app_description = """
	🎤 Armenian Text-to-Speech System

	Convert Armenian text to natural speech using SpeechT5.

	How to use:
	1. Enter Armenian text in the box below
	2. Click Submit to generate speech
	3. Play the generated audio

	Tips for best results:
	- Use standard Armenian script (Unicode)
	- Include punctuation for natural pauses
	- Shorter sentences work better for quality
	"""

	# Create the Gradio interface: one text box in, one audio player out.
	# The top-level component classes (gr.Textbox / gr.Audio) are used instead
	# of the legacy gr.inputs / gr.outputs namespaces, which are deprecated in
	# Gradio 3.x and absent from later releases.
	# NOTE(review): allow_screenshot is kept for parity with the previous
	# configuration; it is presumably a deprecated no-op on Gradio 3.x - confirm
	# against the pinned Gradio version before removing it.
	demo = gr.Interface(
	    fn=synthesize_speech,
	    inputs=gr.Textbox(
	        lines=3,
	        placeholder="Մուտքագրեք ձեր հայերեն տեքստը այստեղ...",
	        label="Armenian Text Input",
	    ),
	    outputs=gr.Audio(
	        label="Generated Speech",
	    ),
	    title=f"🇦🇲 Armenian Text-to-Speech {app_status}",
	    description=app_description,
	    examples=[
	        "Բարև ձեզ, ինչպե՞ս եք:",
	        "Այսօր գեղեցիկ օր է:",
	        "Շնորհակալություն:",
	        "Հայաստան իմ սիրելի երկիրն է:",
	        "Երևանը Հայաստանի մայրաքաղաքն է:",
	    ],
	    theme="default",
	    allow_screenshot=False,
	    allow_flagging="never",
	)

	# Script entry point: serve the interface on all network interfaces,
	# port 7860, without a public share link.
	if __name__ == "__main__":
	    launch_options = dict(
	        server_name="0.0.0.0",
	        server_port=7860,
	        share=False,
	        debug=False,
	        quiet=False,
	    )
	    demo.launch(**launch_options)