Spaces:

Edmon02
/

SpeechT5_hy

Runtime error

App Files Files Community

SpeechT5_hy / app.py

Edmon02

feat: Implement project organization plan and optimize TTS deployment

3f1840e 16 days ago

raw

history blame contribute delete

4.04 kB

	"""
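	Armenian TTS - Minimal HF Spaces Version
	========================================

	Minimal Gradio front end for the Armenian TTS pipeline, kept deliberately
	small to limit compatibility issues. If the pipeline cannot be loaded, the
	app still starts and plays a placeholder tone instead of speech.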
	"""

	import gradio as gr
	import numpy as np
	import logging
	import os
	import sys

	# Logging setup: configure the root logger at INFO level and create the
	# module-level logger used by the rest of this file.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	def setup_pipeline():
	    """Build the TTS pipeline, or report that it is unavailable.

	    Puts the ``src`` directory that sits next to this file at the front of
	    ``sys.path``, imports ``TTSPipeline`` from ``src.pipeline``, constructs
	    it for the ``Edmon02/TTS_NB_2`` checkpoint and calls
	    ``optimize_for_production()`` on the new instance.

	    Returns:
	        tuple: ``(pipeline, True)`` on success, or ``(None, False)`` when
	        any step of the import/initialization sequence raises.
	    """
	    try:
	        # Make the bundled ``src`` directory importable.
	        app_dir = os.path.dirname(os.path.abspath(__file__))
	        src_path = os.path.join(app_dir, "src")
	        if src_path not in sys.path:
	            sys.path.insert(0, src_path)

	        # Imported here rather than at module top so that a failing import
	        # is caught below and only disables synthesis instead of aborting
	        # the whole application at import time.
	        from src.pipeline import TTSPipeline

	        pipeline = TTSPipeline(
	            model_checkpoint="Edmon02/TTS_NB_2",
	            max_chunk_length=200,
	            use_mixed_precision=True,
	        )
	        pipeline.optimize_for_production()
	    except Exception as exc:
	        # Deliberate catch-all boundary: the caller falls back to test mode.
	        # logger.exception keeps the traceback, which the plain error
	        # message alone did not, so the failure can actually be diagnosed.
	        logger.exception("Pipeline initialization failed: %s", exc)
	        return None, False
	    else:
	        logger.info("TTS pipeline initialized successfully")
	        return pipeline, True

	def _fallback_tone(text, sample_rate=16000):
	    """Return ``(sample_rate, int16 samples)`` of a decaying 440 Hz tone.

	    The tone lasts 0.08 s per character of ``text``, capped at 4 s.
	    ``text`` is expected to be non-empty (the caller strips and validates).
	    """
	    duration = min(len(text) * 0.08, 4.0)
	    samples = int(duration * sample_rate)

	    # Sample instants spaced exactly 1 / sample_rate apart. linspace with its
	    # default endpoint=True would space them duration / (samples - 1) apart
	    # and so slightly detune the tone on playback.
	    t = np.arange(samples) / sample_rate

	    # A4 fundamental plus two quieter harmonics for a richer sound.
	    phase = 2 * np.pi * 440 * t
	    audio = (
	        np.sin(phase) * 0.2
	        + np.sin(2 * phase) * 0.1
	        + np.sin(3 * phase) * 0.05
	    )

	    # Exponential decay envelope.
	    audio *= np.exp(-t * 2)

	    # Peak amplitude is at most 0.35, so scaling to int16 cannot overflow.
	    return sample_rate, (audio * 32767).astype(np.int16)


	def tts_process(text):
	    """Convert text to audio; never raises for synthesis problems.

	    Reads the module-level ``tts_pipeline`` and ``pipeline_available``.

	    Args:
	        text: Text to speak. Anything that is not a non-blank ``str`` is
	            treated as empty input.

	    Returns:
	        tuple: ``(sample_rate, samples)``.

	        * Blank or non-string input: one second of int16 silence at 16 kHz.
	        * Pipeline unavailable: a short decaying tone (int16, 16 kHz) whose
	          length grows with the text length, capped at 4 s.
	        * Otherwise: whatever ``tts_pipeline.synthesize`` returns. If that
	          call raises, 8000 samples of int16 silence at 16 kHz.
	    """
	    # Check the type before touching the value so odd inputs (None, numbers,
	    # arrays) are handled without evaluating their truthiness.
	    if not isinstance(text, str) or not text.strip():
	        return 16000, np.zeros(16000, dtype=np.int16)

	    text = text.strip()

	    # No usable pipeline: answer with a placeholder tone instead of speech.
	    if not pipeline_available or tts_pipeline is None:
	        logger.info("Using fallback for text: %s...", text[:30])
	        return _fallback_tone(text)

	    try:
	        logger.info("Synthesizing: %s...", text[:50])
	        sample_rate, audio = tts_pipeline.synthesize(
	            text=text,
	            speaker="BDL",
	            enable_chunking=True,
	            apply_audio_processing=True,
	        )
	        logger.info("Successfully generated %d samples", len(audio))
	        return sample_rate, audio
	    except Exception as exc:
	        # Best-effort boundary for the UI: keep the traceback in the log and
	        # hand back silence rather than surfacing an error.
	        logger.exception("TTS synthesis failed: %s", exc)
	        return 16000, np.zeros(8000, dtype=np.int16)

	# Load the pipeline a single time at import; tts_process reads the two
	# module-level names assigned here.
	logger.info("Initializing Armenian TTS application...")
	tts_pipeline, pipeline_available = setup_pipeline()

	# Choose the UI texts according to whether the real pipeline loaded.
	title, description = (
	    (
	        "🇦🇲 Armenian Text-to-Speech (Ready)",
	        "Convert Armenian text to speech using SpeechT5.",
	    )
	    if pipeline_available
	    else (
	        "🇦🇲 Armenian TTS (Test Mode)",
	        "TTS system in test mode - will generate simple audio tones.",
	    )
	)

	# Build the UI: a single text input handed to tts_process and a single
	# audio output, plus three sample phrases.
	app = gr.Interface(
	    title=title,
	    description=description,
	    fn=tts_process,
	    inputs="text",
	    outputs="audio",
	    examples=["Բարև ձեզ", "Շնորհակալություն", "Ինչպե՞ս եք"],
	)

	# Start the server only when this file is executed as a script: listen on
	# all interfaces, port 7860, without a public share link.
	if __name__ == "__main__":
	    app.launch(server_name="0.0.0.0", server_port=7860, share=False)