Spaces:

Edmon02
/

SpeechT5_hy

Runtime error

App Files Files Community

SpeechT5_hy / archive /app_simple.py

Edmon02

feat: Implement project organization plan and optimize TTS deployment

3f1840e 25 days ago

raw

history blame contribute delete

6.43 kB

	"""
	SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
	============================================================

	Simplified and optimized for HuggingFace Spaces deployment.
	"""

	import gradio as gr
	import numpy as np
	import logging
	import time
	from typing import Tuple, Optional
	import os
	import sys

	# Make the ``src`` directory located beside this script importable by
	# placing it at the front of the module search path (only once).
	current_dir = os.path.split(os.path.abspath(__file__))[0]
	src_path = os.path.join(current_dir, 'src')
	if src_path not in sys.path:
	    sys.path.insert(0, src_path)

	# Import the real synthesis pipeline when it is available; otherwise define
	# a stand-in class with the same method names so the UI can still start.
	# ``HAS_PIPELINE`` records whether the real implementation was imported.
	try:
	    from src.pipeline import TTSPipeline
	    HAS_PIPELINE = True
	except ImportError as e:
	    # Log through a named logger instead of ``logging.error``: the
	    # module-level helper implicitly calls ``logging.basicConfig()`` when the
	    # root logger has no handlers, which would turn a later explicit
	    # ``logging.basicConfig(...)`` call into a no-op.
	    logging.getLogger(__name__).error("Failed to import pipeline: %s", e)
	    # Fallback import attempt
	    sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
	    try:
	        from src.pipeline import TTSPipeline
	        HAS_PIPELINE = True
	    except ImportError:
	        HAS_PIPELINE = False

	        class TTSPipeline:
	            """Dummy pipeline for testing when the real one cannot be imported."""

	            def __init__(self, *args, **kwargs):
	                # Accept and ignore any constructor arguments so this class
	                # can be built both with no arguments and with the keyword
	                # arguments intended for the real pipeline.
	                pass

	            def synthesize(self, text, **kwargs):
	                """Return a placeholder tone instead of real speech.

	                Args:
	                    text: Input text; only its length is used.
	                    **kwargs: Ignored.

	                Returns:
	                    Tuple ``(sample_rate, audio)`` where ``sample_rate`` is
	                    16000 and ``audio`` is an int16 array holding a 440 Hz
	                    sine wave lasting 0.1 s per character, capped at 5 s.
	                """
	                duration = min(len(text) * 0.1, 5.0)  # Approximate duration
	                sample_rate = 16000
	                samples = int(duration * sample_rate)
	                # Generate a simple sine wave as placeholder
	                t = np.linspace(0, duration, samples)
	                frequency = 440  # A4 note
	                audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
	                # Scale the [-0.3, 0.3] float signal to the int16 range.
	                return sample_rate, (audio * 32767).astype(np.int16)

	            def optimize_for_production(self):
	                """Do nothing; present so callers can treat both classes alike."""
	                pass

	# Logging setup: root logger at INFO, every record rendered as
	# "<timestamp> - <logger name> - <level> - <message>".
	logging.basicConfig(
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	# Logger used by the rest of this module.
	logger = logging.getLogger(__name__)

	# Global pipeline instance. Starts as None; initialize_pipeline() assigns it
	# and generate_speech() reads it (returning silence while it is still None).
	tts_pipeline: Optional[TTSPipeline] = None


	def initialize_pipeline():
	    """Create the module-level ``tts_pipeline``.

	    Returns:
	        True when the pipeline was set up without error (the dummy one when
	        ``HAS_PIPELINE`` is false, otherwise the configured real one).
	        False when constructing or optimizing the configured pipeline
	        raised; in that case ``tts_pipeline`` holds a default-constructed
	        ``TTSPipeline`` if that could be built, else ``None``.
	    """
	    global tts_pipeline

	    if not HAS_PIPELINE:
	        logger.warning("Pipeline not available - using dummy implementation")
	        tts_pipeline = TTSPipeline()
	        return True

	    try:
	        logger.info("Initializing TTS Pipeline...")
	        tts_pipeline = TTSPipeline(
	            model_checkpoint="Edmon02/TTS_NB_2",
	            max_chunk_length=200,
	            crossfade_duration=0.1,
	            use_mixed_precision=True,
	        )

	        # Apply production optimizations
	        tts_pipeline.optimize_for_production()

	        logger.info("TTS Pipeline initialized successfully")
	        return True

	    except Exception as e:
	        # logger.exception keeps the traceback, which logger.error dropped.
	        logger.exception("Failed to initialize TTS pipeline: %s", e)
	        # HAS_PIPELINE is true on this path, so TTSPipeline is the imported
	        # class, not the dummy. Building it with defaults may fail as well;
	        # guard it so a second failure cannot escape this function.
	        try:
	            tts_pipeline = TTSPipeline()
	        except Exception as fallback_error:
	            logger.exception(
	                "Fallback pipeline construction failed: %s", fallback_error
	            )
	            tts_pipeline = None
	        return False


	def _silent_audio() -> Tuple[int, np.ndarray]:
	    """Return the placeholder result: 1000 zero int16 samples at 16000 Hz."""
	    return 16000, np.zeros(1000, dtype=np.int16)


	def generate_speech(text: str) -> Tuple[int, np.ndarray]:
	    """
	    Main synthesis function optimized for HF Spaces.

	    Never raises: blank input, a missing pipeline, or any exception during
	    synthesis all yield the silent placeholder clip instead.

	    Args:
	        text: Input text to synthesize

	    Returns:
	        Tuple of (sample_rate, audio_array)
	    """
	    # perf_counter is monotonic, so the measured duration cannot be skewed
	    # by system clock adjustments.
	    start_time = time.perf_counter()

	    try:
	        # Validate inputs
	        if not text or not text.strip():
	            logger.warning("Empty text provided")
	            return _silent_audio()

	        if tts_pipeline is None:
	            logger.error("TTS pipeline not initialized")
	            return _silent_audio()

	        # Log request
	        logger.info("Processing request: %d characters", len(text))

	        # Synthesize speech with default settings
	        sample_rate, audio = tts_pipeline.synthesize(
	            text=text,
	            speaker="BDL",
	            enable_chunking=True,
	            apply_audio_processing=True,
	        )

	        # Log performance
	        total_time = time.perf_counter() - start_time
	        logger.info("Request completed in %.3fs", total_time)

	        return sample_rate, audio

	    except Exception as e:
	        # Top-level UI boundary: record the full traceback, then degrade to
	        # silence rather than surfacing an error to the caller.
	        logger.exception("Synthesis failed: %s", e)
	        return _silent_audio()


	# Create the Gradio interface
	def create_app():
	    """Build and return the Gradio interface that wraps ``generate_speech``."""
	    # Single text input for the sentence to be spoken.
	    text_input = gr.Textbox(
	        label="Armenian Text",
	        placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
	        lines=3,
	        max_lines=10,
	    )

	    # Audio player fed with a (sample_rate, numpy array) pair.
	    audio_output = gr.Audio(label="Generated Speech", type="numpy")

	    # Text shown under the title.
	    about_text = """
	Convert Armenian text to natural speech using SpeechT5.

	Instructions:
	1. Enter Armenian text in the input box
	2. Click Submit to generate speech
	3. Listen to the generated audio

	Tips:
	- Works best with standard Armenian orthography
	- Shorter sentences produce better quality
	- Include proper punctuation for natural pauses
	"""

	    # One single-element row per example, matching the single input.
	    sample_sentences = (
	        "Բարև ձեզ, ինչպե՞ս եք:",
	        "Այսօր գեղեցիկ օր է:",
	        "Հայաստանն ունի հարուստ պատմություն:",
	        "Երևանը Հայաստանի մայրաքաղաքն է:",
	        "Արարատ լեռը Հայաստանի խորհրդանիշն է:",
	    )
	    example_rows = [[sentence] for sentence in sample_sentences]

	    return gr.Interface(
	        fn=generate_speech,
	        inputs=[text_input],
	        outputs=[audio_output],
	        title="🎤 SpeechT5 Armenian Text-to-Speech",
	        description=about_text,
	        examples=example_rows,
	        theme=gr.themes.Soft(),
	        allow_flagging="never",  # Disable flagging to avoid schema issues
	        cache_examples=False,  # Disable example caching
	    )


	def main():
	    """Start the application: set up the pipeline, build the UI, launch it."""
	    logger.info("Starting SpeechT5 Armenian TTS Application")

	    # A failed initialization is logged but does not stop the UI from starting.
	    pipeline_ready = initialize_pipeline()
	    if not pipeline_ready:
	        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

	    # Launch settings for HF Spaces: no share link, listen on all interfaces
	    # on port 7860, and show errors in the UI.
	    launch_options = {
	        "share": False,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "show_error": True,
	    }
	    create_app().launch(**launch_options)


	if __name__ == "__main__":
	    main()