Spaces:
Runtime error
Runtime error
""" | |
Armenian TTS - Minimal HF Spaces Version | |
======================================= | |
Absolutely minimal version to avoid all possible compatibility issues. | |
""" | |
import gradio as gr | |
import numpy as np | |
import logging | |
import os | |
import sys | |
# Simple logging | |
logging.basicConfig(level=logging.INFO) | |
logger = logging.getLogger(__name__) | |
def setup_pipeline():
    """Build the TTS pipeline, reporting failure instead of raising.

    The ``src`` directory that sits next to this file is inserted at the
    front of ``sys.path`` (only if it is not already listed) and
    ``TTSPipeline`` is then imported from ``src.pipeline``. Because the
    import happens inside the ``try``, an import failure is handled exactly
    like any other initialization error.

    Returns:
        tuple: ``(pipeline, True)`` when the pipeline was constructed and
        ``optimize_for_production()`` completed; ``(None, False)`` when any
        ``Exception`` was raised along the way. The exception is logged
        together with its traceback and is not re-raised.
    """
    try:
        # Make the bundled ``src`` directory importable.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Imported here rather than at module top so a broken or missing
        # dependency lands in the handler below.
        from src.pipeline import TTSPipeline

        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            use_mixed_precision=True,
        )
        pipeline.optimize_for_production()
    except Exception as e:
        # Deliberate catch-all: the caller uses the False flag to decide what
        # to do next. logger.exception keeps the traceback so the root cause
        # (import error, model loading problem, ...) is visible in the logs.
        logger.exception("Pipeline initialization failed: %s", e)
        return None, False

    logger.info("TTS pipeline initialized successfully")
    return pipeline, True
def _fallback_tone(text, sample_rate=16000):
    """Generate the placeholder tone used when no TTS pipeline is available.

    The tone is a 440 Hz sine with second and third harmonics, shaped by an
    exponential decay. Its length is 0.08 s per character of ``text``,
    capped at 4 s.

    Args:
        text: The (already stripped) input text; only its length is used.
        sample_rate: Samples per second of the generated audio.

    Returns:
        numpy.ndarray: ``int16`` samples. If the computed length is zero
        (empty ``text``), 8000 samples of silence are returned instead.
    """
    duration = min(len(text) * 0.08, 4.0)  # Max 4 seconds
    samples = int(duration * sample_rate)
    if samples <= 0:
        return np.zeros(8000, dtype=np.int16)

    t = np.linspace(0, duration, samples)
    frequency = 440  # A4 note
    audio = np.sin(2 * np.pi * frequency * t) * 0.2
    # Harmonics at 2x and 3x the fundamental for a richer sound.
    audio += np.sin(2 * np.pi * frequency * 2 * t) * 0.1
    audio += np.sin(2 * np.pi * frequency * 3 * t) * 0.05
    # Exponential decay envelope.
    audio *= np.exp(-t * 2)
    # Peak amplitude is at most 0.35, so scaling to int16 cannot overflow.
    return (audio * 32767).astype(np.int16)


def tts_process(text):
    """Turn ``text`` into audio, never raising on bad input or TTS failure.

    Behaviour by case:

    * ``text`` is not a ``str``, or is empty / whitespace-only: one second
      of silence at 16 kHz.
    * No pipeline is available (``pipeline_available`` is false or
      ``tts_pipeline`` is ``None``): a short placeholder tone at 16 kHz.
    * Otherwise: whatever ``tts_pipeline.synthesize`` returns for the
      stripped text with speaker ``"BDL"``, chunking and audio processing
      enabled. If that call raises, the error is logged with its traceback
      and half a second of silence at 16 kHz is returned.

    Args:
        text: The text to synthesize.

    Returns:
        tuple: ``(sample_rate, audio)``. On the silence and placeholder
        paths ``audio`` is an ``int16`` numpy array; on the real TTS path
        the values are passed through from the pipeline unchanged.
    """
    # Type check first so a non-str value is never truth-tested or stripped.
    if not isinstance(text, str) or not text.strip():
        # Return 1 second of silence
        return 16000, np.zeros(16000, dtype=np.int16)

    text = text.strip()

    # If no pipeline is available, answer with a simple audio placeholder.
    if not pipeline_available or tts_pipeline is None:
        logger.info("Using fallback for text: %s...", text[:30])
        sample_rate = 16000
        return sample_rate, _fallback_tone(text, sample_rate)

    # Try real TTS
    try:
        logger.info("Synthesizing: %s...", text[:50])
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )
        logger.info("Successfully generated %s samples", len(audio))
        return sample_rate, audio
    except Exception as e:
        # Best-effort by design: the UI gets silence instead of an error,
        # while the traceback is preserved in the logs.
        logger.exception("TTS synthesis failed: %s", e)
        return 16000, np.zeros(8000, dtype=np.int16)
# Build the pipeline a single time at import; tts_process reads these globals.
logger.info("Initializing Armenian TTS application...")
tts_pipeline, pipeline_available = setup_pipeline()

# Heading and blurb reflect whether the real pipeline came up.
title, description = (
    (
        "🇦🇲 Armenian Text-to-Speech (Ready)",
        "Convert Armenian text to speech using SpeechT5.",
    )
    if pipeline_available
    else (
        "🇦🇲 Armenian TTS (Test Mode)",
        "TTS system in test mode - will generate simple audio tones.",
    )
)

# Bare-bones Gradio UI: one text box in, one audio player out.
app = gr.Interface(
    fn=tts_process,
    inputs="text",
    outputs="audio",
    title=title,
    description=description,
    examples=["Բարև ձեզ", "Շնորհակալություն", "Ինչպե՞ս եք"],
)
# Start the web server only when this file is executed as a script:
# listen on all interfaces, port 7860, without a public share link.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)