Spaces:
Runtime error
Runtime error
""" | |
Armenian TTS - HuggingFace Spaces Compatible | |
=========================================== | |
Final version optimized for HF Spaces with Gradio 3.x compatibility. | |
""" | |
import gradio as gr | |
import numpy as np | |
import logging | |
import os | |
import sys | |
# Logging: configure the root logger at INFO, then grab this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared pipeline state. Both names are rebound by initialize_tts() and read
# by synthesize_speech(); they start out as "no pipeline, not ready".
tts_pipeline, pipeline_ready = None, False
def initialize_tts():
    """Build the global TTS pipeline and report whether it is usable.

    Adds the ``src`` directory next to this file to ``sys.path``, imports
    ``TTSPipeline`` from ``src.pipeline``, constructs it for the
    ``Edmon02/TTS_NB_2`` checkpoint and calls ``optimize_for_production()``.

    The module globals ``tts_pipeline`` and ``pipeline_ready`` are only set
    to a live pipeline / ``True`` once every step has succeeded. On any
    failure they are reset to ``None`` / ``False`` so callers never see a
    partially initialized pipeline.

    Returns:
        bool: ``True`` if the pipeline is ready, ``False`` if any step raised.
    """
    global tts_pipeline, pipeline_ready

    try:
        # Setup path for imports
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Imported lazily so a missing/broken package is caught below and the
        # app can still start in fallback mode.
        from src.pipeline import TTSPipeline

        logger.info("Initializing TTS pipeline...")
        # Build into a local first: the global is published only after the
        # optimization step has also succeeded.
        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True,
        )
        # Apply optimizations
        pipeline.optimize_for_production()
    except Exception as e:
        # Top-level boundary: keep the app alive, but record the full
        # traceback so the failure can actually be diagnosed from the logs.
        logger.exception("Failed to initialize TTS pipeline: %s", e)
        tts_pipeline = None
        pipeline_ready = False
        return False

    tts_pipeline = pipeline
    pipeline_ready = True
    logger.info("TTS pipeline ready!")
    return True
def synthesize_speech(text):
    """Synthesize speech for ``text``, falling back to placeholder audio.

    Behaviour by case:

    * ``text`` is not a string, or is empty / whitespace-only: one second of
      silence from ``create_silence``.
    * The pipeline is not ready: a placeholder tone from
      ``create_fallback_audio``.
    * Otherwise the stripped text is passed to ``tts_pipeline.synthesize``
      with speaker ``"BDL"``, chunking and audio processing enabled. If that
      call raises, the error is logged with its traceback and the placeholder
      tone is returned instead.

    Args:
        text (str): Armenian text to synthesize.

    Returns:
        tuple: ``(sample_rate, audio_array)``.
    """
    # Validate input (a non-string or blank value yields silence, not an error)
    if not isinstance(text, str) or not text.strip():
        return create_silence(1.0)

    # Check pipeline status
    if not pipeline_ready or tts_pipeline is None:
        logger.warning("Pipeline not ready, generating fallback audio")
        return create_fallback_audio(text)

    try:
        logger.info("Synthesizing: %s...", text[:50])
        # Generate speech using pipeline
        sample_rate, audio = tts_pipeline.synthesize(
            text=text.strip(),
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )
        logger.info("Generated %s samples at %sHz", len(audio), sample_rate)
        return sample_rate, audio
    except Exception as e:
        # Boundary handler for the UI callback: never propagate to Gradio,
        # but keep the traceback in the logs for debugging.
        logger.exception("Synthesis error: %s", e)
        return create_fallback_audio(text)
def create_silence(duration_seconds=1.0):
    """Return ``(sample_rate, samples)`` holding an all-zero int16 signal.

    The sample rate is fixed at 16000 Hz; the number of samples is
    ``int(duration_seconds * 16000)``.
    """
    rate_hz = 16000
    n_samples = int(duration_seconds * rate_hz)
    silent_signal = np.zeros(n_samples, dtype=np.int16)
    return rate_hz, silent_signal
def create_fallback_audio(text):
    """Create a simple placeholder tone whose length depends on ``text``.

    The duration is ``0.1`` s per character, clamped to ``[0.5, 5.0]``
    seconds, at a fixed 16000 Hz sample rate. The signal is a 440 Hz sine at
    amplitude 0.3; for texts longer than 20 characters a 660 Hz sine at
    amplitude 0.2 is added. A linear fade of at most 1000 samples (or a tenth
    of the signal, whichever is smaller) is applied to both ends.

    Args:
        text (str): Text whose length determines the tone duration.

    Returns:
        tuple: ``(sample_rate, audio)`` where ``audio`` is an int16 array.
    """
    sample_rate = 16000
    # Calculate duration based on text length
    duration = min(max(len(text) * 0.1, 0.5), 5.0)
    samples = int(duration * sample_rate)

    # Sample instants spaced exactly 1/sample_rate apart. An endpoint-inclusive
    # linspace(0, duration, samples) would stretch the spacing to
    # duration/(samples - 1) and slightly detune the generated tones.
    t = np.arange(samples) / sample_rate

    base_freq = 440  # A4
    audio = np.sin(2 * np.pi * base_freq * t) * 0.3
    # Add some variation for longer texts
    if len(text) > 20:
        audio += np.sin(2 * np.pi * (base_freq * 1.5) * t) * 0.2

    # Apply fade in/out
    fade_samples = min(samples // 10, 1000)
    if fade_samples > 0:
        audio[:fade_samples] *= np.linspace(0, 1, fade_samples)
        audio[-fade_samples:] *= np.linspace(1, 0, fade_samples)

    # Convert to int16 (peak amplitude is at most 0.5, so no overflow)
    return sample_rate, (audio * 32767).astype(np.int16)
# Bring the pipeline up once at import time; the outcome selects the status
# badge and the description text shown in the UI.
logger.info("Starting Armenian TTS application...")
init_success = initialize_tts()

if not init_success:
    # Initialization failed: advertise the placeholder-tone behaviour.
    app_status = "🟡 Test Mode (Limited Functionality)"
    app_description = """
🎤 **Armenian Text-to-Speech System - Test Mode**
The TTS system is running in test mode with limited functionality.
Text input will generate simple audio tones as placeholders.
"""
else:
    # Pipeline is live: show the regular usage instructions.
    app_status = "🟢 TTS System Ready"
    app_description = """
🎤 **Armenian Text-to-Speech System**
Convert Armenian text to natural speech using SpeechT5.
**How to use:**
1. Enter Armenian text in the box below
2. Click Submit to generate speech
3. Play the generated audio
**Tips for best results:**
- Use standard Armenian script (Unicode)
- Include punctuation for natural pauses
- Shorter sentences work better for quality
"""
# Create the Gradio interface.
#
# Components are taken from the top-level namespace (gr.Textbox / gr.Audio).
# The legacy gr.inputs.* / gr.outputs.* namespaces are deprecated in Gradio 3.x
# and absent from later releases, where referencing them fails at import time.
# The legacy `allow_screenshot` argument is likewise omitted.
demo = gr.Interface(
    fn=synthesize_speech,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Մուտքագրեք ձեր հայերեն տեքստը այստեղ...",
        label="Armenian Text Input",
    ),
    # synthesize_speech returns (sample_rate, numpy_array).
    outputs=gr.Audio(
        label="Generated Speech",
    ),
    title=f"🇦🇲 Armenian Text-to-Speech {app_status}",
    description=app_description,
    examples=[
        "Բարև ձեզ, ինչպե՞ս եք:",
        "Այսօր գեղեցիկ օր է:",
        "Շնորհակալություն:",
        "Հայաստան իմ սիրելի երկիրն է:",
        "Երևանը Հայաստանի մայրաքաղաքն է:",
    ],
    theme="default",
    allow_flagging="never",
)
# Script entry point: serve the interface on all interfaces, port 7860,
# without a public share link.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "quiet": False,
    }
    demo.launch(**launch_options)