Spaces:
Runtime error
Runtime error
""" | |
SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version | |
============================================================ | |
Simplified and optimized for HuggingFace Spaces deployment. | |
""" | |
import gradio as gr | |
import numpy as np | |
import logging | |
import time | |
from typing import Tuple, Optional | |
import os | |
import sys | |
# Put the bundled ``src`` directory at the front of the import path.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

try:
    from src.pipeline import TTSPipeline
    HAS_PIPELINE = True
except ImportError as e:
    # Log through a named logger instead of ``logging.error``: the
    # module-level helper implicitly calls ``basicConfig()`` on an
    # unconfigured root logger, which would turn the application's own
    # ``logging.basicConfig(...)`` call later in this file into a no-op.
    logging.getLogger(__name__).error("Failed to import pipeline: %s", e)
    HAS_PIPELINE = False

    class TTSPipeline:
        """Stand-in pipeline used when the real one cannot be imported.

        It accepts (and ignores) any constructor arguments and produces a
        placeholder tone instead of speech, so the UI can still be tested.
        """

        def __init__(self, *args, **kwargs):
            """Accept and discard whatever arguments the caller passes."""

        def synthesize(self, text, **kwargs):
            """Return a placeholder 440 Hz tone sized to the input text.

            Args:
                text: Input text; only its length is used.
                **kwargs: Ignored.

            Returns:
                Tuple of (sample_rate, int16 audio array). The clip lasts
                0.1 s per character, capped at 5 s; empty text yields an
                empty array.
            """
            sample_rate = 16000
            duration = min(len(text) * 0.1, 5.0)  # Approximate duration
            samples = int(duration * sample_rate)
            # Sample instants spaced exactly 1/sample_rate apart.
            t = np.arange(samples) / sample_rate
            frequency = 440  # A4 note
            tone = 0.3 * np.sin(2 * np.pi * frequency * t)
            return sample_rate, (tone * 32767).astype(np.int16)

        def optimize_for_production(self):
            """No-op; present so callers can treat both pipelines alike."""
# Logging setup: timestamped records, INFO level and above.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared pipeline instance; assigned by initialize_pipeline().
tts_pipeline: Optional[TTSPipeline] = None
def initialize_pipeline():
    """Create the global TTS pipeline, degrading gracefully on failure.

    When the real pipeline could not be imported, the dummy implementation
    is installed instead. When the real pipeline is importable but fails to
    initialize, ``tts_pipeline`` is left as ``None`` so that synthesis
    requests return silence rather than the application crashing at startup.

    Returns:
        True if a pipeline (real or dummy) was installed, False if
        initialization of the real pipeline failed.
    """
    global tts_pipeline

    if not HAS_PIPELINE:
        logger.warning("Pipeline not available - using dummy implementation")
        tts_pipeline = TTSPipeline()
        return True

    try:
        logger.info("Initializing TTS Pipeline...")
        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True,
        )
        # Apply production optimizations
        pipeline.optimize_for_production()
    except Exception as e:
        # Here ``TTSPipeline`` is the real class, so constructing it again
        # as a "fallback" could raise from inside this handler. Leave the
        # pipeline unset instead and report the failure to the caller.
        logger.exception("Failed to initialize TTS pipeline: %s", e)
        tts_pipeline = None
        return False

    # Publish the instance only once it is fully set up.
    tts_pipeline = pipeline
    logger.info("TTS Pipeline initialized successfully")
    return True
def _silence() -> Tuple[int, np.ndarray]:
    """Return the short silent clip used whenever synthesis cannot proceed."""
    return 16000, np.zeros(1000, dtype=np.int16)


def generate_speech(text: str) -> Tuple[int, np.ndarray]:
    """
    Main synthesis function optimized for HF Spaces.

    Never raises: on empty input, a missing pipeline, or a synthesis error
    it logs the problem and returns a short silent clip instead.

    Args:
        text: Input text to synthesize

    Returns:
        Tuple of (sample_rate, audio_array)
    """
    # perf_counter is monotonic, so the elapsed time cannot go negative.
    start_time = time.perf_counter()

    # Validate inputs
    if not isinstance(text, str) or not text.strip():
        logger.warning("Empty text provided")
        return _silence()

    if tts_pipeline is None:
        logger.error("TTS pipeline not initialized")
        return _silence()

    # Log request
    logger.info("Processing request: %d characters", len(text))

    try:
        # Synthesize speech with default settings
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )
    except Exception as e:
        # Keep the UI responsive, but record the full traceback.
        logger.exception("Synthesis failed: %s", e)
        return _silence()

    # Log performance
    total_time = time.perf_counter() - start_time
    logger.info("Request completed in %.3fs", total_time)
    return sample_rate, audio
# Build the Gradio interface
def create_app():
    """Assemble and return the Gradio interface for the TTS demo."""
    text_input = gr.Textbox(
        label="Armenian Text",
        placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
        lines=3,
        max_lines=10,
    )
    audio_output = gr.Audio(label="Generated Speech", type="numpy")

    # Markdown shown under the title.
    description_md = (
        "\n"
        "Convert Armenian text to natural speech using SpeechT5.\n"
        "**Instructions:**\n"
        "1. Enter Armenian text in the input box\n"
        "2. Click Submit to generate speech\n"
        "3. Listen to the generated audio\n"
        "**Tips:**\n"
        "- Works best with standard Armenian orthography\n"
        "- Shorter sentences produce better quality\n"
        "- Include proper punctuation for natural pauses\n"
    )

    # One single-field row per clickable example.
    sample_sentences = [
        "Բարև ձեզ, ինչպե՞ս եք:",
        "Այսօր գեղեցիկ օր է:",
        "Հայաստանն ունի հարուստ պատմություն:",
        "Երևանը Հայաստանի մայրաքաղաքն է:",
        "Արարատ լեռը Հայաստանի խորհրդանիշն է:",
    ]
    example_rows = [[sentence] for sentence in sample_sentences]

    return gr.Interface(
        fn=generate_speech,
        inputs=[text_input],
        outputs=[audio_output],
        title="🎤 SpeechT5 Armenian Text-to-Speech",
        description=description_md,
        examples=example_rows,
        theme=gr.themes.Soft(),
        allow_flagging="never",  # Disable flagging to avoid schema issues
        cache_examples=False,  # Disable example caching
    )
def main():
    """Start the app: set up the pipeline, build the UI, and serve it."""
    logger.info("Starting SpeechT5 Armenian TTS Application")

    # A failed initialization is reported but does not stop the launch.
    pipeline_ready = initialize_pipeline()
    if not pipeline_ready:
        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

    # Launch settings for HF Spaces
    launch_options = {
        "share": False,  # Don't create share link on HF Spaces
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    create_app().launch(**launch_options)


if __name__ == "__main__":
    main()