SpeechT5_hy / archive /app_simple.py
Edmon02's picture
feat: Implement project organization plan and optimize TTS deployment
3f1840e
"""
SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
============================================================
Simplified and optimized for HuggingFace Spaces deployment.
"""
import gradio as gr
import numpy as np
import logging
import time
from typing import Tuple, Optional
import os
import sys
# Put the ``src`` directory that sits next to this file on the import path.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

try:
    from src.pipeline import TTSPipeline
    HAS_PIPELINE = True
except ImportError as e:
    # Report through a named logger instead of the module-level
    # ``logging.error``: that convenience function installs a default root
    # handler when none exists, which would make the ``logging.basicConfig``
    # call later in this file a silent no-op (custom format and INFO level
    # would be lost).
    logging.getLogger(__name__).error("Failed to import pipeline: %s", e)
    # Fallback import attempt
    sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
    try:
        from src.pipeline import TTSPipeline
        HAS_PIPELINE = True
    except ImportError:
        HAS_PIPELINE = False

        class TTSPipeline:
            """Stand-in used when the real pipeline cannot be imported.

            It accepts (and ignores) any constructor arguments and produces a
            short 440 Hz tone instead of speech, so the rest of the app can
            still be exercised.
            """

            def __init__(self, *args, **kwargs):
                """Accept and discard whatever the real pipeline would take."""
                pass

            def synthesize(self, text, **kwargs):
                """Return a placeholder tone whose length scales with ``text``.

                Args:
                    text: Input text; only its length is used.
                    **kwargs: Ignored.

                Returns:
                    Tuple ``(16000, audio)`` where ``audio`` is a 1-D int16
                    array lasting 0.1 s per character, capped at 5 s.
                """
                duration = min(len(text) * 0.1, 5.0)  # Approximate duration
                sample_rate = 16000
                samples = int(duration * sample_rate)
                # Sample instants spaced exactly 1/sample_rate apart, so the
                # generated tone really is 440 Hz (an endpoint-inclusive
                # linspace would stretch the spacing slightly).
                t = np.arange(samples) / sample_rate
                frequency = 440  # A4 note
                audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
                return sample_rate, (audio * 32767).astype(np.int16)

            def optimize_for_production(self):
                """No-op counterpart of the real pipeline's optimization hook."""
                pass
# Root logging setup: INFO and above, each record stamped with time, logger
# name and severity.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# Module-wide pipeline handle; remains None until initialize_pipeline() assigns it.
tts_pipeline: Optional[TTSPipeline] = None
def initialize_pipeline():
    """Create the module-wide ``tts_pipeline`` and report how it went.

    When the pipeline module could not be imported (``HAS_PIPELINE`` is
    false) the stand-in ``TTSPipeline`` is instantiated and that counts as
    success. Otherwise the pipeline is built from the ``Edmon02/TTS_NB_2``
    checkpoint and ``optimize_for_production()`` is applied.

    If that fails, a default-constructed ``TTSPipeline()`` is tried as a
    fallback. Should the fallback fail as well, ``tts_pipeline`` is left as
    ``None`` instead of letting the second exception escape and abort
    start-up.

    Returns:
        bool: ``True`` if the stand-in or the configured pipeline was set up
        without error, ``False`` if the configured pipeline failed.
    """
    global tts_pipeline

    if not HAS_PIPELINE:
        logger.warning("Pipeline not available - using dummy implementation")
        tts_pipeline = TTSPipeline()
        return True

    try:
        logger.info("Initializing TTS Pipeline...")
        tts_pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True,
        )
        # Apply production optimizations
        tts_pipeline.optimize_for_production()
        logger.info("TTS Pipeline initialized successfully")
        return True
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error() would drop.
        logger.exception("Failed to initialize TTS pipeline: %s", e)

    # On this path TTSPipeline is the real class, not the stand-in, so a
    # default construction can fail for the same reason the first one did.
    try:
        tts_pipeline = TTSPipeline()
    except Exception as e:
        logger.exception("Fallback TTS pipeline could not be created: %s", e)
        tts_pipeline = None
    return False
def _silent_clip() -> Tuple[int, np.ndarray]:
    """Return the placeholder result: 1000 zero int16 samples at rate 16000."""
    return 16000, np.zeros(1000, dtype=np.int16)


def generate_speech(text: str) -> Tuple[int, np.ndarray]:
    """
    Main synthesis function optimized for HF Spaces.

    Never raises: blank input, a missing pipeline, or any exception during
    synthesis all yield the same short silent clip, so the caller always
    receives a valid ``(sample_rate, audio)`` pair.

    Args:
        text: Input text to synthesize

    Returns:
        Tuple of (sample_rate, audio_array) as produced by
        ``tts_pipeline.synthesize``, or ``(16000, zeros(1000, int16))`` on
        any of the failure paths above.
    """
    # perf_counter is monotonic, so the reported latency cannot be distorted
    # by wall-clock adjustments during the request.
    started = time.perf_counter()

    try:
        # Validate inputs
        if not text or not text.strip():
            logger.warning("Empty text provided")
            return _silent_clip()

        if tts_pipeline is None:
            logger.error("TTS pipeline not initialized")
            return _silent_clip()

        # Log request
        logger.info("Processing request: %d characters", len(text))

        # Synthesize speech with default settings
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )

        # Log performance
        elapsed = time.perf_counter() - started
        logger.info("Request completed in %.3fs", elapsed)
        return sample_rate, audio
    except Exception as e:
        # Keep the traceback in the log; the UI only ever sees silence.
        logger.exception("Synthesis failed: %s", e)
        return _silent_clip()
# Gradio front end
def create_app():
    """Assemble and return the Gradio ``Interface`` for the demo.

    The interface wires a single Armenian-text box to ``generate_speech``
    and plays the returned ``(sample_rate, array)`` pair through a numpy
    audio component. Flagging and example caching are both switched off.
    """
    text_input = gr.Textbox(
        label="Armenian Text",
        placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
        lines=3,
        max_lines=10,
    )
    audio_output = gr.Audio(label="Generated Speech", type="numpy")

    # Markdown blurb shown under the title.
    blurb = (
        "\n"
        "Convert Armenian text to natural speech using SpeechT5.\n"
        "**Instructions:**\n"
        "1. Enter Armenian text in the input box\n"
        "2. Click Submit to generate speech\n"
        "3. Listen to the generated audio\n"
        "**Tips:**\n"
        "- Works best with standard Armenian orthography\n"
        "- Shorter sentences produce better quality\n"
        "- Include proper punctuation for natural pauses\n"
    )

    # One-click sample sentences, one row per example.
    sample_sentences = [
        ["Բարև ձեզ, ինչպե՞ս եք:"],
        ["Այսօր գեղեցիկ օր է:"],
        ["Հայաստանն ունի հարուստ պատմություն:"],
        ["Երևանը Հայաստանի մայրաքաղաքն է:"],
        ["Արարատ լեռը Հայաստանի խորհրդանիշն է:"],
    ]

    return gr.Interface(
        fn=generate_speech,
        inputs=[text_input],
        outputs=[audio_output],
        title="🎤 SpeechT5 Armenian Text-to-Speech",
        description=blurb,
        examples=sample_sentences,
        theme=gr.themes.Soft(),
        allow_flagging="never",  # flagging stays off to avoid schema issues
        cache_examples=False,  # examples are synthesized on demand, not cached
    )
def main():
    """Entry point: set up the pipeline, build the UI and serve it.

    A failed pipeline initialization is logged but does not stop the app
    from launching.
    """
    logger.info("Starting SpeechT5 Armenian TTS Application")

    pipeline_ready = initialize_pipeline()
    if not pipeline_ready:
        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

    # Serving options for HF Spaces: listen on all interfaces at port 7860,
    # no public share link, and surface errors in the UI.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }

    demo = create_app()
    demo.launch(**launch_options)


if __name__ == "__main__":
    main()