# SpeechT5_hy / archive / app_minimal.py
# Edmon02's picture
# feat: Implement project organization plan and optimize TTS deployment
# 3f1840e
"""
SpeechT5 Armenian TTS - Minimal HF Spaces Version
================================================
Ultra-minimal version to avoid Gradio schema issues.
"""
import gradio as gr
import numpy as np
import logging
import os
import sys
# Configure logging once at import time; INFO level so the progress
# messages emitted below by init_pipeline() and tts_function() are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Module-wide synthesis backend. Starts as None and is assigned by
# init_pipeline() (real pipeline, or a tone-generating fallback on failure);
# tts_function() reads it on every request.
pipeline = None
class DummyPipeline:
    """Stand-in synthesizer installed when the real pipeline cannot be loaded.

    It accepts the same ``synthesize(text=..., **options)`` call that the
    real pipeline receives, but only produces a short, quiet 440 Hz tone.
    """

    def synthesize(self, text, **kwargs):
        """Return ``(sample_rate, samples)`` containing a sine test tone.

        Args:
            text: Input text. Only its length is used: 0.1 s of audio per
                character, capped at 2 s.
            **kwargs: Accepted and ignored, so callers may pass the options
                they would give the real pipeline.

        Returns:
            ``(16000, samples)`` where ``samples`` is a 1-D ``int16`` array.
        """
        duration = min(len(text) * 0.1, 2.0)
        sr = 16000
        samples = int(duration * sr)
        # Sample k sits at exactly k / sr seconds. A linspace that includes
        # its end point would space samples by duration / (samples - 1) and
        # slightly detune the tone.
        t = np.arange(samples) / sr
        audio = np.sin(2 * np.pi * 440 * t) * 0.1
        return sr, (audio * 32767).astype(np.int16)


def init_pipeline() -> bool:
    """Create the TTS pipeline and store it in the module-level ``pipeline``.

    Any exception raised while importing, constructing, or optimizing the
    pipeline is logged with its traceback, and a ``DummyPipeline`` is
    installed instead so the application can still start.

    Returns:
        True if the real pipeline was initialized, False if the fallback
        tone generator is in use.
    """
    global pipeline
    try:
        # Add <this file's directory>/src to the import path.
        # NOTE(review): the import below resolves ``src`` as a package, which
        # relies on this file's directory itself being importable; src/ is
        # presumably added for bare-name imports inside that package -- confirm.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Imported lazily so a missing package degrades to the fallback
        # instead of preventing the module from loading.
        from src.pipeline import TTSPipeline

        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            use_mixed_precision=True,
        )
        pipeline.optimize_for_production()
    except Exception as e:
        # Startup boundary: deliberately broad, but keep the traceback so
        # the reason for running in fallback mode is visible in the logs.
        logger.exception("Pipeline init failed: %s", e)
        pipeline = DummyPipeline()
        return False
    else:
        logger.info("Pipeline initialized")
        return True
def _silence():
    """Return ``(16000, samples)`` with 8000 zero ``int16`` samples (0.5 s)."""
    return 16000, np.zeros(8000, dtype=np.int16)


def tts_function(text):
    """Synthesize speech for *text* using the module-level ``pipeline``.

    Synthesis failures never propagate to the caller: they are logged with
    a traceback and replaced by silence.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input
            short-circuits to silence without calling the pipeline.

    Returns:
        The ``(sample_rate, samples)`` pair returned by
        ``pipeline.synthesize``, or 8000 zero samples at 16000 Hz for blank
        input or when synthesis raises.
    """
    if not text or not text.strip():
        # Return short silence
        return _silence()
    try:
        # Only the first 30 characters are logged to keep log lines short.
        logger.info("Processing: %s...", text[:30])
        sr, audio = pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )
        logger.info("Generated %d samples", len(audio))
        return sr, audio
    except Exception as e:
        # UI boundary: deliberately broad so one bad request cannot break the
        # interface, but record the traceback instead of just the message.
        logger.exception("TTS error: %s", e)
        return _silence()
# Initialize the pipeline at import time so the interface title can report
# whether the real model or the fallback tone generator is in use.
logger.info("Starting TTS app...")
success = init_pipeline()
status = "✅ Ready" if success else "⚠️ Test Mode"

# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 3.x and
# removed in 4.0, where referencing them raises AttributeError. Prefer the
# top-level component classes and fall back to the legacy namespaces only on
# older releases that do not provide them.
_textbox_cls = getattr(gr, "Textbox", None) or gr.inputs.Textbox
_audio_cls = getattr(gr, "Audio", None) or gr.outputs.Audio

# Create minimal interface: one text input, one audio output.
iface = gr.Interface(
    fn=tts_function,
    inputs=_textbox_cls(
        lines=2,
        placeholder="Enter Armenian text...",
        label="Armenian Text",
    ),
    outputs=_audio_cls(label="Speech"),
    title=f"🎤 Armenian TTS {status}",
    description="Convert Armenian text to speech.",
    examples=[
        "Բարև ձեզ:",
        "Ինչպե՞ս եք:",
        "Շնորհակալություն:",
    ],
)
# Start the web server only when this file is executed directly, not when
# it is imported.
if __name__ == "__main__":
    # Listen on every network interface, port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)