Spaces:

RinggAI
/

Ringg-TTS-v1.0

Running

App Files Files Community

utkarshshukla2912 committed on 19 days ago

Commit

69bfab8

1 Parent(s): 0df3466

base space

Browse files

Files changed (2) hide show

app.py +366 -0
logo.png +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,366 @@

+import gradio as gr
+import requests
+import json
+import os
+import base64
+# Base URL of the TTS API. Read from the BASE_URL environment variable,
+# falling back to a local server on port 8889 when it is not set.
+BASE_URL = os.environ.get("BASE_URL", "http://localhost:8889")
+# Stylesheet handed to gr.Blocks(css=...). It styles the pulsing health dot,
+# the title bar and heading, and the overall container width.
+# NOTE(review): several classes (.health-status, .metric-*, .voice-card,
+# .subtitle, .logo) are not referenced by the markup visible in this file.
+custom_css = """
+.health-status {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    padding: 15px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border-radius: 12px;
+    margin-bottom: 20px;
+}
+.status-dot {
+    width: 20px;
+    height: 20px;
+    border-radius: 50%;
+    animation: pulse 2s infinite;
+    margin-top: 25px;
+    display: inline-block;
+}
+.status-dot-green {
+    background-color: #10b981;
+    box-shadow: 0 0 20px rgba(16, 185, 129, 0.6);
+}
+.status-dot-red {
+    background-color: #ef4444;
+    box-shadow: 0 0 20px rgba(239, 68, 68, 0.6);
+}
+@keyframes pulse {
+    0%, 100% {
+        opacity: 1;
+        transform: scale(1);
+    }
+    50% {
+        opacity: 0.6;
+        transform: scale(1.1);
+    }
+}
+.metric-card {
+    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
+    padding: 15px;
+    border-radius: 10px;
+    margin: 5px;
+    color: white;
+    text-align: center;
+}
+.metric-label {
+    font-size: 12px;
+    opacity: 0.9;
+    margin-bottom: 5px;
+}
+.metric-value {
+    font-size: 24px;
+    font-weight: bold;
+}
+.voice-card {
+    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
+    padding: 10px;
+    border-radius: 8px;
+    margin: 5px 0;
+}
+.gradio-container {
+    max-width: 1200px !important;
+    margin: auto !important;
+}
+.main-title {
+    text-align: center;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    background-clip: text;
+    font-size: 48px;
+    font-weight: bold;
+    margin-bottom: 20px;
+    margin-top: 0;
+}
+.subtitle {
+    text-align: center;
+    color: #666;
+    font-size: 18px;
+    margin-bottom: 30px;
+}
+/* Title bar behind the heading text */
+.title-bar {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 15px;
+    margin-bottom: 20px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border-radius: 12px;
+    padding: 8px 16px;
+    height: 60px;           /* compact header height */
+    min-height: 60px;
+    overflow: hidden;
+}
+/* Make title text solid and place gradient behind it */
+.title-bar .main-title {
+    background: none !important;
+    -webkit-text-fill-color: #ffffff !important;
+    color: #ffffff !important;
+    margin: 0;
+    font-size: 22px;        /* smaller title for compact header */
+    line-height: 1.2;
+}
+.logo {
+    height: 18px !important; /* ensure the image itself is small */
+    width: auto !important;
+    display: inline-block;
+    margin-right: 8px;
+    object-fit: contain;
+    flex-shrink: 0;
+}
+"""
+def check_health():
+    """Check API health status"""
+    try:
+        response = requests.get(f"{BASE_URL}/health", timeout=5)
+        if response.status_code == 200:
+            data = response.json()
+            if data.get("status") == "healthy":
+                return True, ""
+        return False, "❌ Service unhealthy"
+    except Exception as e:
+        return False, f"❌ Connection failed: {str(e)}"
+def get_voices():
+    """Fetch available voices from API"""
+    try:
+        response = requests.get(f"{BASE_URL}/voices", timeout=10)
+        if response.status_code == 200:
+            voices_data = response.json().get("voices", {})
+            # Create a list of tuples (display_name, voice_id)
+            voices = []
+            for voice_id, voice_info in voices_data.items():
+                name = voice_info.get("name", "Unknown")
+                gender = voice_info.get("gender", "N/A")
+                display_name = f"{name} ({gender})"
+                voices.append((display_name, voice_id))
+            return sorted(voices, key=lambda x: x[0])
+        return []
+    except Exception as e:
+        print(f"Error fetching voices: {e}")
+        return []
+def synthesize_speech(text, voice_id):
+    """Synthesize speech from text"""
+    if not text or not text.strip():
+        return None, "⚠️ Please enter some text", "", "", "", "", "", ""
+    if not voice_id:
+        return None, "⚠️ Please select a voice", "", "", "", "", "", ""
+    try:
+        payload = {"text": text, "voice_id": voice_id}
+        response = requests.post(
+            f"{BASE_URL}/synthesize",
+            headers={"Content-Type": "application/json"},
+            json=payload,
+            timeout=30,
+        )
+        if response.status_code == 200:
+            result = response.json()
+            if result.get("success"):
+                audio_url = result.get("audio_url", "")
+                metrics = result.get("metrics", {})
+                # Format metrics
+                total_time = f"{metrics.get('t', 0):.3f}s"
+                rtf = f"{metrics.get('rtf', 0):.4f}"
+                wav_duration = f"{metrics.get('wav_seconds', 0):.2f}s"
+                vocoder_time = f"{metrics.get('t_vocoder', 0):.3f}s"
+                no_vocoder_time = f"{metrics.get('t_no_vocoder', 0):.3f}s"
+                rtf_no_vocoder = f"{metrics.get('rtf_no_vocoder', 0):.4f}"
+                status_msg = "✅ Audio generated successfully!"
+                return (
+                    audio_url,
+                    status_msg,
+                    total_time,
+                    rtf,
+                    wav_duration,
+                    vocoder_time,
+                    no_vocoder_time,
+                    rtf_no_vocoder,
+                )
+            else:
+                error_msg = result.get("message", "Unknown error")
+                return None, f"❌ Synthesis failed: {error_msg}", "", "", "", "", "", ""
+        else:
+            return (
+                None,
+                f"❌ API returned status code: {response.status_code}",
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+            )
+    except Exception as e:
+        return None, f"❌ Error: {str(e)}", "", "", "", "", "", ""
+def get_health_indicator():
+    """Get HTML for health status indicator"""
+    is_healthy, status_text = check_health()
+    dot_class = "status-dot-green" if is_healthy else "status-dot-red"
+    html = f"""
+    <div class="status-dot {dot_class}"></div>
+    """
+    return html
+# Create Gradio interface
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+    # Title with Health Status
+    def get_title_with_status():
+        is_healthy, _ = check_health()
+        dot_class = "status-dot-green" if is_healthy else "status-dot-red"
+        return f"""
+        <div class="title-bar">
+            <img src="https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png" width="50" height="50">
+            <h1 class='main-title'>RinggAI - Text-to-Speech</h1>
+            <div class="status-dot {dot_class}" style="margin-top: 0;"></div>
+        </div>
+        """
+    health_status = gr.HTML(value=get_title_with_status())
+    # Text Input
+    text_input = gr.Textbox(
+        label="Your text",
+        placeholder="Type or paste your text here...",
+        lines=6,
+        max_lines=10,
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Voice Selection
+            voices = get_voices()
+            voice_choices = {display: vid for display, vid in voices}
+            voice_dropdown = gr.Dropdown(
+                choices=list(voice_choices.keys()),
+                label="Choose a voice style",
+                info=f"{len(voices)} voices available",
+                value=list(voice_choices.keys())[0] if voices else None,
+            )
+        with gr.Column(scale=1):
+            # Status Message
+            # status_output = gr.Markdown("ℹ️ Ready to generate speech")
+            # Audio Output
+            audio_output = gr.Audio(label="Listen to your audio", type="filepath")
+            # Metrics Display (hidden until available)
+            metrics_header = gr.Markdown("### 📊 Generation Metrics", visible=False)
+            metrics_output = gr.Code(
+                label="Metrics", language="json", interactive=False, visible=False
+            )
+    generate_btn = gr.Button("🎬 Generate Speech", variant="primary", size="lg")
+    # Footer
+    gr.Markdown("---")
+    gr.Markdown("### 🙏 Acknowledgements")
+    gr.Markdown("- Based on [ZipVoice](https://github.com/k2-fsa/ZipVoice)")
+    gr.Markdown(
+        "- Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions"
+    )
+    # Event Handlers
+    def on_generate(text, voice_display):
+        voice_id = voice_choices.get(voice_display)
+        audio_url, _status, t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc = (
+            synthesize_speech(text, voice_id)
+        )
+        # Download audio if URL is available
+        audio_file = None
+        if audio_url:
+            try:
+                audio_response = requests.get(audio_url, timeout=30)
+                if audio_response.status_code == 200:
+                    # Save to temporary file
+                    audio_file = "/tmp/generated_audio.wav"
+                    with open(audio_file, "wb") as f:
+                        f.write(audio_response.content)
+            except Exception as e:
+                _status = f"⚠️ Audio generated but download failed: {str(e)}"
+        # Format metrics as JSON string (only if available)
+        has_metrics = any([t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc])
+        metrics_json = ""
+        if has_metrics:
+            metrics_json = json.dumps(
+                {
+                    "total_time": t_time,
+                    "rtf": rtf,
+                    "audio_duration": wav_dur,
+                    "vocoder_time": voc_time,
+                    "no_vocoder_time": no_voc_time,
+                    "rtf_no_vocoder": rtf_no_voc,
+                },
+                indent=2,
+            )
+        return (
+            audio_file,
+            gr.update(visible=has_metrics),
+            gr.update(value=metrics_json, visible=has_metrics),
+        )
+    generate_btn.click(
+        fn=on_generate,
+        inputs=[text_input, voice_dropdown],
+        outputs=[
+            audio_output,
+            # status_output,
+            metrics_header,
+            metrics_output,
+        ],
+    )
+    # Refresh health status every 120 seconds
+    demo.load(lambda: get_title_with_status(), outputs=[health_status], every=120)
+if __name__ == "__main__":
+    demo.queue()
+    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)

logo.png ADDED Viewed