Spaces:

RinggAI
/

STT

Running

App Files Files Community

utkarshshukla2912 committed 16 days ago

Commit

3361f15

1 Parent(s): b672ef4

formatted looks

Browse files

Files changed (6) hide show

.gitignore +1 -0
.python-version +1 -0
app.py +65 -182
pyproject.toml +12 -0
requirements.txt +1 -0
uv.lock +0 -0

.gitignore CHANGED Viewed

@@ -2,6 +2,7 @@
 api_backend.py
 *.nemo
 model_files/
 # Python
 __pycache__/

 api_backend.py
 *.nemo
 model_files/
+.env
 # Python
 __pycache__/

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.10

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-#updated
 """
 Ringg Parrot STT V1 🦜 - Hugging Face Space (Frontend)
 Makes API calls to private inference endpoint via ngrok
@@ -9,10 +9,13 @@ import os
 import base64
 from pathlib import Path
-#os.environ.setdefault("GRADIO_API_INFO_ENABLED", "false")
 import gradio as gr
 import requests
 LOGO_BASE64 = ""
@@ -26,120 +29,6 @@ if logo_path.exists():
 DEFAULT_LOGO_URL = "https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png"
 LOGO_URL = os.environ.get("STT_LOGO_URL", DEFAULT_LOGO_URL).strip()
-# Custom CSS for Ringg branding
-custom_css = """
-.gradio-container {
-  font-family: 'Inter', sans-serif;
-  max-width: 950px;
-  margin: 0 auto;
-}
-.main-header {
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  gap: 20px;
-  flex-wrap: nowrap;
-  padding: 20px;
-  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-  color: white;
-  border-radius: 10px;
-  margin-bottom: 20px;
-  max-width: 900px;
-  margin-left: auto;
-  margin-right: auto;
-}
-.main-header .main-logo {
-  height: 60px;
-  width: 60px;
-  flex-shrink: 0;
-  display: flex;
-  align-items: center;
-  justify-content: center;
-}
-.main-header .main-logo img {
-  max-height: 100%;
-  max-width: 100%;
-  object-fit: contain;
-}
-.main-header .main-logo.main-logo--placeholder {
-  background-color: rgba(255, 255, 255, 0.2);
-  border-radius: 12px;
-}
-.main-header .main-text {
-  text-align: left;
-  display: flex;
-  flex-direction: column;
-  justify-content: center;
-  min-width: 0;
-}
-.main-header .main-text h1 {
-  margin: 0 0 6px;
-}
-.main-header .main-text p {
-  margin: 0;
-}
-@media (max-width: 640px) {
-  .main-header {
-    flex-wrap: wrap;
-  }
-  .main-header .main-text {
-    text-align: center;
-    width: 100%;
-  }
-}
-.status-dot {
-  display: inline-block;
-  width: 8px;
-  height: 8px;
-  border-radius: 50%;
-  margin-left: 8px;
-}
-.status-dot.healthy {
-  background-color: #22c55e;
-  animation: pulse-green 2s ease-in-out infinite;
-}
-.status-dot.error {
-  background-color: #ef4444;
-  animation: pulse-red 2s ease-in-out infinite;
-}
-@keyframes pulse-green {
-  0% {
-    box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
-  }
-  70% {
-    box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
-  }
-  100% {
-    box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
-  }
-}
-@keyframes pulse-red {
-  0% {
-    box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
-  }
-  70% {
-    box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
-  }
-  100% {
-    box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
-  }
-}
-"""
 # Backend API endpoint (ngrok URL)
 # You can update this via Hugging Face Space Secrets
 API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "")
@@ -224,8 +113,8 @@ def create_interface():
         if not text or text.startswith("❌") or text.startswith("⏱"):
             return text or "⚠️ No speech detected—try a clearer recording."
-        footer = "(Served via API • Remote backend)"
-        return f"{text}\n\n{footer}"
     def check_api_status():
         """Check API health status"""
@@ -234,32 +123,53 @@ def create_interface():
     # Create interface
     with gr.Blocks(
-        title="Ringg Parrot STT V1 🦜", theme=gr.themes.Soft(), css=custom_css
     ) as demo:
-        status_class = "healthy" if health_status["status"] == "healthy" else "error"
-        if LOGO_URL:
-            logo_html = (
-                f'<div class="main-logo"><img src="{LOGO_URL}" alt="Ringg Logo"></div>'
             )
-        elif LOGO_BASE64:
-            logo_html = f'<div class="main-logo"><img src="data:image/png;base64,{LOGO_BASE64}" alt="Ringg Logo"></div>'
-        else:
-            logo_html = '<div class="main-logo main-logo--placeholder"></div>'
-        gr.Markdown(f"""
-        <div class="main-header">
-        {logo_html}
-        <div class="main-text">
-        <h1>Ringg Parrot STT V1 🦜</h1>
-        <p>High-Accuracy Hindi Speech-to-Text <span class="status-dot {status_class}"></span></p>
-        </div>
-        </div>
-        """)
         gr.Markdown(
             """
-            # 🎯 Performance Benchmarks
-            #### **Ringg Parrot STT V1** Ranks **1st** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other Leading Solutions.
             """
         )
@@ -277,52 +187,23 @@ def create_interface():
                 interactive=False,
             )
-        gr.Markdown(
-            """
-            -----------------
-            # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
-            """
-        )
-        with gr.Row():
-            audio_input = gr.Audio(
-                label="📁 Upload Audio File",
-                type="filepath",
-                sources=["upload"],
-                scale=3,
-            )
-            transcribe_btn = gr.Button(
-                "Transcribe", variant="primary", size="sm", scale=1
-            )
-        file_output = gr.Textbox(
-            label="Transcription Result",
-            lines=6,
-            interactive=True,
-            placeholder="Upload a file and click Transcribe...",
-        )
-        transcribe_btn.click(
-            transcribe_audio,
-            inputs=audio_input,
-            outputs=file_output,
-            concurrency_limit=1,
-        )
-        gr.Markdown(
-            """
-            ### ✨ Features
-            - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
-            - 🎯 **High Accuracy**: Competitive with leading ASR models
-            - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
-            - ⚡ **Fast Processing**: Optimized for quick transcription
-            """
-        )
         gr.Markdown(
             """
-            ### ⚠️ Benchmark Disclaimer
             - Evaluated on a modified FLEURS subset to ensure consistent Hindi coverage
             - Dataset issues include inaudible segments and repeated sentences caused by interruptions
             - Background noise is prominent across many clips, impacting recognition quality
@@ -334,7 +215,9 @@ def create_interface():
         gr.Markdown(
             """
-            # 🙏 Acknowledgements
             - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
             - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
             """

 #!/usr/bin/env python3
+# updated
 """
 Ringg Parrot STT V1 🦜 - Hugging Face Space (Frontend)
 Makes API calls to private inference endpoint via ngrok
 import base64
 from pathlib import Path
+# os.environ.setdefault("GRADIO_API_INFO_ENABLED", "false")
 import gradio as gr
 import requests
+from dotenv import load_dotenv
+load_dotenv()  # reads variables from a .env file and sets them in os.environ
 LOGO_BASE64 = ""
 DEFAULT_LOGO_URL = "https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png"
 LOGO_URL = os.environ.get("STT_LOGO_URL", DEFAULT_LOGO_URL).strip()
 # Backend API endpoint (ngrok URL)
 # You can update this via Hugging Face Space Secrets
 API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "")
         if not text or text.startswith("❌") or text.startswith("⏱"):
             return text or "⚠️ No speech detected—try a clearer recording."
+        # footer = "(Served via API • Remote backend)"
+        return text
     def check_api_status():
         """Check API health status"""
     # Create interface
     with gr.Blocks(
+        theme=gr.themes.Base(
+            font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"]
+        ),
+        css=".gradio-container {max-width: none !important;}",
     ) as demo:
+        gr.HTML("""
+                <div style="display: flex; align-items: center; gap: 10px;">
+                    <img style="width: 50px; height: 50px; background-color: white; border-radius: 10%;" src="https://storage.googleapis.com/desivocal-prod/desi-vocal/ringg.svg" alt="Logo">
+                    <h1 style="margin: 0;">Ringg Parrot STT V1.0 🦜</h1>
+                </div>
+                """)
+        gr.Markdown(
+            """
+            ## 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
+            """
+        )
+        with gr.Row():
+            with gr.Column():
+                audio_input = gr.Audio(
+                    label="📁 Upload Audio File",
+                    type="filepath",
+                    sources=["upload"],
+                )
+                transcribe_btn = gr.Button("Transcribe", variant="primary", size="lg")
+            file_output = gr.Textbox(
+                label="Transcription Result",
+                lines=12,
+                interactive=False,
             )
+        transcribe_btn.click(
+            transcribe_audio,
+            inputs=audio_input,
+            outputs=file_output,
+            concurrency_limit=1,
+        )
         gr.Markdown(
             """
+            <br>
+            ## 🎯 Performance Benchmarks
+            **Ringg Parrot STT V1** Ranks **1st** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other Leading Solutions.
             """
         )
                 interactive=False,
             )
+        # gr.Markdown(
+        #     """
+        #     <br>
+        #     ##  ✨ Features
+        #     - **Hindi Support**: Accurate transcription for Hindi audio
+        #     - **High Accuracy**: Competitive with leading ASR models
+        #     - **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
+        #     - **Fast Processing**: Optimized for quick transcription
+        #     """
+        # )
         gr.Markdown(
             """
+            <br>
+            ##  ⚠️ Benchmark Disclaimer
             - Evaluated on a modified FLEURS subset to ensure consistent Hindi coverage
             - Dataset issues include inaudible segments and repeated sentences caused by interruptions
             - Background noise is prominent across many clips, impacting recognition quality
         gr.Markdown(
             """
+            <br>
+            ##  🙏 Acknowledgements
             - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
             - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
             """

pyproject.toml ADDED Viewed

	@@ -0,0 +1,12 @@

+[project]
+name = "stt"
+version = "0.1.0"
+requires-python = ">=3.10"
+dependencies = [
+    "gradio==5.49.1",
+    "gradio-client==1.13.3",
+    "huggingface-hub==1.0.1",
+    "pandas==2.3.3",
+    "python-dotenv>=1.2.1",
+    "requests==2.32.5",
+]

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ gradio-client==1.13.3
 pandas==2.3.3
 requests==2.32.5
 huggingface-hub==1.0.1

 pandas==2.3.3
 requests==2.32.5
 huggingface-hub==1.0.1
+python-dotenv

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff