utkarshshukla2912 committed on
Commit
3361f15
·
1 Parent(s): b672ef4

formatted looks

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. .python-version +1 -0
  3. app.py +65 -182
  4. pyproject.toml +12 -0
  5. requirements.txt +1 -0
  6. uv.lock +0 -0
.gitignore CHANGED
@@ -2,6 +2,7 @@
2
  api_backend.py
3
  *.nemo
4
  model_files/
 
5
 
6
  # Python
7
  __pycache__/
 
2
  api_backend.py
3
  *.nemo
4
  model_files/
5
+ .env
6
 
7
  # Python
8
  __pycache__/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10
app.py CHANGED
@@ -1,5 +1,5 @@
1
  #!/usr/bin/env python3
2
- #updated
3
  """
4
  Ringg Parrot STT V1 🦜 - Hugging Face Space (Frontend)
5
  Makes API calls to private inference endpoint via ngrok
@@ -9,10 +9,13 @@ import os
9
  import base64
10
  from pathlib import Path
11
 
12
- #os.environ.setdefault("GRADIO_API_INFO_ENABLED", "false")
13
 
14
  import gradio as gr
15
  import requests
 
 
 
16
 
17
 
18
  LOGO_BASE64 = ""
@@ -26,120 +29,6 @@ if logo_path.exists():
26
  DEFAULT_LOGO_URL = "https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png"
27
  LOGO_URL = os.environ.get("STT_LOGO_URL", DEFAULT_LOGO_URL).strip()
28
 
29
- # Custom CSS for Ringg branding
30
- custom_css = """
31
- .gradio-container {
32
- font-family: 'Inter', sans-serif;
33
- max-width: 950px;
34
- margin: 0 auto;
35
- }
36
-
37
- .main-header {
38
- display: flex;
39
- align-items: center;
40
- justify-content: center;
41
- gap: 20px;
42
- flex-wrap: nowrap;
43
- padding: 20px;
44
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
45
- color: white;
46
- border-radius: 10px;
47
- margin-bottom: 20px;
48
- max-width: 900px;
49
- margin-left: auto;
50
- margin-right: auto;
51
- }
52
-
53
- .main-header .main-logo {
54
- height: 60px;
55
- width: 60px;
56
- flex-shrink: 0;
57
- display: flex;
58
- align-items: center;
59
- justify-content: center;
60
- }
61
-
62
- .main-header .main-logo img {
63
- max-height: 100%;
64
- max-width: 100%;
65
- object-fit: contain;
66
- }
67
-
68
- .main-header .main-logo.main-logo--placeholder {
69
- background-color: rgba(255, 255, 255, 0.2);
70
- border-radius: 12px;
71
- }
72
-
73
- .main-header .main-text {
74
- text-align: left;
75
- display: flex;
76
- flex-direction: column;
77
- justify-content: center;
78
- min-width: 0;
79
- }
80
-
81
- .main-header .main-text h1 {
82
- margin: 0 0 6px;
83
- }
84
-
85
- .main-header .main-text p {
86
- margin: 0;
87
- }
88
-
89
- @media (max-width: 640px) {
90
- .main-header {
91
- flex-wrap: wrap;
92
- }
93
-
94
- .main-header .main-text {
95
- text-align: center;
96
- width: 100%;
97
- }
98
- }
99
-
100
- .status-dot {
101
- display: inline-block;
102
- width: 8px;
103
- height: 8px;
104
- border-radius: 50%;
105
- margin-left: 8px;
106
- }
107
-
108
- .status-dot.healthy {
109
- background-color: #22c55e;
110
- animation: pulse-green 2s ease-in-out infinite;
111
- }
112
-
113
- .status-dot.error {
114
- background-color: #ef4444;
115
- animation: pulse-red 2s ease-in-out infinite;
116
- }
117
-
118
- @keyframes pulse-green {
119
- 0% {
120
- box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
121
- }
122
- 70% {
123
- box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
124
- }
125
- 100% {
126
- box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
127
- }
128
- }
129
-
130
- @keyframes pulse-red {
131
- 0% {
132
- box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
133
- }
134
- 70% {
135
- box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
136
- }
137
- 100% {
138
- box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
139
- }
140
- }
141
- """
142
-
143
  # Backend API endpoint (ngrok URL)
144
  # You can update this via Hugging Face Space Secrets
145
  API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "")
@@ -224,8 +113,8 @@ def create_interface():
224
  if not text or text.startswith("❌") or text.startswith("⏱"):
225
  return text or "⚠️ No speech detected—try a clearer recording."
226
 
227
- footer = "(Served via API • Remote backend)"
228
- return f"{text}\n\n{footer}"
229
 
230
  def check_api_status():
231
  """Check API health status"""
@@ -234,32 +123,53 @@ def create_interface():
234
 
235
  # Create interface
236
  with gr.Blocks(
237
- title="Ringg Parrot STT V1 🦜", theme=gr.themes.Soft(), css=custom_css
 
 
 
238
  ) as demo:
239
- status_class = "healthy" if health_status["status"] == "healthy" else "error"
240
- if LOGO_URL:
241
- logo_html = (
242
- f'<div class="main-logo"><img src="{LOGO_URL}" alt="Ringg Logo"></div>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  )
244
- elif LOGO_BASE64:
245
- logo_html = f'<div class="main-logo"><img src="data:image/png;base64,{LOGO_BASE64}" alt="Ringg Logo"></div>'
246
- else:
247
- logo_html = '<div class="main-logo main-logo--placeholder"></div>'
248
-
249
- gr.Markdown(f"""
250
- <div class="main-header">
251
- {logo_html}
252
- <div class="main-text">
253
- <h1>Ringg Parrot STT V1 🦜</h1>
254
- <p>High-Accuracy Hindi Speech-to-Text <span class="status-dot {status_class}"></span></p>
255
- </div>
256
- </div>
257
- """)
258
 
259
  gr.Markdown(
260
  """
261
- # 🎯 Performance Benchmarks
262
- #### **Ringg Parrot STT V1** Ranks **1st** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other Leading Solutions.
 
 
263
  """
264
  )
265
 
@@ -277,52 +187,23 @@ def create_interface():
277
  interactive=False,
278
  )
279
 
280
- gr.Markdown(
281
- """
282
- -----------------
283
- # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
284
- """
285
- )
286
-
287
- with gr.Row():
288
- audio_input = gr.Audio(
289
- label="📁 Upload Audio File",
290
- type="filepath",
291
- sources=["upload"],
292
- scale=3,
293
- )
294
-
295
- transcribe_btn = gr.Button(
296
- "Transcribe", variant="primary", size="sm", scale=1
297
- )
298
-
299
- file_output = gr.Textbox(
300
- label="Transcription Result",
301
- lines=6,
302
- interactive=True,
303
- placeholder="Upload a file and click Transcribe...",
304
- )
305
-
306
- transcribe_btn.click(
307
- transcribe_audio,
308
- inputs=audio_input,
309
- outputs=file_output,
310
- concurrency_limit=1,
311
- )
312
 
313
- gr.Markdown(
314
- """
315
- ### Features
316
- - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
317
- - 🎯 **High Accuracy**: Competitive with leading ASR models
318
- - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
319
- - ⚡ **Fast Processing**: Optimized for quick transcription
320
- """
321
- )
322
 
323
  gr.Markdown(
324
  """
325
- ### ⚠️ Benchmark Disclaimer
 
 
326
  - Evaluated on a modified FLEURS subset to ensure consistent Hindi coverage
327
  - Dataset issues include inaudible segments and repeated sentences caused by interruptions
328
  - Background noise is prominent across many clips, impacting recognition quality
@@ -334,7 +215,9 @@ def create_interface():
334
 
335
  gr.Markdown(
336
  """
337
- # 🙏 Acknowledgements
 
 
338
  - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
339
  - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
340
  """
 
1
  #!/usr/bin/env python3
2
+ # updated
3
  """
4
  Ringg Parrot STT V1 🦜 - Hugging Face Space (Frontend)
5
  Makes API calls to private inference endpoint via ngrok
 
9
  import base64
10
  from pathlib import Path
11
 
12
+ # os.environ.setdefault("GRADIO_API_INFO_ENABLED", "false")
13
 
14
  import gradio as gr
15
  import requests
16
+ from dotenv import load_dotenv
17
+
18
+ load_dotenv()  # load variables from a local .env file into os.environ; variables already set in the environment are not overridden
19
 
20
 
21
  LOGO_BASE64 = ""
 
29
  DEFAULT_LOGO_URL = "https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png"
30
  LOGO_URL = os.environ.get("STT_LOGO_URL", DEFAULT_LOGO_URL).strip()
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # Backend API endpoint (ngrok URL)
33
  # You can update this via Hugging Face Space Secrets
34
  API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "")
 
113
  if not text or text.startswith("❌") or text.startswith("⏱"):
114
  return text or "⚠️ No speech detected—try a clearer recording."
115
 
116
+ # footer = "(Served via API • Remote backend)"
117
+ return text
118
 
119
  def check_api_status():
120
  """Check API health status"""
 
123
 
124
  # Create interface
125
  with gr.Blocks(
126
+ theme=gr.themes.Base(
127
+ font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"]
128
+ ),
129
+ css=".gradio-container {max-width: none !important;}",
130
  ) as demo:
131
+ gr.HTML("""
132
+ <div style="display: flex; align-items: center; gap: 10px;">
133
+ <img style="width: 50px; height: 50px; background-color: white; border-radius: 10%;" src="https://storage.googleapis.com/desivocal-prod/desi-vocal/ringg.svg" alt="Logo">
134
+ <h1 style="margin: 0;">Ringg Parrot STT V1.0 🦜</h1>
135
+ </div>
136
+ """)
137
+
138
+ gr.Markdown(
139
+ """
140
+ ## 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
141
+ """
142
+ )
143
+
144
+ with gr.Row():
145
+ with gr.Column():
146
+ audio_input = gr.Audio(
147
+ label="📁 Upload Audio File",
148
+ type="filepath",
149
+ sources=["upload"],
150
+ )
151
+
152
+ transcribe_btn = gr.Button("Transcribe", variant="primary", size="lg")
153
+
154
+ file_output = gr.Textbox(
155
+ label="Transcription Result",
156
+ lines=12,
157
+ interactive=False,
158
  )
159
+
160
+ transcribe_btn.click(
161
+ transcribe_audio,
162
+ inputs=audio_input,
163
+ outputs=file_output,
164
+ concurrency_limit=1,
165
+ )
 
 
 
 
 
 
 
166
 
167
  gr.Markdown(
168
  """
169
+ <br>
170
+
171
+ ## 🎯 Performance Benchmarks
172
+ **Ringg Parrot STT V1** Ranks **1st** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other Leading Solutions.
173
  """
174
  )
175
 
 
187
  interactive=False,
188
  )
189
 
190
+ # gr.Markdown(
191
+ # """
192
+ # <br>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
+ # ## ✨ Features
195
+ # - **Hindi Support**: Accurate transcription for Hindi audio
196
+ # - **High Accuracy**: Competitive with leading ASR models
197
+ # - **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
198
+ # - **Fast Processing**: Optimized for quick transcription
199
+ # """
200
+ # )
 
 
201
 
202
  gr.Markdown(
203
  """
204
+ <br>
205
+
206
+ ## ⚠️ Benchmark Disclaimer
207
  - Evaluated on a modified FLEURS subset to ensure consistent Hindi coverage
208
  - Dataset issues include inaudible segments and repeated sentences caused by interruptions
209
  - Background noise is prominent across many clips, impacting recognition quality
 
215
 
216
  gr.Markdown(
217
  """
218
+ <br>
219
+
220
+ ## 🙏 Acknowledgements
221
  - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
222
  - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
223
  """
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "stt"
3
+ version = "0.1.0"
4
+ requires-python = ">=3.10"
5
+ dependencies = [
6
+ "gradio==5.49.1",
7
+ "gradio-client==1.13.3",
8
+ "huggingface-hub==1.0.1",
9
+ "pandas==2.3.3",
10
+ "python-dotenv>=1.2.1",
11
+ "requests==2.32.5",
12
+ ]
requirements.txt CHANGED
@@ -3,4 +3,5 @@ gradio-client==1.13.3
3
  pandas==2.3.3
4
  requests==2.32.5
5
  huggingface-hub==1.0.1
 
6
 
 
3
  pandas==2.3.3
4
  requests==2.32.5
5
  huggingface-hub==1.0.1
6
+ python-dotenv
7
 
uv.lock ADDED
The diff for this file is too large to render. See raw diff