utkarshshukla2912 committed on
Commit
69bfab8
·
1 Parent(s): 0df3466

base space

Browse files
Files changed (2) hide show
  1. app.py +366 -0
  2. logo.png +0 -0
app.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import json
4
+ import os
5
+ import base64
6
+
7
# API Base URL
# Root address of the backend that serves the /health, /voices and /synthesize
# endpoints used in this file. Taken from the BASE_URL environment variable,
# falling back to a local server on port 8889 when the variable is unset.
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8889")
9
+
10
# Custom CSS for aesthetic design.
# Passed to gr.Blocks(css=...) when the interface is built. It defines the
# pulsing health dot (.status-dot plus the -green / -red color variants), the
# compact gradient header (.title-bar, .main-title, .logo) and several card /
# text helpers (.health-status, .metric-*, .voice-card, .subtitle).
# NOTE(review): the HTML generated in this file only references .title-bar,
# .main-title and the .status-dot* classes; the remaining rules look unused
# here - confirm before pruning.
custom_css = """
.health-status {
    display: flex;
    align-items: center;
    gap: 10px;
    padding: 15px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 12px;
    margin-bottom: 20px;
}

.status-dot {
    width: 20px;
    height: 20px;
    border-radius: 50%;
    animation: pulse 2s infinite;
    margin-top: 25px;
    display: inline-block;
}

.status-dot-green {
    background-color: #10b981;
    box-shadow: 0 0 20px rgba(16, 185, 129, 0.6);
}

.status-dot-red {
    background-color: #ef4444;
    box-shadow: 0 0 20px rgba(239, 68, 68, 0.6);
}

@keyframes pulse {
    0%, 100% {
        opacity: 1;
        transform: scale(1);
    }
    50% {
        opacity: 0.6;
        transform: scale(1.1);
    }
}

.metric-card {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    padding: 15px;
    border-radius: 10px;
    margin: 5px;
    color: white;
    text-align: center;
}

.metric-label {
    font-size: 12px;
    opacity: 0.9;
    margin-bottom: 5px;
}

.metric-value {
    font-size: 24px;
    font-weight: bold;
}

.voice-card {
    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
    padding: 10px;
    border-radius: 8px;
    margin: 5px 0;
}

.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}

.main-title {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 48px;
    font-weight: bold;
    margin-bottom: 20px;
    margin-top: 0;
}

.subtitle {
    text-align: center;
    color: #666;
    font-size: 18px;
    margin-bottom: 30px;
}

/* Title bar behind the heading text */
.title-bar {
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 15px;
    margin-bottom: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 12px;
    padding: 8px 16px;
    height: 60px; /* compact header height */
    min-height: 60px;
    overflow: hidden;
}

/* Make title text solid and place gradient behind it */
.title-bar .main-title {
    background: none !important;
    -webkit-text-fill-color: #ffffff !important;
    color: #ffffff !important;
    margin: 0;
    font-size: 22px; /* smaller title for compact header */
    line-height: 1.2;
}

.logo {
    height: 18px !important; /* ensure the image itself is small */
    width: auto !important;
    display: inline-block;
    margin-right: 8px;
    object-fit: contain;
    flex-shrink: 0;
}
"""
137
+
138
+
139
def check_health():
    """Probe the backend's /health endpoint.

    Returns:
        A ``(is_healthy, message)`` tuple. ``message`` is empty when the
        service answered HTTP 200 with ``{"status": "healthy"}``; otherwise it
        holds a short human-readable reason.
    """
    try:
        reply = requests.get(f"{BASE_URL}/health", timeout=5)
        # The JSON body is only parsed when the status code is 200.
        service_ok = (
            reply.status_code == 200 and reply.json().get("status") == "healthy"
        )
    except Exception as exc:
        return False, f"❌ Connection failed: {exc}"

    if service_ok:
        return True, ""
    return False, "❌ Service unhealthy"
150
+
151
+
152
def get_voices():
    """Retrieve the voice catalogue from the backend's /voices endpoint.

    Returns:
        A list of ``(display_name, voice_id)`` tuples sorted by display name,
        where the display name is ``"<name> (<gender>)"``. An empty list is
        returned on a non-200 response or on any error (which is printed).
    """
    try:
        reply = requests.get(f"{BASE_URL}/voices", timeout=10)
        if reply.status_code != 200:
            return []

        catalogue = reply.json().get("voices", {})
        # Pair a human-readable label with each voice id.
        labelled = [
            (f"{info.get('name', 'Unknown')} ({info.get('gender', 'N/A')})", vid)
            for vid, info in catalogue.items()
        ]
        labelled.sort(key=lambda pair: pair[0])
        return labelled
    except Exception as exc:
        print(f"Error fetching voices: {exc}")
        return []
170
+
171
+
172
def _synthesis_failure(message):
    """Build the 8-tuple for a failed call: no audio URL, a message, six blank metrics."""
    return (None, message) + ("",) * 6


def _format_metric(metrics, key, spec, suffix=""):
    """Format ``metrics[key]`` as a float with ``spec``; missing or non-numeric values count as 0."""
    try:
        number = float(metrics.get(key, 0))
    except (TypeError, ValueError, OverflowError):
        # A null or malformed metric must not discard an otherwise good result.
        number = 0.0
    return f"{number:{spec}}{suffix}"


def synthesize_speech(text, voice_id):
    """Synthesize speech from text via the backend's /synthesize endpoint.

    Args:
        text: Text to speak; empty or whitespace-only input is rejected.
        voice_id: Identifier of the voice to use; a falsy value is rejected.

    Returns:
        An 8-tuple ``(audio_url, status_message, total_time, rtf,
        wav_duration, vocoder_time, no_vocoder_time, rtf_no_vocoder)``.
        On any failure ``audio_url`` is ``None``, ``status_message`` explains
        the problem and the six metric fields are empty strings. On success
        the metric fields are pre-formatted strings; metrics that are missing,
        null or non-numeric in the response are rendered as zero.
    """
    if not text or not text.strip():
        return _synthesis_failure("⚠️ Please enter some text")

    if not voice_id:
        return _synthesis_failure("⚠️ Please select a voice")

    try:
        response = requests.post(
            f"{BASE_URL}/synthesize",
            headers={"Content-Type": "application/json"},
            json={"text": text, "voice_id": voice_id},
            timeout=30,
        )

        if response.status_code != 200:
            return _synthesis_failure(
                f"❌ API returned status code: {response.status_code}"
            )

        result = response.json()
        if not result.get("success"):
            error_msg = result.get("message", "Unknown error")
            return _synthesis_failure(f"❌ Synthesis failed: {error_msg}")

        # Tolerate `"metrics": null` (or any non-mapping) in the response.
        metrics = result.get("metrics")
        if not isinstance(metrics, dict):
            metrics = {}

        return (
            result.get("audio_url", ""),
            "✅ Audio generated successfully!",
            _format_metric(metrics, "t", ".3f", "s"),
            _format_metric(metrics, "rtf", ".4f"),
            _format_metric(metrics, "wav_seconds", ".2f", "s"),
            _format_metric(metrics, "t_vocoder", ".3f", "s"),
            _format_metric(metrics, "t_no_vocoder", ".3f", "s"),
            _format_metric(metrics, "rtf_no_vocoder", ".4f"),
        )

    except Exception as e:
        # UI boundary: report any network/parsing problem instead of raising.
        return _synthesis_failure(f"❌ Error: {str(e)}")
234
+
235
+
236
def get_health_indicator():
    """Return an HTML snippet with a status dot: green when healthy, red otherwise."""
    healthy, _message = check_health()
    if healthy:
        dot_class = "status-dot-green"
    else:
        dot_class = "status-dot-red"

    return f"""
    <div class="status-dot {dot_class}"></div>
    """
245
+
246
+
247
# Create Gradio interface
# NOTE(review): the nesting of the metrics widgets and the generate button
# relative to the Row/Column containers should be confirmed against the
# deployed layout.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Title with Health Status
    def get_title_with_status():
        """Return the header HTML: logo, title and a health dot colored by check_health()."""
        is_healthy, _ = check_health()
        dot_class = "status-dot-green" if is_healthy else "status-dot-red"
        return f"""
        <div class="title-bar">
            <img src="https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png" width="50" height="50">
            <h1 class='main-title'>RinggAI - Text-to-Speech</h1>
            <div class="status-dot {dot_class}" style="margin-top: 0;"></div>
        </div>
        """

    health_status = gr.HTML(value=get_title_with_status())

    # Text Input
    text_input = gr.Textbox(
        label="Your text",
        placeholder="Type or paste your text here...",
        lines=6,
        max_lines=10,
    )

    with gr.Row():
        with gr.Column(scale=1):
            # Voice Selection
            # The catalogue is fetched once, while the UI is being built.
            voices = get_voices()
            # Maps the label shown in the dropdown back to the backend voice id.
            voice_choices = {display: vid for display, vid in voices}

            voice_dropdown = gr.Dropdown(
                choices=list(voice_choices),
                label="Choose a voice style",
                # Count the de-duplicated labels so the hint matches the options.
                info=f"{len(voice_choices)} voices available",
                value=next(iter(voice_choices), None),
            )

        with gr.Column(scale=1):
            # Status Message
            # status_output = gr.Markdown("ℹ️ Ready to generate speech")

            # Audio Output

            audio_output = gr.Audio(label="Listen to your audio", type="filepath")

    # Metrics Display (hidden until available)
    metrics_header = gr.Markdown("### 📊 Generation Metrics", visible=False)

    metrics_output = gr.Code(
        label="Metrics", language="json", interactive=False, visible=False
    )

    generate_btn = gr.Button("🎬 Generate Speech", variant="primary", size="lg")
    # Footer
    gr.Markdown("---")
    gr.Markdown("### 🙏 Acknowledgements")
    gr.Markdown("- Based on [ZipVoice](https://github.com/k2-fsa/ZipVoice)")
    gr.Markdown(
        "- Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions"
    )

    # Event Handlers
    def on_generate(text, voice_display):
        """Synthesize ``text`` with the selected voice and prepare the UI outputs.

        Args:
            text: Contents of the text box.
            voice_display: Label currently selected in the voice dropdown.

        Returns:
            A 3-tuple matching the click outputs: the path of the downloaded
            audio file (or ``None``), a visibility update for the metrics
            header, and a value/visibility update for the metrics JSON box.
        """
        # Function-scope import keeps this handler self-contained.
        import tempfile

        voice_id = voice_choices.get(voice_display)
        audio_url, _status, t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc = (
            synthesize_speech(text, voice_id)
        )

        # Download audio if URL is available
        audio_file = None
        if audio_url:
            try:
                audio_response = requests.get(audio_url, timeout=30)
                if audio_response.status_code == 200:
                    # A unique file per request: a single fixed path would let
                    # concurrent users overwrite each other's audio. The file
                    # is kept (delete=False) because Gradio reads it after this
                    # handler returns.
                    with tempfile.NamedTemporaryFile(
                        suffix=".wav", delete=False
                    ) as f:
                        f.write(audio_response.content)
                    # Only expose the path once the write has succeeded.
                    audio_file = f.name
                else:
                    print(
                        f"Audio download failed with status code: {audio_response.status_code}"
                    )
            except Exception as e:
                # _status is not displayed while status_output is disabled, so
                # also print the problem instead of swallowing it.
                _status = f"⚠️ Audio generated but download failed: {str(e)}"
                print(_status)

        # Format metrics as JSON string (only if available)
        has_metrics = any((t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc))
        metrics_json = ""
        if has_metrics:
            metrics_json = json.dumps(
                {
                    "total_time": t_time,
                    "rtf": rtf,
                    "audio_duration": wav_dur,
                    "vocoder_time": voc_time,
                    "no_vocoder_time": no_voc_time,
                    "rtf_no_vocoder": rtf_no_voc,
                },
                indent=2,
            )

        return (
            audio_file,
            gr.update(visible=has_metrics),
            gr.update(value=metrics_json, visible=has_metrics),
        )

    generate_btn.click(
        fn=on_generate,
        inputs=[text_input, voice_dropdown],
        outputs=[
            audio_output,
            # status_output,
            metrics_header,
            metrics_output,
        ],
    )

    # Refresh health status every 120 seconds
    demo.load(lambda: get_title_with_status(), outputs=[health_status], every=120)
363
+
364
# Script entry point: only start the server when run directly, not on import.
if __name__ == "__main__":
    # Turn on Gradio's request queue before launching.
    demo.queue()
    # Bind to all network interfaces on port 7860; no public share link.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
logo.png ADDED