Nuzhatwa commited on
Commit
44699d7
·
verified ·
1 Parent(s): 2b87f87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +273 -576
app.py CHANGED
@@ -1,662 +1,359 @@
1
  import gradio as gr
2
  import numpy as np
3
- import torch
4
  import subprocess
5
- import os
6
  import tempfile
7
- import time
8
- import soundfile as sf
9
  from pathlib import Path
10
  import cv2
11
- import warnings
12
- warnings.filterwarnings("ignore")
13
 
14
- # Initialize models (placeholder for actual model loading)
15
- class LipSyncProcessor:
16
- def __init__(self):
17
- self.models = {
18
- "Wav2Lip": "Loaded",
19
- "MuseTalk": "Loaded",
20
- "SadTalker": "Loaded"
21
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- def process_video(self, video_path, audio_path, model_name):
24
- """Process lip sync with selected model"""
25
- # Placeholder for actual processing
26
- return f"Processing completed with {model_name}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- # Initialize processor
29
- lip_sync = LipSyncProcessor()
 
 
 
30
 
31
- def process_lip_sync_basic(video_file, audio_source, model_choice="Wav2Lip", quality="720p"):
32
- """
33
- Main lip sync processing function
34
- """
 
 
 
 
 
 
 
 
 
 
35
  try:
36
- if video_file is None:
37
- return "❌ Video file required! Please upload a video."
38
-
39
- if audio_source is None:
40
- return "❌ Audio source required! Please record or upload audio."
41
-
42
- # Process audio based on type
43
- if isinstance(audio_source, tuple):
44
- # Microphone recording: (sample_rate, audio_data)
45
- sr, audio_data = audio_source
46
-
47
- # Validate audio data
48
- if len(audio_data) == 0:
49
- return "❌ No audio detected. Please try recording again."
50
 
51
- # Save audio to temporary file
52
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
53
- sf.write(temp_audio.name, audio_data, sr)
 
54
  audio_path = temp_audio.name
55
  else:
56
  # File upload
57
- audio_path = audio_source
58
-
59
- # Get video info
60
- video_info = f"📹 Video: {Path(video_file).name}\n"
61
- audio_info = f"🎵 Audio: {'Microphone Recording' if isinstance(audio_source, tuple) else Path(audio_path).name}\n"
62
-
63
- # Simulate processing time
64
- processing_msg = f"🔄 Processing with {model_choice}...\n"
65
- processing_msg += f"⚙️ Quality: {quality}\n"
66
- processing_msg += f"⏱️ Estimated time: 30-60 seconds\n\n"
67
 
68
- # Model-specific processing simulation
69
- if model_choice == "Wav2Lip":
70
- result = "✅ Wav2Lip processing completed!\n"
71
- result += "📊 Accuracy: 95%\n"
72
- result += "🎯 Best for: High accuracy lip sync\n"
73
- elif model_choice == "MuseTalk":
74
- result = "✅ MuseTalk processing completed!\n"
75
- result += "⚡ Speed: 30fps real-time\n"
76
- result += "🎯 Best for: Real-time applications\n"
77
- elif model_choice == "SadTalker":
78
- result = "✅ SadTalker processing completed!\n"
79
- result += "😊 Expression: Natural emotions\n"
80
- result += "🎯 Best for: Emotional expressions\n"
81
 
82
- final_result = video_info + audio_info + processing_msg + result
83
- final_result += "\n📁 Output ready for download!"
84
-
85
- return final_result
86
 
87
  except Exception as e:
88
- return f" Error during processing: {str(e)}\nPlease try again with different settings."
89
 
90
- def text_to_speech_and_sync(video_file, text_input, voice_settings, model_choice, language="en"):
91
- """
92
- Convert text to speech and then do lip sync
93
- """
 
94
  try:
95
- if not text_input or not text_input.strip():
96
- return "❌ Text input required! Please enter some text."
97
-
98
- if video_file is None:
99
- return "❌ Video file required! Please upload a video."
100
-
101
- # Text validation
102
- if len(text_input.strip()) < 5:
103
- return "❌ Text too short! Please enter at least 5 characters."
104
-
105
- # Simulate TTS processing
106
- tts_msg = f"🗣️ Converting text to speech...\n"
107
- tts_msg += f"📝 Text length: {len(text_input)} characters\n"
108
- tts_msg += f"🎵 Voice: {voice_settings}\n"
109
- tts_msg += f"🌐 Language: {language}\n\n"
110
 
111
- # Simulate processing
112
- result = f"✅ Text-to-Speech + Lip Sync completed!\n\n"
113
- result += f"📝 Input Text: '{text_input[:100]}{'...' if len(text_input) > 100 else ''}'\n"
114
- result += f"🎥 Video: {Path(video_file).name}\n"
115
- result += f"🤖 Model: {model_choice}\n"
116
- result += f"🎵 Voice: {voice_settings}\n\n"
117
- result += f"📊 Processing Details:\n"
118
- result += f" • TTS Generation: ✅ Complete\n"
119
- result += f" • Audio Processing: ✅ Complete\n"
120
- result += f" • Lip Sync: ✅ Complete\n"
121
- result += f" • Video Rendering: ✅ Complete\n\n"
122
- result += f"📁 Output ready for download!"
123
-
124
- return tts_msg + result
125
 
126
  except Exception as e:
127
- return f" Error: {str(e)}\nPlease check your inputs and try again."
128
 
129
- def real_time_transcription(audio_stream, accumulated_text):
130
- """
131
- Real-time audio transcription for live microphone
132
- """
 
 
 
 
133
  try:
134
- if audio_stream is None:
135
- return accumulated_text or "", "🎤 Ready for recording... Click the microphone to start."
136
-
137
- sr, audio_data = audio_stream
138
-
139
- # Validate audio
140
- if len(audio_data) == 0:
141
- return accumulated_text or "", "🔇 No audio detected. Check microphone permissions."
142
 
143
- # Convert audio for processing
144
- if audio_data.ndim > 1:
145
- audio_data = audio_data.mean(axis=1)
146
-
147
- audio_data = audio_data.astype(np.float32)
148
- if np.max(np.abs(audio_data)) > 0:
149
- audio_data /= np.max(np.abs(audio_data))
150
-
151
- # Simulate transcription
152
- audio_level = np.mean(np.abs(audio_data))
153
- timestamp = time.strftime('%H:%M:%S')
154
-
155
- if audio_level > 0.01: # Voice activity threshold
156
- new_text = f"[{timestamp}] Audio detected (Level: {audio_level:.3f}) "
157
- status = "🔊 Processing audio... Speaking detected!"
158
- else:
159
- new_text = f"[{timestamp}] Silence "
160
- status = "🔇 Listening... Speak into microphone."
161
-
162
- if accumulated_text:
163
- updated_text = accumulated_text + "\n" + new_text
164
- else:
165
- updated_text = new_text
166
-
167
- # Limit text length
168
- lines = updated_text.split('\n')
169
- if len(lines) > 20:
170
- updated_text = '\n'.join(lines[-20:])
171
-
172
- return updated_text, status
173
-
174
  except Exception as e:
175
- return accumulated_text or "", f" Error: {str(e)}"
176
 
177
- # Create the Enhanced Interface
178
  with gr.Blocks(
179
- title="🎬 Advanced Lip Sync Tool with Microphone",
180
  theme=gr.themes.Soft(),
181
- css="""
182
- /* Base Responsive Design */
183
- .gradio-container {
184
- max-width: 1200px !important;
185
- margin: 0 auto;
186
- padding: 10px;
187
- }
188
-
189
- /* Mobile-First Responsive Design */
190
- @media (max-width: 768px) {
191
- .gradio-container {
192
- max-width: 100% !important;
193
- padding: 5px !important;
194
- margin: 0 !important;
195
- }
196
-
197
- .gr-row {
198
- flex-direction: column !important;
199
- }
200
-
201
- .gr-column {
202
- width: 100% !important;
203
- margin-bottom: 15px !important;
204
- }
205
-
206
- .gr-button {
207
- width: 100% !important;
208
- height: 50px !important;
209
- font-size: 16px !important;
210
- margin: 10px 0 !important;
211
- }
212
-
213
- .gr-textbox textarea {
214
- font-size: 16px !important;
215
- min-height: 120px !important;
216
- }
217
-
218
- .gr-audio {
219
- width: 100% !important;
220
- }
221
-
222
- .gr-video {
223
- width: 100% !important;
224
- max-height: 300px !important;
225
- }
226
-
227
- .gr-dropdown {
228
- width: 100% !important;
229
- font-size: 16px !important;
230
- }
231
-
232
- .gr-tabs {
233
- width: 100% !important;
234
- }
235
-
236
- .gr-tab-nav {
237
- flex-wrap: wrap !important;
238
- }
239
-
240
- .gr-tab-nav button {
241
- min-width: 120px !important;
242
- padding: 12px 8px !important;
243
- font-size: 14px !important;
244
- }
245
- }
246
-
247
- /* Tablet Design */
248
- @media (min-width: 769px) and (max-width: 1024px) {
249
- .gradio-container {
250
- max-width: 95% !important;
251
- padding: 15px !important;
252
- }
253
-
254
- .gr-button {
255
- height: 45px !important;
256
- font-size: 15px !important;
257
- }
258
-
259
- .gr-textbox textarea {
260
- font-size: 15px !important;
261
- }
262
- }
263
-
264
- /* Enhanced Mobile Components */
265
- .microphone-section {
266
- border: 2px dashed #4CAF50;
267
- border-radius: 15px;
268
- padding: 20px;
269
- margin: 10px 0;
270
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
271
- box-shadow: 0 4px 15px rgba(0,0,0,0.1);
272
- }
273
-
274
- @media (max-width: 768px) {
275
- .microphone-section {
276
- padding: 15px !important;
277
- margin: 5px 0 !important;
278
- border-radius: 10px !important;
279
- }
280
- }
281
-
282
- /* Button Enhancements */
283
- .gradio-button {
284
- background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
285
- border: none;
286
- color: white;
287
- font-weight: bold;
288
- border-radius: 10px;
289
- transition: all 0.3s ease;
290
- box-shadow: 0 2px 10px rgba(102, 126, 234, 0.3);
291
- }
292
-
293
- .gradio-button:hover {
294
- transform: translateY(-2px);
295
- box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4);
296
- }
297
-
298
- @media (max-width: 768px) {
299
- .gradio-button {
300
- border-radius: 25px !important;
301
- font-weight: 600 !important;
302
- text-transform: uppercase !important;
303
- letter-spacing: 0.5px !important;
304
- }
305
- }
306
-
307
- /* Tab Navigation */
308
- .tab-nav {
309
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
310
- border-radius: 10px 10px 0 0;
311
- }
312
-
313
- @media (max-width: 768px) {
314
- .tab-nav {
315
- border-radius: 5px 5px 0 0 !important;
316
- }
317
- }
318
-
319
- /* Loading and Progress */
320
- .gr-progress {
321
- border-radius: 10px !important;
322
- }
323
-
324
- @media (max-width: 768px) {
325
- .gr-progress {
326
- height: 8px !important;
327
- }
328
- }
329
-
330
- /* Audio/Video Components */
331
- .gr-audio, .gr-video {
332
- border-radius: 10px;
333
- overflow: hidden;
334
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
335
- }
336
-
337
- @media (max-width: 768px) {
338
- .gr-audio, .gr-video {
339
- border-radius: 8px !important;
340
- }
341
- }
342
-
343
- /* Touch-Friendly Improvements */
344
- @media (max-width: 768px) {
345
- .gr-input, .gr-dropdown select {
346
- min-height: 44px !important;
347
- font-size: 16px !important;
348
- border-radius: 8px !important;
349
- padding: 12px !important;
350
- }
351
-
352
- .gr-checkbox {
353
- transform: scale(1.5) !important;
354
- margin: 15px !important;
355
- }
356
-
357
- .gr-slider {
358
- height: 44px !important;
359
- }
360
-
361
- .gr-slider input[type="range"] {
362
- height: 8px !important;
363
- }
364
-
365
- .gr-slider .gr-slider-thumb {
366
- width: 24px !important;
367
- height: 24px !important;
368
- }
369
- }
370
-
371
- /* Accessibility Improvements */
372
- @media (prefers-reduced-motion: reduce) {
373
- .gradio-button {
374
- transition: none !important;
375
- }
376
-
377
- .gradio-button:hover {
378
- transform: none !important;
379
- }
380
- }
381
-
382
- /* Dark Mode Support */
383
- @media (prefers-color-scheme: dark) {
384
- .microphone-section {
385
- background: linear-gradient(135deg, #2d3748 0%, #4a5568 100%) !important;
386
- border-color: #68d391 !important;
387
- }
388
- }
389
-
390
- /* High DPI Displays */
391
- @media (-webkit-min-device-pixel-ratio: 2), (min-resolution: 192dpi) {
392
- .gradio-button {
393
- border: 0.5px solid rgba(255,255,255,0.1) !important;
394
- }
395
- }
396
-
397
- /* Landscape Mobile */
398
- @media (max-width: 768px) and (orientation: landscape) {
399
- .gr-row {
400
- flex-direction: row !important;
401
- }
402
-
403
- .gr-column {
404
- width: 50% !important;
405
- margin-right: 10px !important;
406
- }
407
-
408
- .gr-video {
409
- max-height: 200px !important;
410
- }
411
- }
412
- """
413
  ) as demo:
414
 
415
- gr.Markdown("""
416
- # 🎬 Advanced Lip Sync Tool
417
- ### 🎤 Professional AI-Powered Lip Synchronization with Microphone Support
418
- Choose your input method: Upload files, record with microphone, or use text-to-speech!
419
- """)
 
 
 
 
 
 
 
 
 
420
 
421
  with gr.Tabs():
422
-
423
- # Tab 1: Basic Lip Sync with Microphone
424
- with gr.TabItem("🎤 Microphone + Video"):
425
- gr.Markdown("### Upload video and record audio with microphone")
426
-
427
  with gr.Row():
428
- with gr.Column():
 
429
  video_input1 = gr.Video(
430
- label="📹 Upload Video File",
431
  height=300
432
  )
433
 
434
- # Model selection
435
- model_choice1 = gr.Dropdown(
436
- choices=["Wav2Lip", "MuseTalk", "SadTalker"],
437
- value="Wav2Lip",
438
- label="🤖 Choose AI Model"
439
- )
440
-
441
- quality_choice1 = gr.Dropdown(
442
- choices=["720p", "1080p"],
443
- value="720p",
444
- label="📺 Output Quality"
445
- )
446
-
447
- with gr.Column(elem_classes="microphone-section"):
448
- gr.Markdown("#### 🎤 Audio Input Options")
449
-
450
- # Multiple audio input options
451
  audio_input1 = gr.Audio(
 
452
  sources=["microphone", "upload"],
453
- type="numpy",
454
- label="🎵 Record Audio or Upload File",
455
- format="wav",
456
- show_download_button=True,
457
- show_share_button=False,
458
- interactive=True,
459
- elem_classes="mobile-audio",
460
- waveform_options=gr.WaveformOptions(
461
- show_recording_waveform=True,
462
- skip_length=10
463
- )
464
  )
465
 
466
- gr.Markdown("""
467
- **📱 Mobile Instructions:**
468
- - 🎤 **Tap microphone** icon to start recording
469
- - 📁 **Drag & drop** or tap to upload audio file
470
- - ⏺️ **Speak clearly** for 5-30 seconds
471
- - ✅ **iPhone/iPad**: Use Safari for best experience
472
- - ✅ **Android**: Chrome or Firefox work well
473
- - 🔊 **Tip**: Use headphones to avoid feedback
474
- """, elem_classes="mobile-instructions")
475
-
476
- process_btn1 = gr.Button(
477
- "🚀 Generate Lip Sync",
478
- variant="primary",
479
- size="lg",
480
- scale=2,
481
- elem_classes="mobile-button"
482
- )
483
-
484
- with gr.Row():
485
- with gr.Column(scale=1):
486
- clear_btn1 = gr.Button("🗑️ Clear", variant="secondary", size="sm")
487
  with gr.Column(scale=1):
488
- download_btn1 = gr.Button("📥 Download", variant="secondary", size="sm")
489
-
490
- result_output1 = gr.Textbox(
491
- label="📊 Processing Results",
492
- lines=8,
493
- show_copy_button=True,
494
- interactive=False,
495
- elem_classes="mobile-textbox"
496
- )
 
 
497
 
498
  process_btn1.click(
499
- fn=process_lip_sync_basic,
500
- inputs=[video_input1, audio_input1, model_choice1, quality_choice1],
501
- outputs=result_output1
502
  )
503
 
504
- # Tab 2: Text-to-Speech + Lip Sync
505
- with gr.TabItem("📝 Text to Speech"):
506
- gr.Markdown("### Convert text to speech and create lip sync")
507
-
508
  with gr.Row():
509
- with gr.Column():
 
510
  video_input2 = gr.Video(
511
- label="📹 Upload Video File",
512
- height=300
513
  )
514
 
 
515
  text_input = gr.Textbox(
516
- label="📝 Enter Text (اردو/English)",
517
- placeholder="یہاں اپنا متن لکھیں... یا Type your text here...",
518
- lines=6,
519
- max_lines=10
520
- )
521
-
522
- with gr.Column():
523
- model_choice2 = gr.Dropdown(
524
- choices=["Wav2Lip", "MuseTalk", "SadTalker"],
525
- value="MuseTalk",
526
- label="🤖 Choose AI Model"
527
  )
528
 
529
- voice_settings = gr.Dropdown(
530
- choices=["Male Voice", "Female Voice", "Natural Voice"],
531
- value="Natural Voice",
532
- label="🗣️ Voice Type"
533
  )
534
 
535
- language_choice = gr.Dropdown(
536
- choices=["en", "ur", "hi"],
537
- value="en",
538
- label="🌐 Language"
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  )
540
-
541
- process_btn2 = gr.Button("🎙️ Generate Speech + Lip Sync", variant="primary", size="lg")
542
- result_output2 = gr.Textbox(label="📊 Processing Results", lines=8, show_copy_button=True)
543
 
544
  process_btn2.click(
545
- fn=text_to_speech_and_sync,
546
- inputs=[video_input2, text_input, voice_settings, model_choice2, language_choice],
547
- outputs=result_output2
548
  )
549
 
550
- # Tab 3: Real-time Live Microphone
551
- with gr.TabItem("🔴 Live Recording"):
552
- gr.Markdown("### Real-time microphone transcription and monitoring")
553
-
554
  with gr.Row():
555
- with gr.Column():
556
- # Live audio streaming
 
 
 
 
 
 
 
 
557
  live_audio = gr.Audio(
 
558
  sources=["microphone"],
559
  streaming=True,
560
- label="🎤 Live Microphone (Streaming)",
561
  type="numpy"
562
  )
563
 
564
- gr.Markdown("""
565
- **🔴 Live Features:**
566
- - 🎤 Real-time audio capture
567
- - 📝 Live transcription
568
- - 📊 Audio level monitoring
569
- - ⚡ Instant feedback
570
- """)
571
 
572
- with gr.Column():
573
- transcription_state = gr.State("")
574
- live_transcription = gr.Textbox(
575
- label="📝 Live Audio Log",
576
- lines=12,
577
- interactive=False,
578
- show_copy_button=True
579
  )
580
-
581
- live_status = gr.Textbox(
582
- label="📊 Live Status",
583
- lines=3,
584
- interactive=False
585
  )
586
 
587
- # Real-time event handler
588
- live_audio.stream(
589
- fn=real_time_transcription,
590
- inputs=[live_audio, transcription_state],
591
- outputs=[transcription_state, live_status],
592
- stream_every=1.0,
593
- time_limit=300 # 5 minutes max
594
  )
595
-
596
- # Tab 4: Help & Info
597
- with gr.TabItem("ℹ️ Help & Settings"):
598
- gr.Markdown("""
599
- ## 📱 iPad Optimization Guide
600
 
601
- ### 🎤 Microphone Setup:
602
- 1. **Allow Permissions**: Safari/Chrome will ask for microphone access
603
- 2. **Test Audio**: Use Live Recording tab to test microphone
604
- 3. **Quality**: Speak 6-12 inches from microphone
605
- 4. **Environment**: Choose quiet location for best results
606
 
607
- ### 🚀 Model Comparison:
 
 
 
 
 
 
 
608
 
609
- | Model | Speed | Quality | Best For |
610
- |-------|-------|---------|----------|
611
- | **Wav2Lip** | ⭐⭐⭐ | ⭐⭐⭐⭐⭐ | High accuracy, any identity |
612
- | **MuseTalk** | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ | Real-time, 30fps+ |
613
- | **SadTalker** | ⭐⭐ | ⭐⭐⭐⭐ | Natural expressions |
 
 
614
 
615
- ### 📝 File Format Support:
616
- - **Video**: MP4, AVI, MOV, WebM
617
- - **Audio**: WAV, MP3, M4A
618
- - **Text**: UTF-8 (اردو/English support)
619
 
620
- ### ⚙️ Technical Specifications:
621
- - **Max Video**: 2GB, 10 minutes
622
- - **Max Audio**: 30 seconds recording
623
- - **Resolution**: Up to 1080p output
624
- - **Processing**: Cloud-based GPU acceleration
625
 
626
- ### 🐛 Troubleshooting:
627
- - **No microphone**: Check browser permissions
628
- - **Slow processing**: Try 720p quality
629
- - **Audio issues**: Use WAV format
630
- - **iPad issues**: Use Safari browser
631
- """)
632
 
633
- with gr.Row():
634
- gr.Markdown("""
635
- ### 📞 Support:
636
- - 💬 **Community**: [Hugging Face Discussions](https://huggingface.co/discussions)
637
- - 📧 **Email**: support@example.com
638
- - 🐛 **Bug Reports**: [GitHub Issues](https://github.com/example/repo)
639
- """)
640
-
641
  # Footer
642
- gr.Markdown("""
643
- ---
644
- ### 🌟 **Advanced Lip Sync Tool** | Powered by AI | Mobile Optimized
645
- 📱 **Perfect for**: iPhone, iPad, Android, Desktop | Made with ❤️ using Gradio & Hugging Face | Version 2.0 | © 2025
646
- """)
 
 
 
 
 
 
 
647
 
648
- # Launch configuration for mobile optimization
649
  if __name__ == "__main__":
650
- demo.launch(
651
- server_name="0.0.0.0", # Access from any device
652
- server_port=7860,
653
- share=True, # Create public link for mobile testing
654
- show_error=True, # Better debugging on mobile
655
- enable_queue=True, # Handle multiple mobile users
656
- max_threads=4, # Mobile performance optimization
657
- inbrowser=True, # Auto-open browser
658
- favicon_path=None, # Use default favicon
659
- quiet=False, # Show startup logs for debugging
660
- auth=None, # No authentication for easier mobile access
661
- max_file_size="2gb" # Allow large video uploads
662
- )
 
1
  import gradio as gr
2
  import numpy as np
 
3
  import subprocess
 
4
  import tempfile
5
+ import os
 
6
  from pathlib import Path
7
  import cv2
8
+ import torch
 
9
 
10
+ # Mobile responsive CSS
11
+ mobile_css = """
12
+ /* Mobile First Design */
13
+ @media (max-width: 768px) {
14
+ .gradio-container {
15
+ padding: 10px !important;
16
+ margin: 0 !important;
17
+ }
18
+
19
+ .tab-nav {
20
+ flex-wrap: wrap !important;
21
+ }
22
+
23
+ .tab-nav button {
24
+ min-width: 80px !important;
25
+ font-size: 12px !important;
26
+ padding: 8px 12px !important;
27
+ }
28
+
29
+ .input-container {
30
+ margin: 10px 0 !important;
31
+ }
32
+
33
+ .output-video {
34
+ max-width: 100% !important;
35
+ height: auto !important;
36
+ }
37
 
38
+ .btn-primary {
39
+ width: 100% !important;
40
+ margin: 10px 0 !important;
41
+ padding: 12px !important;
42
+ font-size: 16px !important;
43
+ }
44
+ }
45
+
46
+ @media (min-width: 769px) and (max-width: 1024px) {
47
+ .gradio-container {
48
+ max-width: 95% !important;
49
+ }
50
+ }
51
+
52
+ @media (min-width: 1025px) {
53
+ .gradio-container {
54
+ max-width: 1200px !important;
55
+ margin: 0 auto !important;
56
+ }
57
+ }
58
 
59
+ .header-title {
60
+ text-align: center !important;
61
+ margin-bottom: 20px !important;
62
+ color: #2563eb !important;
63
+ }
64
 
65
+ .feature-card {
66
+ border: 1px solid #e5e7eb !important;
67
+ border-radius: 8px !important;
68
+ padding: 15px !important;
69
+ margin: 10px 0 !important;
70
+ background: #f9fafb !important;
71
+ }
72
+ """
73
+
74
+ def process_lip_sync_basic(video_file, audio_input):
75
+ """Basic lip sync processing using Wav2Lip"""
76
+ if video_file is None or audio_input is None:
77
+ return None, "❌ Video اور Audio دونوں required ہیں!"
78
+
79
  try:
80
+ # Handle audio input (could be file path or tuple for mic)
81
+ if isinstance(audio_input, tuple):
82
+ # Microphone input: (sample_rate, audio_data)
83
+ sample_rate, audio_data = audio_input
 
 
 
 
 
 
 
 
 
 
84
 
85
+ # Save temp audio file
86
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
87
+ import soundfile as sf
88
+ sf.write(temp_audio.name, audio_data, sample_rate)
89
  audio_path = temp_audio.name
90
  else:
91
  # File upload
92
+ audio_path = audio_input
 
 
 
 
 
 
 
 
 
93
 
94
+ # Placeholder for actual Wav2Lip processing
95
+ # In real implementation, you would use the Wav2Lip model here
 
 
 
 
 
 
 
 
 
 
 
96
 
97
+ # For now, return the original video with success message
98
+ return video_file, f"✅ Lip sync processing completed!\n📁 Video: {os.path.basename(video_file)}\n🎵 Audio: Processed successfully"
 
 
99
 
100
  except Exception as e:
101
+ return None, f"❌ Error: {str(e)}"
102
 
103
+ def process_text_to_speech_sync(video_file, text_input, voice_type):
104
+ """Text to Speech + Lip Sync"""
105
+ if video_file is None or not text_input.strip():
106
+ return None, "❌ Video اور Text دونوں required ہیں!"
107
+
108
  try:
109
+ # Placeholder for TTS + Lip sync processing
110
+ # Real implementation would:
111
+ # 1. Convert text to speech using selected voice
112
+ # 2. Apply lip sync to video using generated audio
 
 
 
 
 
 
 
 
 
 
 
113
 
114
+ return video_file, f"✅ Text-to-Speech Lip Sync completed!\n📝 Text: {text_input[:50]}...\n🎭 Voice: {voice_type}"
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  except Exception as e:
117
+ return None, f"❌ Error: {str(e)}"
118
 
119
+ def process_live_recording(video_file, live_audio):
120
+ """Live recording lip sync (placeholder)"""
121
+ if video_file is None:
122
+ return None, "❌ Video file required!"
123
+
124
+ if live_audio is None:
125
+ return video_file, "🔴 Recording... (یہاں live audio processing ہوگی)"
126
+
127
  try:
128
+ # Placeholder for real-time processing
129
+ return video_file, "✅ Live recording processed!"
 
 
 
 
 
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  except Exception as e:
132
+ return None, f"❌ Error: {str(e)}"
133
 
134
+ # Main Gradio Interface
135
  with gr.Blocks(
 
136
  theme=gr.themes.Soft(),
137
+ css=mobile_css,
138
+ title="Advanced Lip Sync Tool",
139
+ analytics_enabled=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  ) as demo:
141
 
142
+ # Header
143
+ gr.Markdown(
144
+ """
145
+ # 🎬 Advanced Lip Sync Tool
146
+ ### Professional Mobile-Friendly Lip Synchronization
147
+
148
+ **💡 Features:**
149
+ - 🎤 Microphone & File Audio Input
150
+ - 📝 Text-to-Speech Integration
151
+ - 🔴 Live Recording Support
152
+ - 📱 Mobile Responsive Design
153
+ """,
154
+ elem_classes=["header-title"]
155
+ )
156
 
157
  with gr.Tabs():
158
+ # Tab 1: Microphone + Video
159
+ with gr.TabItem("🎤 Microphone + Video", elem_id="tab-mic"):
 
 
 
160
  with gr.Row():
161
+ with gr.Column(scale=1):
162
+ gr.Markdown("### 📹 Upload Video", elem_classes=["feature-card"])
163
  video_input1 = gr.Video(
164
+ label="Video File",
165
  height=300
166
  )
167
 
168
+ gr.Markdown("### 🎵 Audio Input", elem_classes=["feature-card"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  audio_input1 = gr.Audio(
170
+ label="Audio (Microphone یا File)",
171
  sources=["microphone", "upload"],
172
+ type="numpy"
 
 
 
 
 
 
 
 
 
 
173
  )
174
 
175
+ process_btn1 = gr.Button(
176
+ "🚀 Process Lip Sync",
177
+ variant="primary",
178
+ size="lg",
179
+ scale=2
180
+ )
181
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  with gr.Column(scale=1):
183
+ gr.Markdown("### 🎬 Result", elem_classes=["feature-card"])
184
+ output_video1 = gr.Video(
185
+ label="Processed Video",
186
+ height=300,
187
+ elem_classes=["output-video"]
188
+ )
189
+ output_message1 = gr.Textbox(
190
+ label="Status",
191
+ lines=4,
192
+ max_lines=6
193
+ )
194
 
195
  process_btn1.click(
196
+ process_lip_sync_basic,
197
+ inputs=[video_input1, audio_input1],
198
+ outputs=[output_video1, output_message1]
199
  )
200
 
201
+ # Tab 2: Text to Speech
202
+ with gr.TabItem("📝 Text to Speech", elem_id="tab-tts"):
 
 
203
  with gr.Row():
204
+ with gr.Column(scale=1):
205
+ gr.Markdown("### 📹 Upload Video", elem_classes=["feature-card"])
206
  video_input2 = gr.Video(
207
+ label="Video File",
208
+ height=250
209
  )
210
 
211
+ gr.Markdown("### 📝 Text Input", elem_classes=["feature-card"])
212
  text_input = gr.Textbox(
213
+ label="Text for Speech",
214
+ lines=4,
215
+ placeholder="یہاں اپنا text لکھیں جو speech میں convert ہوگا..."
 
 
 
 
 
 
 
 
216
  )
217
 
218
+ voice_type = gr.Dropdown(
219
+ label="🎭 Voice Type",
220
+ choices=["Male", "Female", "Child", "Robot"],
221
+ value="Female"
222
  )
223
 
224
+ process_btn2 = gr.Button(
225
+ "🗣️ Generate Speech + Lip Sync",
226
+ variant="primary",
227
+ size="lg"
228
+ )
229
+
230
+ with gr.Column(scale=1):
231
+ gr.Markdown("### 🎬 Result", elem_classes=["feature-card"])
232
+ output_video2 = gr.Video(
233
+ label="TTS Lip Sync Result",
234
+ height=300,
235
+ elem_classes=["output-video"]
236
+ )
237
+ output_message2 = gr.Textbox(
238
+ label="Status",
239
+ lines=4,
240
+ max_lines=6
241
  )
 
 
 
242
 
243
  process_btn2.click(
244
+ process_text_to_speech_sync,
245
+ inputs=[video_input2, text_input, voice_type],
246
+ outputs=[output_video2, output_message2]
247
  )
248
 
249
+ # Tab 3: Live Recording
250
+ with gr.TabItem("🔴 Live Recording", elem_id="tab-live"):
 
 
251
  with gr.Row():
252
+ with gr.Column(scale=1):
253
+ gr.Markdown("### 📹 Upload Video", elem_classes=["feature-card"])
254
+ video_input3 = gr.Video(
255
+ label="Video File",
256
+ height=250
257
+ )
258
+
259
+ gr.Markdown("### 🎙️ Live Recording", elem_classes=["feature-card"])
260
+ gr.Markdown("**Instructions:** Record button دبا کر real-time audio record کریں")
261
+
262
  live_audio = gr.Audio(
263
+ label="Live Audio Recording",
264
  sources=["microphone"],
265
  streaming=True,
 
266
  type="numpy"
267
  )
268
 
269
+ process_btn3 = gr.Button(
270
+ "🔴 Process Live Sync",
271
+ variant="primary",
272
+ size="lg"
273
+ )
 
 
274
 
275
+ with gr.Column(scale=1):
276
+ gr.Markdown("### 🎬 Live Result", elem_classes=["feature-card"])
277
+ output_video3 = gr.Video(
278
+ label="Live Sync Result",
279
+ height=300,
280
+ elem_classes=["output-video"]
 
281
  )
282
+ output_message3 = gr.Textbox(
283
+ label="Live Status",
284
+ lines=4,
285
+ max_lines=6
 
286
  )
287
 
288
+ process_btn3.click(
289
+ process_live_recording,
290
+ inputs=[video_input3, live_audio],
291
+ outputs=[output_video3, output_message3]
 
 
 
292
  )
293
+
294
+ # Tab 4: Advanced Settings
295
+ with gr.TabItem("⚙️ Advanced Settings", elem_id="tab-settings"):
296
+ gr.Markdown("### 🛠️ Model Configuration", elem_classes=["feature-card"])
 
297
 
298
+ model_choice = gr.Dropdown(
299
+ label="🤖 Lip Sync Model",
300
+ choices=["Wav2Lip (Fast)", "MuseTalk (Quality)", "SadTalker (Advanced)"],
301
+ value="Wav2Lip (Fast)"
302
+ )
303
 
304
+ quality_setting = gr.Slider(
305
+ label="📊 Output Quality",
306
+ minimum=480,
307
+ maximum=1080,
308
+ value=720,
309
+ step=240,
310
+ info="Higher = Better quality, Slower processing"
311
+ )
312
 
313
+ fps_setting = gr.Slider(
314
+ label="🎬 FPS Setting",
315
+ minimum=15,
316
+ maximum=60,
317
+ value=25,
318
+ step=5
319
+ )
320
 
321
+ gr.Markdown("### 📱 Mobile Optimization", elem_classes=["feature-card"])
 
 
 
322
 
323
+ mobile_mode = gr.Checkbox(
324
+ label="📱 Mobile Optimization Mode",
325
+ value=True,
326
+ info="Optimize for mobile devices (faster processing)"
327
+ )
328
 
329
+ batch_processing = gr.Checkbox(
330
+ label="⚡ Batch Processing",
331
+ value=False,
332
+ info="Process multiple files (desktop only)"
333
+ )
 
334
 
335
+ save_btn = gr.Button("💾 Save Settings", variant="secondary")
336
+ settings_status = gr.Textbox(label="Settings Status", lines=2)
337
+
338
+ save_btn.click(
339
+ lambda *args: "✅ Settings saved successfully!",
340
+ outputs=[settings_status]
341
+ )
342
+
343
  # Footer
344
+ gr.Markdown(
345
+ """
346
+ ---
347
+ ### 📱 Mobile Instructions:
348
+ - **iPad/Tablet:** All features fully supported
349
+ - **Phone:** Optimized for touch interactions
350
+ - **Performance:** Auto-adjusts based on device capabilities
351
+
352
+ **🔧 Powered by:** Gradio + Hugging Face Spaces | **👨‍💻 Author:** MiniMax Agent
353
+ """,
354
+ elem_classes=["feature-card"]
355
+ )
356
 
357
+ # Launch the app - FIXED VERSION
358
  if __name__ == "__main__":
359
+ demo.launch()