sheikhed committed on
Commit
da25681
·
verified ·
1 Parent(s): 82e496e

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/Ava.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ models/Maria.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ models/Minh.mp4 filter=lfs diff=lfs merge=lfs -text
39
+ models/Sonam.mp4 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ import time
5
+ import subprocess
6
+ import gradio as gr
7
+ import uuid
8
+ from dotenv import load_dotenv
9
+
10
# Pull settings from a local .env file (if any) into the process environment
# before they are read below.
load_dotenv()

# Credentials: A_KEY is sent as the ElevenLabs "xi-api-key" header, B_KEY as
# the "x-api-key" header of the lipsync service. Each is None when unset.
A_KEY = os.environ.get("A_KEY")
B_KEY = os.environ.get("B_KEY")

# Endpoints: API_URL is the lipsync job endpoint (jobs are polled at
# f"{API_URL}/{job_id}"); UPLOAD_URL receives the multipart file uploads.
API_URL = os.environ.get("API_URL")
UPLOAD_URL = os.environ.get("UPLOAD_URL")
20
+
21
def get_voices():
    """Fetch the voices available to the configured ElevenLabs account.

    Returns:
        A list of ``(name, voice_id)`` tuples. An empty list is returned when
        the request cannot be completed (network error or timeout), when the
        service answers with a non-200 status, or when the body is not JSON.
    """
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {
        "Accept": "application/json",
        "xi-api-key": A_KEY,
    }

    try:
        # This runs while the UI is being built; without a timeout a stalled
        # connection would block start-up indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON; treat it like any other failed lookup.
        return []
    return [(voice['name'], voice['voice_id']) for voice in payload.get('voices', [])]
32
+
33
def get_video_models():
    """List the video files offered as lipsync source models.

    Looks in the ``models`` directory relative to the current working
    directory and keeps entries ending in ``.mp4``, ``.avi`` or ``.mov``.

    Returns:
        The matching file names in sorted order, so the default selection in
        the UI is stable between runs. An empty list is returned when the
        ``models`` directory does not exist.
    """
    try:
        entries = os.listdir("models")
    except FileNotFoundError:
        return []
    # os.listdir gives entries in arbitrary order; sort for determinism.
    return sorted(f for f in entries if f.endswith((".mp4", ".avi", ".mov")))
35
+
36
def text_to_speech(voice_id, text, session_id):
    """Synthesize ``text`` with an ElevenLabs voice and save it as an MP3.

    Args:
        voice_id: ElevenLabs voice identifier, inserted into the request URL.
        text: The text to be spoken.
        session_id: Unique token used to name the temporary output file.

    Returns:
        The path of the written file (``temp_voice_<session_id>.mp3`` in the
        current working directory), or ``None`` when the request fails
        (network error, timeout, or non-200 status).
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": A_KEY,
    }

    data = {
        "text": text,
        "model_id": "eleven_turbo_v2_5",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    try:
        # A timeout keeps a stalled connection from hanging the request handler.
        response = requests.post(url, json=data, headers=headers, timeout=120)
    except requests.RequestException:
        # Callers already treat None as "speech generation failed".
        return None
    if response.status_code != 200:
        return None

    # Save temporary audio file with session ID
    audio_file_path = f'temp_voice_{session_id}.mp3'
    with open(audio_file_path, 'wb') as audio_file:
        audio_file.write(response.content)
    return audio_file_path
63
+
64
def upload_file(file_path):
    """Upload a local file to ``UPLOAD_URL`` as a multipart form post.

    The file is sent in the ``fileToUpload`` field together with
    ``reqtype=fileupload``.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The stripped response body on HTTP 200 (the caller uses it as the
        URL of the uploaded file), otherwise ``None``.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        requests.RequestException: On network failure or timeout.
    """
    with open(file_path, 'rb') as file:
        files = {'fileToUpload': (os.path.basename(file_path), file)}
        data = {'reqtype': 'fileupload'}
        # (connect, read) timeout: fail fast on an unreachable host but allow
        # large video uploads time to finish instead of hanging forever.
        response = requests.post(UPLOAD_URL, files=files, data=data, timeout=(10, 300))

    if response.status_code == 200:
        return response.text.strip()
    return None
73
+
74
def lipsync_api_call(video_url, audio_url):
    """Submit a lipsync job to the service at ``API_URL``.

    Args:
        video_url: Publicly reachable URL of the source video.
        audio_url: Publicly reachable URL of the speech audio.

    Returns:
        The decoded JSON body of the response. The caller reads the job id
        from its ``"id"`` key and treats an ``"error"`` or ``"message"`` key
        as a failure.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": B_KEY,
    }

    payload = {
        "audioUrl": audio_url,
        "videoUrl": video_url,
        "maxCredits": 1000,
        "model": "sync-1.6.0",
        "synergize": True,
        "pads": [0, 5, 0, 0],
        "synergizerStrength": 1,
    }

    # json= lets requests serialize the payload; the timeout prevents an
    # unresponsive service from blocking the handler indefinitely.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
92
+
93
def check_job_status(job_id):
    """Poll a lipsync job until it finishes, fails, or the attempts run out.

    The job is queried at ``f"{API_URL}/{job_id}"`` up to 30 times with a
    10-second pause between attempts.

    Args:
        job_id: Identifier returned when the job was submitted.

    Returns:
        The value of ``"videoUrl"`` once the status is ``"COMPLETED"``;
        ``None`` if the status becomes ``"FAILED"`` or the job is still
        unfinished after the last attempt.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status from the service.
        ValueError: If a response body is not valid JSON.
    """
    headers = {"x-api-key": B_KEY}
    max_attempts = 30  # Limit the number of attempts
    poll_interval = 10  # Seconds to wait between attempts

    for attempt in range(max_attempts):
        response = requests.get(f"{API_URL}/{job_id}", headers=headers, timeout=30)
        # Surface HTTP errors explicitly rather than as a KeyError on "status".
        response.raise_for_status()
        data = response.json()

        status = data.get("status")
        if status == "COMPLETED":
            return data.get("videoUrl")
        if status == "FAILED":
            return None

        # No point sleeping once the final attempt has been made.
        if attempt < max_attempts - 1:
            time.sleep(poll_interval)
    return None
108
+
109
def combine_audio_video(video_path, audio_path, output_path):
    """Mux the video of one file with the audio of another using ffmpeg.

    Args:
        video_path: Input whose video stream is used (first ffmpeg input).
        audio_path: Input whose audio stream is used (second ffmpeg input).
        output_path: Destination file; ``-y`` overwrites it if present.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    sources = ['-i', video_path, '-i', audio_path]
    # Video from input 0, audio from input 1.
    stream_selection = ['-map', '0:v', '-map', '1:a']
    # Copy the video stream as-is; encode the audio as AAC.
    codecs = ['-c:v', 'copy', '-c:a', 'aac']
    # Stop at the end of the shorter stream.
    output_options = ['-shortest', '-y', output_path]

    subprocess.run(
        ['ffmpeg', *sources, *stream_selection, *codecs, *output_options],
        check=True,
    )
117
+
118
def process_video(voice, model, text, progress=gr.Progress()):
    """Generate a talking-head video for ``text``.

    Speech is synthesized first. The video and audio are then uploaded and
    sent to the lipsync service. If any step of that fails, the function
    falls back to muxing the audio onto the unmodified video with ffmpeg.
    The temporary audio file is removed in every case.

    Args:
        voice: ElevenLabs voice id used for speech synthesis.
        model: File name of the source video inside the ``models`` directory.
        text: The text to be spoken.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is ``None``
        when no video could be produced.
    """
    # Validate before synthesizing speech: joining a missing model into a
    # path would raise after the temp audio file already exists, leaking it.
    if not model:
        return None, "No video model selected."

    session_id = str(uuid.uuid4())  # Generate a unique session ID
    progress(0, desc="Generating speech...")
    audio_path = text_to_speech(voice, text, session_id)
    if not audio_path:
        return None, "Failed to generate speech audio."

    progress(0.2, desc="Processing video...")
    video_path = os.path.join("models", model)
    output_path = f"output_{session_id}.mp4"

    try:
        progress(0.3, desc="Uploading files...")
        video_url = upload_file(video_path)
        audio_url = upload_file(audio_path)

        if not video_url or not audio_url:
            raise Exception("Failed to upload files")

        progress(0.4, desc="Initiating lipsync...")
        job_data = lipsync_api_call(video_url, audio_url)

        if "error" in job_data or "message" in job_data:
            raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))

        job_id = job_data["id"]

        progress(0.5, desc="Processing lipsync...")
        result_url = check_job_status(job_id)
        if not result_url:
            raise Exception("Lipsync processing failed or timed out")

        progress(0.9, desc="Downloading result...")
        response = requests.get(result_url, timeout=300)
        # Do not save an HTTP error page as the video; trigger the fallback.
        response.raise_for_status()
        with open(output_path, "wb") as f:
            f.write(response.content)
        progress(1.0, desc="Complete!")
        return output_path, "Lipsync completed successfully!"

    except Exception as e:
        # Deliberate catch-all: any lipsync failure degrades to a plain mux,
        # and the original error is reported in the status message.
        progress(0.8, desc="Falling back to simple combination...")
        try:
            combine_audio_video(video_path, audio_path, output_path)
            progress(1.0, desc="Complete!")
            return output_path, f"Used fallback method. Original error: {str(e)}"
        except Exception as fallback_error:
            return None, f"All methods failed. Error: {str(fallback_error)}"
    finally:
        # Cleanup
        if os.path.exists(audio_path):
            os.remove(audio_path)
171
+
172
def create_interface():
    """Build the Gradio Blocks UI.

    The voice list and the video model list are fetched once, when the
    interface is built. The left column holds the voice dropdown, the model
    dropdown, the text box and the generate button; the right column shows
    the resulting video and a status line.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    voices = get_voices()
    models = get_video_models()

    with gr.Blocks() as app:
        gr.Markdown("# JSON Train")
        with gr.Row():
            with gr.Column():
                voice_dropdown = gr.Dropdown(
                    choices=[v[0] for v in voices],
                    label="Select",
                    value=voices[0][0] if voices else None,
                )
                model_dropdown = gr.Dropdown(
                    choices=models,
                    label="Select",
                    value=models[0] if models else None,
                )
                text_input = gr.Textbox(label="json", lines=3)
                generate_btn = gr.Button("Generate Video")
            with gr.Column():
                video_output = gr.Video(label="Generated Video")
                status_output = gr.Textbox(label="Status", interactive=False)

        # Gradio's documented pattern is to declare the gr.Progress() default
        # on the event handler itself, so the tracker is declared here and
        # handed on to process_video.
        def on_generate(voice_name, model_name, text, progress=gr.Progress()):
            # The dropdown shows voice names; map the chosen name back to its id.
            voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
            if not voice_id:
                return None, "Invalid voice selected."
            return process_video(voice_id, model_name, text, progress)

        generate_btn.click(
            fn=on_generate,
            inputs=[voice_dropdown, model_dropdown, text_input],
            outputs=[video_output, status_output],
        )

    return app
201
+
202
# Script entry point: build the interface and start serving it. Nothing is
# launched when this module is merely imported.
if __name__ == "__main__":
    app = create_interface()
    app.launch()
models/Ava.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7061c276f710102e04b2a8a8cac40cb570a3a5e4ef46a49135f55a7726f8543
3
+ size 13530669
models/Lan.mp4 ADDED
Binary file (992 kB). View file
 
models/Maria.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4b960ecaee32807375f8770089565b99f0e454c5bcf940b0927adc211204bec
3
+ size 17503551
models/Minh.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3888418f8d895093ba7da7bf98013d6bc43d78529ae707f6ce891314fd8792f
3
+ size 1228064
models/Sonam.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a4afc8b3bc405f3307051bbb532478b3abfc9766e9ad21105248c8bf9ee5030
3
+ size 30486669
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ requests
2
+ gradio