sheikhed committed
Commit dc2c5a4 · verified · 1 Parent(s): f69554c

Update app.py

Files changed (1)
  1. app.py +17 -62
app.py CHANGED
@@ -5,13 +5,8 @@ import subprocess
  import gradio as gr
  import uuid
  import os
- import logging
  from dotenv import load_dotenv

- # Set up logging
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
- logger = logging.getLogger(__name__)
-
  # Load environment variables
  load_dotenv()

@@ -35,7 +30,6 @@ def get_voices():
  ]

  def text_to_speech(voice, text, session_id):
- logger.info(f"Starting text-to-speech conversion for session {session_id}")
  url = "https://api.openai.com/v1/audio/speech"

  headers = {
@@ -49,34 +43,27 @@ def text_to_speech(voice, text, session_id):
  "voice": voice
  }

- logger.debug(f"Sending request to OpenAI TTS API for session {session_id}")
  response = requests.post(url, json=data, headers=headers)
  if response.status_code != 200:
- logger.error(f"Failed to generate speech audio for session {session_id}. Status code: {response.status_code}")
  return None

  # Save temporary audio file with session ID
  audio_file_path = f'tempvoice{session_id}.mp3'
  with open(audio_file_path, 'wb') as audio_file:
  audio_file.write(response.content)
- logger.info(f"Audio file saved: {audio_file_path}")
  return audio_file_path

  def upload_file(file_path):
- logger.info(f"Uploading file: {file_path}")
  with open(file_path, 'rb') as file:
  files = {'fileToUpload': (os.path.basename(file_path), file)}
  data = {'reqtype': 'fileupload'}
  response = requests.post(UPLOAD_URL, files=files, data=data)

  if response.status_code == 200:
- logger.info(f"File uploaded successfully: {file_path}")
  return response.text.strip()
- logger.error(f"Failed to upload file: {file_path}. Status code: {response.status_code}")
  return None

  def lipsync_api_call(video_url, audio_url):
- logger.info(f"Initiating lip-sync API call with video: {video_url} and audio: {audio_url}")
  headers = {
  "Authorization": f"Bearer {REPLICATE_API_TOKEN}",
  "Content-Type": "application/json",
@@ -91,50 +78,38 @@ def lipsync_api_call(video_url, audio_url):
  }
  }

- logger.debug(f"Sending request to Replicate API with data: {json.dumps(data)}")
  response = requests.post(REPLICATE_API_URL, headers=headers, json=data)
- logger.debug(f"Received response from Replicate API: {response.text}")
  return response.json()

  def check_job_status(prediction_id):
- logger.info(f"Checking job status for prediction ID: {prediction_id}")
  headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}"}
  max_attempts = 30 # Limit the number of attempts

- for attempt in range(max_attempts):
- logger.debug(f"Attempt {attempt + 1} to check job status")
+ for _ in range(max_attempts):
  response = requests.get(f"{REPLICATE_API_URL}/{prediction_id}", headers=headers)
  data = response.json()
- logger.debug(f"Job status response: {json.dumps(data)}")

  if data["status"] == "succeeded":
- logger.info(f"Job completed successfully for prediction ID: {prediction_id}")
  return data["output"]
  elif data["status"] == "failed":
- logger.error(f"Job failed for prediction ID: {prediction_id}")
  return None

- logger.info(f"Job still in progress. Waiting for 10 seconds before next check.")
  time.sleep(10)
-
- logger.warning(f"Max attempts reached for prediction ID: {prediction_id}")
  return None

  def get_media_duration(file_path):
- logger.info(f"Getting media duration for: {file_path}")
+ # Fetch media duration using ffprobe
  cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- duration = float(result.stdout.strip())
- logger.info(f"Media duration: {duration} seconds")
- return duration
+ return float(result.stdout.strip())

  def combine_audio_video(video_path, audio_path, output_path):
- logger.info(f"Combining audio and video: video={video_path}, audio={audio_path}, output={output_path}")
+ # Get durations of both video and audio
  video_duration = get_media_duration(video_path)
  audio_duration = get_media_duration(audio_path)

  if video_duration > audio_duration:
- logger.info("Video longer than audio. Trimming video.")
+ # Trim video to match the audio length
  cmd = [
  'ffmpeg', '-i', video_path, '-i', audio_path,
  '-t', str(audio_duration), # Trim video to audio duration
@@ -143,60 +118,53 @@ def combine_audio_video(video_path, audio_path, output_path):
  '-y', output_path
  ]
  else:
- logger.info("Audio longer than video. Looping video.")
- loop_count = int(audio_duration // video_duration) + 1
+ # Loop video if it's shorter than audio
+ loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
  cmd = [
  'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
- '-t', str(audio_duration),
+ '-t', str(audio_duration), # Match the duration of the final video with the audio
  '-map', '0:v', '-map', '1:a',
  '-c:v', 'copy', '-c:a', 'aac',
  '-shortest', '-y', output_path
  ]

- logger.debug(f"Running ffmpeg command: {' '.join(cmd)}")
  subprocess.run(cmd, check=True)
- logger.info(f"Audio and video combined successfully: {output_path}")

  def create_video_from_image(image_url, session_id):
- logger.info(f"Creating video from image: {image_url}")
+ # Download the image
  response = requests.get(image_url)
  image_path = f"tempimage{session_id}.jpg"
  with open(image_path, "wb") as f:
  f.write(response.content)
- logger.info(f"Image downloaded: {image_path}")

+ # Create a 10-second video from the image
  video_path = f"tempvideo{session_id}.mp4"
  cmd = [
  'ffmpeg', '-loop', '1', '-i', image_path,
- '-vf', 'scale=trunc(iw/2)*2:trunc(ih/2)*2',
+ '-vf', 'scale=trunc(iw/2)*2:trunc(ih/2)*2', # Ensure width and height are divisible by 2
  '-c:v', 'libx264', '-t', '10', '-pix_fmt', 'yuv420p',
  video_path
  ]
- logger.debug(f"Running ffmpeg command: {' '.join(cmd)}")
  subprocess.run(cmd, check=True)
- logger.info(f"Video created from image: {video_path}")

+ # Clean up the temporary image file
  os.remove(image_path)
- logger.info(f"Temporary image file removed: {image_path}")

  return video_path

  def process_video(voice, url, text, progress=gr.Progress()):
- session_id = str(uuid.uuid4())
- logger.info(f"Starting video processing for session {session_id}")
+ session_id = str(uuid.uuid4()) # Generate a unique session ID
  progress(0, desc="Generating speech...")
  audio_path = text_to_speech(voice, text, session_id)
  if not audio_path:
- logger.error(f"Failed to generate speech audio for session {session_id}")
  return None, "Failed to generate speech audio."

  progress(0.2, desc="Processing media...")

  try:
- logger.info(f"Checking content type of URL: {url}")
+ # Check if the URL is an image
  response = requests.head(url)
  content_type = response.headers.get('Content-Type', '')
- logger.info(f"Content type of URL: {content_type}")

  if content_type.startswith('image'):
  progress(0.3, desc="Converting image to video...")
@@ -205,49 +173,42 @@ def process_video(voice, url, text, progress=gr.Progress()):
  else:
  video_url = url

- logger.info(f"Video URL: {video_url}")
  progress(0.4, desc="Uploading audio...")
  audio_url = upload_file(audio_path)
- logger.info(f"Audio URL: {audio_url}")

  if not audio_url or not video_url:
  raise Exception("Failed to upload audio or video file")

  progress(0.5, desc="Initiating lipsync...")
  job_data = lipsync_api_call(video_url, audio_url)
- logger.info(f"Lipsync job data: {json.dumps(job_data)}")

  if "error" in job_data:
  raise Exception(job_data.get("error", "Unknown error"))

  prediction_id = job_data["id"]
- logger.info(f"Lipsync prediction ID: {prediction_id}")

  progress(0.6, desc="Processing lipsync...")
  result_url = check_job_status(prediction_id)

  if result_url:
- logger.info(f"Lipsync result URL: {result_url}")
  progress(0.9, desc="Downloading result...")
  response = requests.get(result_url)
  output_path = f"output{session_id}.mp4"
- with open(output_path, 'wb') as f:
+ with open(output_path, "wb") as f:
  f.write(response.content)
- logger.info(f"Lipsync result saved to: {output_path}")
  progress(1.0, desc="Complete!")
  return output_path, "Lipsync completed successfully!"
  else:
  raise Exception("Lipsync processing failed or timed out")

  except Exception as e:
- logger.error(f"Error during lipsync process: {str(e)}")
  progress(0.8, desc="Falling back to simple combination...")
  try:
  if 'video_path' not in locals():
- logger.info("Downloading video from URL")
+ # Download the video from the URL if it wasn't created from an image
  video_response = requests.get(video_url)
  video_path = f"tempvideo{session_id}.mp4"
- with open(video_path, 'wb') as f:
+ with open(video_path, "wb") as f:
  f.write(video_response.content)

  output_path = f"output{session_id}.mp4"
@@ -255,16 +216,13 @@ def process_video(voice, url, text, progress=gr.Progress()):
  progress(1.0, desc="Complete!")
  return output_path, f"Used fallback method. Original error: {str(e)}"
  except Exception as fallback_error:
- logger.error(f"Fallback method failed: {str(fallback_error)}")
  return None, f"All methods failed. Error: {str(fallback_error)}"
  finally:
  # Cleanup
  if os.path.exists(audio_path):
  os.remove(audio_path)
- logger.info(f"Removed temporary audio file: {audio_path}")
  if os.path.exists(f"tempvideo{session_id}.mp4"):
  os.remove(f"tempvideo{session_id}.mp4")
- logger.info(f"Removed temporary video file: tempvideo{session_id}.mp4")

  def create_interface():
  voices = get_voices()
@@ -281,10 +239,8 @@ def create_interface():
  video_output = gr.Video(label="Generated Video")
  status_output = gr.Textbox(label="Status", interactive=False)
  def on_generate(voice_name, url, text):
- logger.info(f"Generation started with voice: {voice_name}, URL: {url}")
  voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
  if not voice_id:
- logger.error(f"Invalid voice selected: {voice_name}")
  return None, "Invalid voice selected."
  return process_video(voice_id, url, text)
  generate_btn.click(
@@ -295,6 +251,5 @@ def create_interface():
  return app

  if __name__ == "__main__":
- logger.info("Starting the application")
  app = create_interface()
  app.launch()
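For context, a minimal usage sketch of the two helpers this commit touches most: after the change, get_media_duration returns the parsed ffprobe duration directly, and combine_audio_video either trims the video or loops it (via -stream_loop) so the output matches the audio length. The file names below are placeholders, and the sketch assumes ffmpeg/ffprobe are on PATH and that app.py is importable from the working directory; it is not part of the commit.

# Hypothetical usage sketch; "clip.mp4" and "speech.mp3" are placeholder files.
from app import get_media_duration, combine_audio_video

video_path = "clip.mp4"    # placeholder input video
audio_path = "speech.mp3"  # placeholder TTS audio

# Duration parsed from ffprobe's stdout, in seconds, as a float
print(get_media_duration(video_path))

# Trims the video if it is longer than the audio, otherwise loops it
# so the combined output matches the audio duration.
combine_audio_video(video_path, audio_path, "combined.mp4")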