JacobLinCool committed on
Commit
12b59af
1 Parent(s): f325db1

feat: demuxed audio download

Browse files
Files changed (1) hide show
  1. app.py +35 -10
app.py CHANGED
@@ -160,7 +160,7 @@ def diarize_audio(task_id: str):
160
  return filtered_segments
161
 
162
 
163
- def generate_clips(task_id: str, speaker: str) -> Tuple[str, str]:
164
  video = path.join("task", task_id, "video.mp4")
165
  if not path.exists(video):
166
  raise gr.Error("Video file not found")
@@ -205,7 +205,25 @@ def generate_clips(task_id: str, speaker: str) -> Tuple[str, str]:
205
  for file in files:
206
  zipf.write(path.join(segments, file), file)
207
 
208
- return mp4, segments_zip
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
 
211
  with gr.Blocks() as app:
@@ -219,6 +237,7 @@ with gr.Blocks() as app:
219
  original_video = gr.Video(label="Upload a video", show_download_button=True)
220
  preprocess_btn = gr.Button(value="Pre Process", variant="primary")
221
  preprocess_btn_label = gr.Markdown("Press the button!")
 
222
 
223
  with gr.Column(visible=False) as preprocess_output:
224
  gr.Markdown(
@@ -228,9 +247,10 @@ with gr.Blocks() as app:
228
  Next, let's remove the background music from the audio.
229
  """
230
  )
231
- task_id = gr.Textbox(label="Task ID", visible=False)
232
- extracted_audio = gr.Audio(label="Extracted Audio", type="filepath")
233
- extracted_audio_spec = gr.Image(label="Extracted Audio Spectrogram")
 
234
 
235
  extract_vocals_btn = gr.Button(
236
  value="Remove Background Music", variant="primary"
@@ -238,8 +258,9 @@ with gr.Blocks() as app:
238
  extract_vocals_btn_label = gr.Markdown("Press the button!")
239
 
240
  with gr.Column(visible=False) as extract_vocals_output:
241
- vocals = gr.Audio(label="Vocals", type="filepath")
242
- vocals_spec = gr.Image(label="Vocals Spectrogram")
 
243
 
244
  diarize_btn = gr.Button(value="Diarize", variant="primary")
245
  diarize_btn_label = gr.Markdown("Press the button!")
@@ -250,8 +271,9 @@ with gr.Blocks() as app:
250
  Now you can select the speaker from the dropdown below to generate the clips of the speaker.
251
  """
252
  )
253
- speaker_select = gr.Dropdown(label="Speaker", choices=[])
254
- diarization_result = gr.Markdown("")
 
255
 
256
  generate_clips_btn = gr.Button(value="Generate Clips", variant="primary")
257
  generate_clips_btn_label = gr.Markdown("Press the button!")
@@ -259,6 +281,7 @@ with gr.Blocks() as app:
259
  with gr.Column(visible=False) as generate_clips_output:
260
  speaker_clip = gr.Video(label="Speaker Clip")
261
  speaker_clip_zip = gr.File(label="Download Audio Segments")
 
262
 
263
  def preprocess(video: str):
264
  task_id_val, extracted_audio_val = extract_audio(video)
@@ -336,11 +359,12 @@ with gr.Blocks() as app:
336
  )
337
 
338
  def generate_clips_fn(task_id: str, speaker: str):
339
- speaker_clip_val, zip_val = generate_clips(task_id, speaker)
340
  return {
341
  generate_clips_output: gr.Column(visible=True),
342
  speaker_clip: speaker_clip_val,
343
  speaker_clip_zip: zip_val,
 
344
  generate_clips_btn_label: gr.Markdown("", visible=False),
345
  }
346
 
@@ -351,6 +375,7 @@ with gr.Blocks() as app:
351
  generate_clips_output,
352
  speaker_clip,
353
  speaker_clip_zip,
 
354
  generate_clips_btn_label,
355
  ],
356
  api_name="generate_clips",
 
160
  return filtered_segments
161
 
162
 
163
+ def generate_clips(task_id: str, speaker: str) -> Tuple[str, str, str]:
164
  video = path.join("task", task_id, "video.mp4")
165
  if not path.exists(video):
166
  raise gr.Error("Video file not found")
 
205
  for file in files:
206
  zipf.write(path.join(segments, file), file)
207
 
208
+ vocals = path.join("task", task_id, "htdemucs", "extracted_48k", "vocals.wav")
209
+ vocal_segments = path.join("task", task_id, f"{speaker}_vocals")
210
+ if not path.exists(vocal_segments):
211
+ os.makedirs(vocal_segments)
212
+ for i, segment in enumerate(filtered_segments[speaker]):
213
+ start = segment["start"]
214
+ end = segment["end"]
215
+ name = path.join(vocal_segments, f"{i}_{start:.2f}_{end:.2f}.wav")
216
+ cmd = f"ffmpeg -i {vocals} -ss {start} -to {end} -f wav {name}"
217
+ os.system(cmd)
218
+
219
+ vocal_segments_zip = path.join("task", task_id, f"{speaker}_vocals.zip")
220
+ if not path.exists(vocal_segments_zip):
221
+ with zipfile.ZipFile(vocal_segments_zip, "w") as zipf:
222
+ files = [f for f in os.listdir(vocal_segments) if f.endswith(".wav")]
223
+ for file in files:
224
+ zipf.write(path.join(vocal_segments, file), file)
225
+
226
+ return mp4, segments_zip, vocal_segments_zip
227
 
228
 
229
  with gr.Blocks() as app:
 
237
  original_video = gr.Video(label="Upload a video", show_download_button=True)
238
  preprocess_btn = gr.Button(value="Pre Process", variant="primary")
239
  preprocess_btn_label = gr.Markdown("Press the button!")
240
+ task_id = gr.Textbox(label="Task ID", visible=False)
241
 
242
  with gr.Column(visible=False) as preprocess_output:
243
  gr.Markdown(
 
247
  Next, let's remove the background music from the audio.
248
  """
249
  )
250
+
251
+ with gr.Row():
252
+ extracted_audio = gr.Audio(label="Extracted Audio", type="filepath")
253
+ extracted_audio_spec = gr.Image(label="Extracted Audio Spectrogram")
254
 
255
  extract_vocals_btn = gr.Button(
256
  value="Remove Background Music", variant="primary"
 
258
  extract_vocals_btn_label = gr.Markdown("Press the button!")
259
 
260
  with gr.Column(visible=False) as extract_vocals_output:
261
+ with gr.Row():
262
+ vocals = gr.Audio(label="Vocals", type="filepath")
263
+ vocals_spec = gr.Image(label="Vocals Spectrogram")
264
 
265
  diarize_btn = gr.Button(value="Diarize", variant="primary")
266
  diarize_btn_label = gr.Markdown("Press the button!")
 
271
  Now you can select the speaker from the dropdown below to generate the clips of the speaker.
272
  """
273
  )
274
+ with gr.Row():
275
+ speaker_select = gr.Dropdown(label="Speaker", choices=[])
276
+ diarization_result = gr.Markdown("", height=400)
277
 
278
  generate_clips_btn = gr.Button(value="Generate Clips", variant="primary")
279
  generate_clips_btn_label = gr.Markdown("Press the button!")
 
281
  with gr.Column(visible=False) as generate_clips_output:
282
  speaker_clip = gr.Video(label="Speaker Clip")
283
  speaker_clip_zip = gr.File(label="Download Audio Segments")
284
+ speaker_clip_vocal_zip = gr.File(label="Download Vocal Segments")
285
 
286
  def preprocess(video: str):
287
  task_id_val, extracted_audio_val = extract_audio(video)
 
359
  )
360
 
361
  def generate_clips_fn(task_id: str, speaker: str):
362
+ speaker_clip_val, zip_val, vocal_zip_val = generate_clips(task_id, speaker)
363
  return {
364
  generate_clips_output: gr.Column(visible=True),
365
  speaker_clip: speaker_clip_val,
366
  speaker_clip_zip: zip_val,
367
+ speaker_clip_vocal_zip: vocal_zip_val,
368
  generate_clips_btn_label: gr.Markdown("", visible=False),
369
  }
370
 
 
375
  generate_clips_output,
376
  speaker_clip,
377
  speaker_clip_zip,
378
+ speaker_clip_vocal_zip,
379
  generate_clips_btn_label,
380
  ],
381
  api_name="generate_clips",