Spaces:
Running
on
Zero
Running
on
Zero
JacobLinCool
committed on
Commit
•
12b59af
1
Parent(s):
f325db1
feat: demuxed audio download
Browse files
app.py
CHANGED
@@ -160,7 +160,7 @@ def diarize_audio(task_id: str):
|
|
160 |
return filtered_segments
|
161 |
|
162 |
|
163 |
-
def generate_clips(task_id: str, speaker: str) -> Tuple[str, str]:
|
164 |
video = path.join("task", task_id, "video.mp4")
|
165 |
if not path.exists(video):
|
166 |
raise gr.Error("Video file not found")
|
@@ -205,7 +205,25 @@ def generate_clips(task_id: str, speaker: str) -> Tuple[str, str]:
|
|
205 |
for file in files:
|
206 |
zipf.write(path.join(segments, file), file)
|
207 |
|
208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
|
211 |
with gr.Blocks() as app:
|
@@ -219,6 +237,7 @@ with gr.Blocks() as app:
|
|
219 |
original_video = gr.Video(label="Upload a video", show_download_button=True)
|
220 |
preprocess_btn = gr.Button(value="Pre Process", variant="primary")
|
221 |
preprocess_btn_label = gr.Markdown("Press the button!")
|
|
|
222 |
|
223 |
with gr.Column(visible=False) as preprocess_output:
|
224 |
gr.Markdown(
|
@@ -228,9 +247,10 @@ with gr.Blocks() as app:
|
|
228 |
Next, let's remove the background music from the audio.
|
229 |
"""
|
230 |
)
|
231 |
-
|
232 |
-
|
233 |
-
|
|
|
234 |
|
235 |
extract_vocals_btn = gr.Button(
|
236 |
value="Remove Background Music", variant="primary"
|
@@ -238,8 +258,9 @@ with gr.Blocks() as app:
|
|
238 |
extract_vocals_btn_label = gr.Markdown("Press the button!")
|
239 |
|
240 |
with gr.Column(visible=False) as extract_vocals_output:
|
241 |
-
|
242 |
-
|
|
|
243 |
|
244 |
diarize_btn = gr.Button(value="Diarize", variant="primary")
|
245 |
diarize_btn_label = gr.Markdown("Press the button!")
|
@@ -250,8 +271,9 @@ with gr.Blocks() as app:
|
|
250 |
Now you can select the speaker from the dropdown below to generate the clips of the speaker.
|
251 |
"""
|
252 |
)
|
253 |
-
|
254 |
-
|
|
|
255 |
|
256 |
generate_clips_btn = gr.Button(value="Generate Clips", variant="primary")
|
257 |
generate_clips_btn_label = gr.Markdown("Press the button!")
|
@@ -259,6 +281,7 @@ with gr.Blocks() as app:
|
|
259 |
with gr.Column(visible=False) as generate_clips_output:
|
260 |
speaker_clip = gr.Video(label="Speaker Clip")
|
261 |
speaker_clip_zip = gr.File(label="Download Audio Segments")
|
|
|
262 |
|
263 |
def preprocess(video: str):
|
264 |
task_id_val, extracted_audio_val = extract_audio(video)
|
@@ -336,11 +359,12 @@ with gr.Blocks() as app:
|
|
336 |
)
|
337 |
|
338 |
def generate_clips_fn(task_id: str, speaker: str):
|
339 |
-
speaker_clip_val, zip_val = generate_clips(task_id, speaker)
|
340 |
return {
|
341 |
generate_clips_output: gr.Column(visible=True),
|
342 |
speaker_clip: speaker_clip_val,
|
343 |
speaker_clip_zip: zip_val,
|
|
|
344 |
generate_clips_btn_label: gr.Markdown("", visible=False),
|
345 |
}
|
346 |
|
@@ -351,6 +375,7 @@ with gr.Blocks() as app:
|
|
351 |
generate_clips_output,
|
352 |
speaker_clip,
|
353 |
speaker_clip_zip,
|
|
|
354 |
generate_clips_btn_label,
|
355 |
],
|
356 |
api_name="generate_clips",
|
|
|
160 |
return filtered_segments
|
161 |
|
162 |
|
163 |
+
def generate_clips(task_id: str, speaker: str) -> Tuple[str, str, str]:
|
164 |
video = path.join("task", task_id, "video.mp4")
|
165 |
if not path.exists(video):
|
166 |
raise gr.Error("Video file not found")
|
|
|
205 |
for file in files:
|
206 |
zipf.write(path.join(segments, file), file)
|
207 |
|
208 |
+
vocals = path.join("task", task_id, "htdemucs", "extracted_48k", "vocals.wav")
|
209 |
+
vocal_segments = path.join("task", task_id, f"{speaker}_vocals")
|
210 |
+
if not path.exists(vocal_segments):
|
211 |
+
os.makedirs(vocal_segments)
|
212 |
+
for i, segment in enumerate(filtered_segments[speaker]):
|
213 |
+
start = segment["start"]
|
214 |
+
end = segment["end"]
|
215 |
+
name = path.join(vocal_segments, f"{i}_{start:.2f}_{end:.2f}.wav")
|
216 |
+
cmd = f"ffmpeg -i {vocals} -ss {start} -to {end} -f wav {name}"
|
217 |
+
os.system(cmd)
|
218 |
+
|
219 |
+
vocal_segments_zip = path.join("task", task_id, f"{speaker}_vocals.zip")
|
220 |
+
if not path.exists(vocal_segments_zip):
|
221 |
+
with zipfile.ZipFile(vocal_segments_zip, "w") as zipf:
|
222 |
+
files = [f for f in os.listdir(vocal_segments) if f.endswith(".wav")]
|
223 |
+
for file in files:
|
224 |
+
zipf.write(path.join(vocal_segments, file), file)
|
225 |
+
|
226 |
+
return mp4, segments_zip, vocal_segments_zip
|
227 |
|
228 |
|
229 |
with gr.Blocks() as app:
|
|
|
237 |
original_video = gr.Video(label="Upload a video", show_download_button=True)
|
238 |
preprocess_btn = gr.Button(value="Pre Process", variant="primary")
|
239 |
preprocess_btn_label = gr.Markdown("Press the button!")
|
240 |
+
task_id = gr.Textbox(label="Task ID", visible=False)
|
241 |
|
242 |
with gr.Column(visible=False) as preprocess_output:
|
243 |
gr.Markdown(
|
|
|
247 |
Next, let's remove the background music from the audio.
|
248 |
"""
|
249 |
)
|
250 |
+
|
251 |
+
with gr.Row():
|
252 |
+
extracted_audio = gr.Audio(label="Extracted Audio", type="filepath")
|
253 |
+
extracted_audio_spec = gr.Image(label="Extracted Audio Spectrogram")
|
254 |
|
255 |
extract_vocals_btn = gr.Button(
|
256 |
value="Remove Background Music", variant="primary"
|
|
|
258 |
extract_vocals_btn_label = gr.Markdown("Press the button!")
|
259 |
|
260 |
with gr.Column(visible=False) as extract_vocals_output:
|
261 |
+
with gr.Row():
|
262 |
+
vocals = gr.Audio(label="Vocals", type="filepath")
|
263 |
+
vocals_spec = gr.Image(label="Vocals Spectrogram")
|
264 |
|
265 |
diarize_btn = gr.Button(value="Diarize", variant="primary")
|
266 |
diarize_btn_label = gr.Markdown("Press the button!")
|
|
|
271 |
Now you can select the speaker from the dropdown below to generate the clips of the speaker.
|
272 |
"""
|
273 |
)
|
274 |
+
with gr.Row():
|
275 |
+
speaker_select = gr.Dropdown(label="Speaker", choices=[])
|
276 |
+
diarization_result = gr.Markdown("", height=400)
|
277 |
|
278 |
generate_clips_btn = gr.Button(value="Generate Clips", variant="primary")
|
279 |
generate_clips_btn_label = gr.Markdown("Press the button!")
|
|
|
281 |
with gr.Column(visible=False) as generate_clips_output:
|
282 |
speaker_clip = gr.Video(label="Speaker Clip")
|
283 |
speaker_clip_zip = gr.File(label="Download Audio Segments")
|
284 |
+
speaker_clip_vocal_zip = gr.File(label="Download Vocal Segments")
|
285 |
|
286 |
def preprocess(video: str):
|
287 |
task_id_val, extracted_audio_val = extract_audio(video)
|
|
|
359 |
)
|
360 |
|
361 |
def generate_clips_fn(task_id: str, speaker: str):
|
362 |
+
speaker_clip_val, zip_val, vocal_zip_val = generate_clips(task_id, speaker)
|
363 |
return {
|
364 |
generate_clips_output: gr.Column(visible=True),
|
365 |
speaker_clip: speaker_clip_val,
|
366 |
speaker_clip_zip: zip_val,
|
367 |
+
speaker_clip_vocal_zip: vocal_zip_val,
|
368 |
generate_clips_btn_label: gr.Markdown("", visible=False),
|
369 |
}
|
370 |
|
|
|
375 |
generate_clips_output,
|
376 |
speaker_clip,
|
377 |
speaker_clip_zip,
|
378 |
+
speaker_clip_vocal_zip,
|
379 |
generate_clips_btn_label,
|
380 |
],
|
381 |
api_name="generate_clips",
|