goldpulpy committed on
Commit
b67dfdb
1 Parent(s): 62d3621

Edit requirements.txt and app

Browse files
Files changed (2) hide show
  1. app.py +132 -5
  2. requirements.txt +22 -0
app.py CHANGED
@@ -1,5 +1,132 @@
1
- import torch
2
- print(f"Is CUDA available: {torch.cuda.is_available()}")
3
- # True
4
- print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
5
- # Tesla T4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import subprocess
3
+ import os
4
+ import gradio
5
+ import gradio as gr
6
+ import shutil
7
+
8
# Absolute path of the directory containing this file.
# NOTE(review): not referenced anywhere else in this module — presumably kept
# for scripts that chdir elsewhere before importing; confirm before removing.
current_dir = os.path.dirname(os.path.abspath(__file__))
9
+
10
+
11
def convert(segment_length, video, audio, progress=gradio.Progress()):
    """Lip-sync *video* to *audio* and return the path of the result video.

    Args:
        segment_length: Segment duration in seconds; ``0`` or ``None``
            processes the inputs whole, without segmentation.
        video: Path of the source face video (as delivered by gradio).
        audio: Path of the target audio file (as delivered by gradio).
        progress: Gradio progress tracker; the instantiated default is the
            gradio convention for enabling the built-in progress bar.

    Returns:
        Path of the concatenated output video under ``results/``.
    """
    if segment_length is None:
        segment_length = 0
    print(video, audio)

    if segment_length != 0:
        # Split both inputs into matching fixed-length chunks.
        video_segments = cut_video_segments(video, segment_length)
        audio_segments = cut_audio_segments(audio, segment_length)
    else:
        # No segmentation: move each input into a temp folder and treat it
        # as a single one-element "segment" list.
        video_dir = 'temp/video'
        audio_dir = 'temp/audio'

        os.makedirs(video_dir, exist_ok=True)
        os.makedirs(audio_dir, exist_ok=True)

        video_path = os.path.join(video_dir, os.path.basename(video))
        shutil.move(video, video_path)
        video_segments = [video_path]

        audio_path = os.path.join(audio_dir, os.path.basename(audio))
        shutil.move(audio, audio_path)
        audio_segments = [audio_path]

    # FIX: both process_segment and concatenate_videos write under results/,
    # which was never created — the first run on a fresh checkout failed.
    os.makedirs('results', exist_ok=True)

    processed_segments = []
    for i, (video_seg, audio_seg) in progress.tqdm(
            enumerate(zip(video_segments, audio_segments))):
        processed_output = process_segment(video_seg, audio_seg, i)
        processed_segments.append(processed_output)

    output_file = f"results/output_{random.randint(0, 1000)}.mp4"
    concatenate_videos(processed_segments, output_file)

    # Remove temporary files
    cleanup_temp_files(video_segments + audio_segments)

    # Return the concatenated video file
    return output_file
47
+
48
+
49
def cleanup_temp_files(file_list):
    """Best-effort removal of temporary files.

    Entries that are missing or are not regular files are skipped silently.
    """
    existing_files = (path for path in file_list if os.path.isfile(path))
    for path in existing_files:
        os.remove(path)
53
+
54
+
55
def cut_video_segments(video_file, segment_length):
    """Split *video_file* into ``segment_length``-second chunks with ffmpeg.

    Args:
        video_file: Path of the input video.
        segment_length: Chunk duration in seconds.

    Returns:
        List of paths of the generated ``.mp4`` segments, in index order.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
    """
    # FIX: video chunks were previously written into 'temp/audio' (the
    # directories were swapped with cut_audio_segments). A dedicated
    # directory guarantees the two splitters can never clobber each other.
    temp_directory = 'temp/video_segments'
    shutil.rmtree(temp_directory, ignore_errors=True)
    os.makedirs(temp_directory, exist_ok=True)  # was shutil.os.makedirs

    segment_template = f"{temp_directory}/{random.randint(0, 1000)}_%03d.mp4"
    command = ["ffmpeg", "-i", video_file, "-c", "copy", "-f",
               "segment", "-segment_time", str(segment_length),
               segment_template]
    subprocess.run(command, check=True)

    # ffmpeg numbers segments 000, 001, ...; rebuild the paths in order.
    video_segments = [segment_template % i
                      for i in range(len(os.listdir(temp_directory)))]
    return video_segments
67
+
68
+
69
def cut_audio_segments(audio_file, segment_length):
    """Split *audio_file* into ``segment_length``-second chunks with ffmpeg.

    Args:
        audio_file: Path of the input audio file.
        segment_length: Chunk duration in seconds.

    Returns:
        List of paths of the generated ``.mp3`` segments, in index order.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
    """
    # FIX: audio chunks were previously written into 'temp/video' (the
    # directories were swapped with cut_video_segments). A dedicated
    # directory guarantees the two splitters can never clobber each other.
    temp_directory = 'temp/audio_segments'
    shutil.rmtree(temp_directory, ignore_errors=True)
    os.makedirs(temp_directory, exist_ok=True)  # was shutil.os.makedirs

    segment_template = f"{temp_directory}/{random.randint(0, 1000)}_%03d.mp3"
    command = ["ffmpeg", "-i", audio_file, "-f", "segment",
               "-segment_time", str(segment_length), segment_template]
    subprocess.run(command, check=True)

    # ffmpeg numbers segments 000, 001, ...; rebuild the paths in order.
    audio_segments = [segment_template % i
                      for i in range(len(os.listdir(temp_directory)))]
    return audio_segments
81
+
82
+
83
def process_segment(video_seg, audio_seg, i):
    """Run the lip-sync inference script on one video/audio segment pair.

    Args:
        video_seg: Path of the video segment (face source).
        audio_seg: Path of the audio segment to sync to.
        i: Segment index, embedded in the output filename.

    Returns:
        Path of the synthesized segment under ``results/``.

    Raises:
        subprocess.CalledProcessError: If inference.py exits non-zero.
    """
    # FIX: inference.py writes into results/ but does not create it;
    # guarantee the directory exists before spawning the subprocess.
    os.makedirs('results', exist_ok=True)
    output_file = f"results/{random.randint(10, 100000)}_{i}.mp4"
    command = ["python", "inference.py", "--face", video_seg,
               "--audio", audio_seg, "--outfile", output_file]
    subprocess.run(command, check=True)

    return output_file
90
+
91
+
92
def concatenate_videos(video_segments, output_file):
    """Concatenate *video_segments* into *output_file* with ffmpeg's concat demuxer.

    Args:
        video_segments: Ordered list of video file paths to join.
        output_file: Path of the concatenated output video.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
    """
    list_file = "segments.txt"
    with open(list_file, "w") as file:
        for segment in video_segments:
            file.write(f"file '{segment}'\n")
    try:
        command = ["ffmpeg", "-f", "concat", "-i",
                   list_file, "-c", "copy", output_file]
        subprocess.run(command, check=True)
    finally:
        # FIX: the list file used to be left behind in the working
        # directory after every conversion (and after failures).
        if os.path.isfile(list_file):
            os.remove(list_file)
99
+
100
+
101
# Gradio UI: segment length + source face video + target audio in,
# lip-synced video out via convert().
with gradio.Blocks(
    title="Audio-based Lip Synchronization",
    theme=gr.themes.Base(
        primary_hue=gr.themes.colors.green,
        font=["Source Sans Pro", "Arial", "sans-serif"],
        font_mono=['JetBrains mono', "Consolas", 'Courier New'],
    ),
) as demo:
    with gradio.Row():
        gradio.Markdown("# Audio-based Lip Synchronization")
    with gradio.Row():
        with gradio.Column():
            with gradio.Row():
                seg = gradio.Number(
                    label="segment length (Second), 0 for no segmentation")
            with gradio.Row():
                with gradio.Column():
                    # FIX: label typo ("SOurce" -> "Source")
                    v = gradio.Video(label='Source Face')

                with gradio.Column():
                    a = gradio.Audio(
                        type='filepath', label='Target Audio')

            with gradio.Row():
                btn = gradio.Button(value="Synthesize", variant="primary")

        with gradio.Column():
            o = gradio.Video(label="Output Video")

    btn.click(fn=convert, inputs=[seg, v, a], outputs=[o])

demo.queue().launch()
requirements.txt CHANGED
@@ -1,2 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  torch==2.0.1
2
  torchvision==0.15.2
 
1
+ basicsr==1.4.2
2
+ dlib==19.24.2
3
+ ninja
4
+ docopt==0.6.2
5
+ dominate==2.8.0
6
+ easydict==1.10
7
+ einops==0.7.0
8
+ face_alignment==1.4.1
9
+ facexlib==0.3.0
10
+ gradio==3.46.1
11
+ imageio==2.31.5
12
+ insightface==0.7.3
13
+ iou==0.1.0
14
+ kornia==0.7.0
15
+ librosa==0.8.0
16
+ matplotlib==3.7.1
17
+ menpo==0.11.0
18
+ mxnet==1.9.1
19
+ numpy==1.23.5
20
+ onnx==1.14.1
21
+ onnxruntime==1.16.0
22
+ onnxsim==0.4.33
23
  torch==2.0.1
24
  torchvision==0.15.2