seawolf2357 committed on
Commit
4e30061
1 Parent(s): 9ca8e1f

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +43 -0
  2. app (14).py +181 -0
  3. gitattributes (1) +35 -0
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# Non-interactive apt so the build never blocks on configuration prompts.
ENV DEBIAN_FRONTEND=noninteractive

# Set the MKL_THREADING_LAYER environment variable to GNU so MKL coexists
# with libgomp pulled in by PyTorch.
ENV MKL_THREADING_LAYER=GNU

# System deps: git/wget fetch code and weights, libgl1/libglib2 are OpenCV
# runtime deps, ffmpeg/libx264 handle video encoding, build-essential/cmake
# are needed to compile dlib from source. Remove the apt lists in the same
# layer so they don't bloat the image.
RUN apt-get update && \
    apt-get install -y git wget libgl1-mesa-glx libglib2.0-0 ffmpeg libx264-dev build-essential cmake && \
    rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user (uid 1000, as expected by HF Spaces).
RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    GRADIO_SHARE=False \
    SYSTEM=spaces

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# DreamTalk fork (dev branch) provides inference_for_demo_video.py and data/.
RUN git clone -b dev https://github.com/fffiloni/dreamtalk $HOME/app

# Pretrained DreamTalk weights into the repo's checkpoints/ directory.
RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/denoising_network.pth -O $HOME/app/checkpoints/denoising_network.pth
RUN wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/renderer.pt -O $HOME/app/checkpoints/renderer.pt

# Install Python dependencies (dlib is built from source via build-essential/cmake).
RUN pip install --no-cache-dir urllib3==1.26.6 transformers==4.28.1 dlib yacs scipy scikit-image scikit-learn PyYAML Pillow numpy opencv-python imageio ffmpeg-python av moviepy gradio

COPY app.py .

# Set the environment variables to pin the first GPU, ordered by PCI bus id.
ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
ENV CUDA_VISIBLE_DEVICES=0

# Run your app.py script
CMD ["python", "app.py"]
app (14).py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ from moviepy.editor import VideoFileClip
4
+ import datetime
5
+
6
def convert_to_mp4_with_aac(input_path, output_path):
    """Re-encode the video at *input_path* into a browser-friendly MP4.

    Uses the H.264 (libx264) video codec and AAC audio codec so the result
    plays in the Gradio ``Video`` component.

    Args:
        input_path: Path of the source video file.
        output_path: Path where the re-encoded .mp4 is written.

    Returns:
        ``output_path``, for convenient chaining by the caller.
    """
    # Load the video
    video = VideoFileClip(input_path)
    try:
        # Set the output format to mp4 with AAC codec
        video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Fix: the original never closed the clip, leaking the ffmpeg
        # reader process and file handles on every conversion.
        video.close()

    return output_path
14
+
15
+
16
def check_file_exists(file_path, audio_list):
    """Return True when *file_path* is one of the entries in *audio_list*."""
    return any(candidate == file_path for candidate in audio_list)
19
+
20
def load_audio(audio_listed):
    """Map a dropdown selection to its bundled path under data/audio.

    Returns None unchanged when nothing is selected, so the Audio widget
    is cleared rather than pointed at a bogus path.
    """
    return None if audio_listed is None else f"data/audio/{audio_listed}"
25
+
26
def execute_command(command: list[str]) -> None:
    """Run *command* (an argv list) as a subprocess.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero
            (``check=True``), so failures surface instead of being ignored.

    Note: the original annotation said ``str``, but every caller (see
    ``infer``) passes an argv list; the annotation is corrected to match.
    """
    subprocess.run(command, check=True)
28
+
29
def infer(audio_input, image_path, emotional_style):
    """Run DreamTalk inference and return the path of a playable .mp4.

    Args:
        audio_input: Path to the driving audio file.
        image_path: Path to the source face image.
        emotional_style: Filename of the 3DMM style clip to apply.
    """
    # Timestamp the output name so successive runs never overwrite each other.
    stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    output_name = f"lipsynced_result_{stamp}"

    execute_command([
        "python",
        "inference_for_demo_video.py",
        f"--wav_path={audio_input}",
        f"--style_clip_path=data/style_clip/3DMM/{emotional_style}",
        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
        f"--image_path={image_path}",
        "--cfg_scale=1.0",
        "--max_gen_len=30",
        f"--output_name={output_name}",
    ])

    # Re-encode the raw output to H.264/AAC so browsers can play it.
    return convert_to_mp4_with_aac(
        f"output_video/{output_name}.mp4",
        f"{output_name}.mp4",
    )
56
+
57
# Page styling; the selectors match elem_id values used in the layout below.
css="""
#col-container{
margin: 0 auto;
max-width: 940px;
}
#project-links{
margin: 0 0 12px !important;
column-gap: 8px;
display: flex;
justify-content: center;
flex-wrap: nowrap;
flex-direction: row;
align-items: center;
}
#run-btn{
border: var(--button-border-width) solid var(--button-primary-border-color);
background: var(--button-primary-background-fill);
color: var(--button-primary-text-color);
}
#run-btn:hover{
border-color: var(--button-primary-border-color-hover);
background: var(--button-primary-background-fill-hover);
color: var(--button-primary-text-color-hover);
}
"""
# Gradio UI: left column collects image + audio + emotional style,
# right column shows the generated video; Run triggers infer().
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Header with a "Duplicate this Space" badge.
        gr.HTML("""
<h2 style="text-align: center;">DreamTalk</h2>
<p style="text-align: center;">When Expressive Talking Head Generation Meets Diffusion Probabilistic Models</p>
<p style="margin:12px auto;display: flex;justify-content: center;">
<a href="https://huggingface.co/spaces/fffiloni/dreamtalk?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
</p>

""")
        with gr.Row():
            with gr.Column():
                # Source face image and driving audio (both file paths).
                image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
                audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"], value="data/audio/acknowledgement_english.m4a")
                with gr.Row():
                    # Bundled sample clips shipped under data/audio/.
                    audio_list = gr.Dropdown(
                        label="Choose an audio (optional)",
                        choices=[
                            "German1.wav", "German2.wav", "German3.wav", "German4.wav",
                            "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
                            "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
                            "french1.wav", "french2.wav", "french3.wav",
                            "italian1.wav", "italian2.wav", "italian3.wav",
                            "japan1.wav", "japan2.wav", "japan3.wav",
                            "korean1.wav", "korean2.wav", "korean3.wav",
                            "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
                            "spanish1.wav", "spanish2.wav", "spanish3.wav"
                        ],
                        value = "acknowledgement_english.m4a"
                    )
                    # Picking a sample loads it into the audio widget above.
                    audio_list.change(
                        fn = load_audio,
                        inputs = [audio_list],
                        outputs = [audio_input]
                    )
                    # 3DMM style clips (.mat) shipped with the DreamTalk repo.
                    emotional_style = gr.Dropdown(
                        label = "emotional style",
                        choices = [
                            "M030_front_angry_level3_001.mat",
                            "M030_front_contempt_level3_001.mat",
                            "M030_front_disgusted_level3_001.mat",
                            "M030_front_fear_level3_001.mat",
                            "M030_front_happy_level3_001.mat",
                            "M030_front_neutral_level1_001.mat",
                            "M030_front_sad_level3_001.mat",
                            "M030_front_surprised_level3_001.mat",
                            "W009_front_angry_level3_001.mat",
                            "W009_front_contempt_level3_001.mat",
                            "W009_front_disgusted_level3_001.mat",
                            "W009_front_fear_level3_001.mat",
                            "W009_front_happy_level3_001.mat",
                            "W009_front_neutral_level1_001.mat",
                            "W009_front_sad_level3_001.mat",
                            "W009_front_surprised_level3_001.mat",
                            "W011_front_angry_level3_001.mat",
                            "W011_front_contempt_level3_001.mat",
                            "W011_front_disgusted_level3_001.mat",
                            "W011_front_fear_level3_001.mat",
                            "W011_front_happy_level3_001.mat",
                            "W011_front_neutral_level1_001.mat",
                            "W011_front_sad_level3_001.mat",
                            "W011_front_surprised_level3_001.mat"
                        ],
                        value = "M030_front_neutral_level1_001.mat"
                    )
                # Sample face images bundled with the repo.
                gr.Examples(
                    examples = [
                        "data/src_img/uncropped/face3.png",
                        "data/src_img/uncropped/male_face.png",
                        "data/src_img/uncropped/uncut_src_img.jpg",
                        "data/src_img/cropped/chpa5.png",
                        "data/src_img/cropped/cut_img.png",
                        "data/src_img/cropped/f30.png",
                        "data/src_img/cropped/menglu2.png",
                        "data/src_img/cropped/nscu2.png",
                        "data/src_img/cropped/zp1.png",
                        "data/src_img/cropped/zt12.png"
                    ],
                    inputs=[image_path],
                    examples_per_page=5
                )
                with gr.Row():
                    gr.ClearButton([audio_input, image_path, audio_list])
                    run_btn = gr.Button("Run", elem_id="run-btn")
            with gr.Column():
                # Result video plus project links / teaser footer.
                output_video = gr.Video(format="mp4")
                gr.HTML("""
<p id="project-links" align="center">
<a href='https://dreamtalk-project.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://arxiv.org/abs/2312.09767'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> <a href='https://youtu.be/VF4vlE6ZqWQ'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
</p>
<img src="https://github.com/ali-vilab/dreamtalk/raw/main/media/teaser.gif" style="margin: 0 auto;border-radius: 10px;" />
""")

    # Wire the Run button to the inference pipeline.
    run_btn.click(
        fn = infer,
        inputs = [audio_input, image_path, emotional_style],
        outputs = [output_video]
    )

# queue() serializes GPU jobs; launch() starts the Gradio server.
demo.queue().launch()
gitattributes (1) ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text