Spaces:
Configuration error
Configuration error
seawolf2357
committed on
Commit
•
4e30061
1
Parent(s):
9ca8e1f
Upload 3 files
Browse files- Dockerfile +43 -0
- app (14).py +181 -0
- gitattributes (1) +35 -0
Dockerfile
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# DreamTalk demo Space: GPU runtime image based on the official PyTorch image
# (CUDA 11.7 runtime matches the pinned torch 2.0.1 build).
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# Avoid interactive prompts from apt during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Set the MKL_THREADING_LAYER environment variable to GNU
ENV MKL_THREADING_LAYER=GNU

# System dependencies: git/wget to fetch code and weights; GL + glib + ffmpeg +
# x264 for video decoding/encoding; build-essential/cmake so pip can compile
# dlib. Clean the apt lists afterwards to keep the layer small.
RUN apt-get update && \
    apt-get install -y git wget libgl1-mesa-glx libglib2.0-0 ffmpeg libx264-dev build-essential cmake && \
    rm -rf /var/lib/apt/lists/*

# Run as a non-root user; uid 1000 is the Hugging Face Spaces convention.
RUN useradd -m -u 1000 user

USER user

# Gradio server settings come from env vars so app.py needs no extra config.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    GRADIO_SHARE=False \
    SYSTEM=spaces

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Fetch the DreamTalk demo code (dev branch of the fffiloni fork).
RUN git clone -b dev https://github.com/fffiloni/dreamtalk $HOME/app

# Download the pretrained model weights. Ensure the checkpoints directory
# exists first so `wget -O` cannot fail on a missing parent directory.
RUN mkdir -p $HOME/app/checkpoints && \
    wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/denoising_network.pth -O $HOME/app/checkpoints/denoising_network.pth && \
    wget https://huggingface.co/camenduru/dreamtalk/resolve/main/damo/dreamtalk/checkpoints/renderer.pt -O $HOME/app/checkpoints/renderer.pt

# Install dependencies
RUN pip install --no-cache-dir urllib3==1.26.6 transformers==4.28.1 dlib yacs scipy scikit-image scikit-learn PyYAML Pillow numpy opencv-python imageio ffmpeg-python av moviepy gradio

# Copy as the runtime user: plain COPY after USER still writes root-owned
# files, which can break Gradio's temp/output writes in the app directory.
COPY --chown=user:user app.py .

# Set the environment variable to specify the GPU device
ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
ENV CUDA_VISIBLE_DEVICES=0

# Run your app.py script
CMD ["python", "app.py"]
|
app (14).py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import subprocess
|
3 |
+
from moviepy.editor import VideoFileClip
|
4 |
+
import datetime
|
5 |
+
|
6 |
+
def convert_to_mp4_with_aac(input_path, output_path):
    """Re-encode a video as H.264/AAC mp4 for broad browser compatibility.

    Args:
        input_path: path of the source video file.
        output_path: destination path for the re-encoded .mp4 file.

    Returns:
        ``output_path`` once the file has been written.
    """
    # Load the video
    video = VideoFileClip(input_path)
    try:
        # libx264 video + AAC audio is the most widely playable mp4 combination.
        video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Release moviepy's ffmpeg reader/audio handles; without this each
        # inference leaked open file handles.
        video.close()

    return output_path
|
14 |
+
|
15 |
+
|
16 |
+
# Helper used to test whether a given audio path is one of the bundled samples.
def check_file_exists(file_path, audio_list):
    """Return True when *file_path* appears in *audio_list*, else False."""
    return any(entry == file_path for entry in audio_list)
|
19 |
+
|
20 |
+
def load_audio(audio_listed):
    """Map a dropdown selection to its file path under ``data/audio/``.

    Returns None unchanged when nothing is selected, so the audio input
    component is simply cleared.
    """
    if audio_listed is not None:
        return f"data/audio/{audio_listed}"
    return None
|
25 |
+
|
26 |
+
def execute_command(command: list) -> None:
    """Run *command* (an argv-style list) and raise on a non-zero exit.

    The original annotation said ``str``, but every caller passes a list of
    arguments; ``shell=False`` (the subprocess.run default) keeps this safe
    from shell injection via file names.

    Raises:
        subprocess.CalledProcessError: if the command exits with a
            non-zero status (because of ``check=True``).
    """
    subprocess.run(command, check=True)
|
28 |
+
|
29 |
+
def infer(audio_input, image_path, emotional_style):
    """Run DreamTalk lip-sync inference and return a browser-playable mp4.

    Args:
        audio_input: path to the driving audio file.
        image_path: path to the source face image.
        emotional_style: filename of a 3DMM style clip under
            ``data/style_clip/3DMM/``.

    Returns:
        Path of the re-encoded H.264/AAC result video.
    """
    # Timestamped output name keeps successive runs from clobbering each other.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    output_name = f"lipsynced_result_{timestamp}"

    # argv-style list (no shell) — only interpolated values need f-strings.
    command = [
        "python",
        "inference_for_demo_video.py",
        f"--wav_path={audio_input}",
        f"--style_clip_path=data/style_clip/3DMM/{emotional_style}",
        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
        f"--image_path={image_path}",
        "--cfg_scale=1.0",
        "--max_gen_len=30",
        f"--output_name={output_name}",
    ]

    execute_command(command)

    # Convert video to compatible codecs: the inference script writes into
    # output_video/; the re-encoded copy lands in the working directory.
    input_file = f"output_video/{output_name}.mp4"
    output_file = f"{output_name}.mp4"

    result = convert_to_mp4_with_aac(input_file, output_file)

    return result
|
56 |
+
|
57 |
+
css="""
|
58 |
+
#col-container{
|
59 |
+
margin: 0 auto;
|
60 |
+
max-width: 940px;
|
61 |
+
}
|
62 |
+
#project-links{
|
63 |
+
margin: 0 0 12px !important;
|
64 |
+
column-gap: 8px;
|
65 |
+
display: flex;
|
66 |
+
justify-content: center;
|
67 |
+
flex-wrap: nowrap;
|
68 |
+
flex-direction: row;
|
69 |
+
align-items: center;
|
70 |
+
}
|
71 |
+
#run-btn{
|
72 |
+
border: var(--button-border-width) solid var(--button-primary-border-color);
|
73 |
+
background: var(--button-primary-background-fill);
|
74 |
+
color: var(--button-primary-text-color);
|
75 |
+
}
|
76 |
+
#run-btn:hover{
|
77 |
+
border-color: var(--button-primary-border-color-hover);
|
78 |
+
background: var(--button-primary-background-fill-hover);
|
79 |
+
color: var(--button-primary-text-color-hover);
|
80 |
+
}
|
81 |
+
"""
|
82 |
+
# UI layout: inputs (image + audio + style) in the left column, the resulting
# video in the right column. Wiring: picking a sample audio fills the audio
# input; Run triggers infer().
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Header and "duplicate this Space" badge.
        gr.HTML("""
        <h2 style="text-align: center;">DreamTalk</h2>
        <p style="text-align: center;">When Expressive Talking Head Generation Meets Diffusion Probabilistic Models</p>
        <p style="margin:12px auto;display: flex;justify-content: center;">
            <a href="https://huggingface.co/spaces/fffiloni/dreamtalk?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
        </p>

        """)
        with gr.Row():
            with gr.Column():
                # Source face image and driving audio, both handled as file paths.
                image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
                audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"], value="data/audio/acknowledgement_english.m4a")
                with gr.Row():
                    # Bundled sample audios shipped with the DreamTalk repo.
                    audio_list = gr.Dropdown(
                        label="Choose an audio (optional)",
                        choices=[
                            "German1.wav", "German2.wav", "German3.wav", "German4.wav",
                            "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
                            "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
                            "french1.wav", "french2.wav", "french3.wav",
                            "italian1.wav", "italian2.wav", "italian3.wav",
                            "japan1.wav", "japan2.wav", "japan3.wav",
                            "korean1.wav", "korean2.wav", "korean3.wav",
                            "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
                            "spanish1.wav", "spanish2.wav", "spanish3.wav"
                        ],
                        value = "acknowledgement_english.m4a"
                    )
                    # Selecting a sample resolves it to data/audio/<name> and
                    # loads it into the audio input component.
                    audio_list.change(
                        fn = load_audio,
                        inputs = [audio_list],
                        outputs = [audio_input]
                    )
                # 3DMM style clips: M030/W009/W011 speakers x emotion presets.
                emotional_style = gr.Dropdown(
                    label = "emotional style",
                    choices = [
                        "M030_front_angry_level3_001.mat",
                        "M030_front_contempt_level3_001.mat",
                        "M030_front_disgusted_level3_001.mat",
                        "M030_front_fear_level3_001.mat",
                        "M030_front_happy_level3_001.mat",
                        "M030_front_neutral_level1_001.mat",
                        "M030_front_sad_level3_001.mat",
                        "M030_front_surprised_level3_001.mat",
                        "W009_front_angry_level3_001.mat",
                        "W009_front_contempt_level3_001.mat",
                        "W009_front_disgusted_level3_001.mat",
                        "W009_front_fear_level3_001.mat",
                        "W009_front_happy_level3_001.mat",
                        "W009_front_neutral_level1_001.mat",
                        "W009_front_sad_level3_001.mat",
                        "W009_front_surprised_level3_001.mat",
                        "W011_front_angry_level3_001.mat",
                        "W011_front_contempt_level3_001.mat",
                        "W011_front_disgusted_level3_001.mat",
                        "W011_front_fear_level3_001.mat",
                        "W011_front_happy_level3_001.mat",
                        "W011_front_neutral_level1_001.mat",
                        "W011_front_sad_level3_001.mat",
                        "W011_front_surprised_level3_001.mat"
                    ],
                    value = "M030_front_neutral_level1_001.mat"
                )
                # Example face images (cropped and uncropped) from the repo.
                gr.Examples(
                    examples = [
                        "data/src_img/uncropped/face3.png",
                        "data/src_img/uncropped/male_face.png",
                        "data/src_img/uncropped/uncut_src_img.jpg",
                        "data/src_img/cropped/chpa5.png",
                        "data/src_img/cropped/cut_img.png",
                        "data/src_img/cropped/f30.png",
                        "data/src_img/cropped/menglu2.png",
                        "data/src_img/cropped/nscu2.png",
                        "data/src_img/cropped/zp1.png",
                        "data/src_img/cropped/zt12.png"
                    ],
                    inputs=[image_path],
                    examples_per_page=5
                )
                with gr.Row():
                    gr.ClearButton([audio_input, image_path, audio_list])
                    run_btn = gr.Button("Run", elem_id="run-btn")
            with gr.Column():
                # Result video plus project links / teaser.
                output_video = gr.Video(format="mp4")
                gr.HTML("""
                <p id="project-links" align="center">
                    <a href='https://dreamtalk-project.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://arxiv.org/abs/2312.09767'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> <a href='https://youtu.be/VF4vlE6ZqWQ'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
                </p>
                <img src="https://github.com/ali-vilab/dreamtalk/raw/main/media/teaser.gif" style="margin: 0 auto;border-radius: 10px;" />
                """)

        run_btn.click(
            fn = infer,
            inputs = [audio_input, image_path, emotional_style],
            outputs = [output_video]
        )

# queue() serializes GPU jobs; launch() starts the Gradio server (host/port
# come from the GRADIO_* environment variables set in the Dockerfile).
demo.queue().launch()
|
gitattributes (1)
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Track large binary artifacts (archives, model weights, serialized arrays,
# TF event logs) with Git LFS — the standard Hugging Face Hub attribute list.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
|