nightfury committed
Commit 7dd8b48
Parent: dbd931b

Upload app_1.py

Files changed (1)
app_1.py +220 -0
app_1.py ADDED
@@ -0,0 +1,220 @@
import os, sys
import tempfile
import gradio as gr
from src.gradio_demo import LipSyncer
# from src.utils.text2speech import TTSTalker
from huggingface_hub import snapshot_download


def get_source_image(image):
    return image


try:
    import webui  # present only when running inside a webui
    in_webui = True
except ImportError:
    in_webui = False


def toggle_audio_file(choice):
    # Idle mode off: show the audio upload; idle mode on: show the hidden placeholder.
    if not choice:
        return gr.update(visible=True), gr.update(visible=False)
    else:
        return gr.update(visible=False), gr.update(visible=True)


def ref_video_fn(path_of_ref_video):
    # Tick "Use Reference Video" automatically when a reference video is uploaded.
    if path_of_ref_video is not None:
        return gr.update(value=True)
    else:
        return gr.update(value=False)


# def download_model():
#     REPO_ID = 'vinthony/LipSyncer-V002rc'
#     snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)


def lipsyncer_demo():

    # download_model()

    lip_syncer = LipSyncer(lazy_load=True)
    # tts_talker = TTSTalker()

    with gr.Blocks(analytics_enabled=False) as lipsyncer_interface:
        gr.Markdown("<div align='center'> <h2> LipSyncer: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </h2> \
                     <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> </div>")

        with gr.Row().style(equal_height=False):
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="lipsyncer_source_image"):
                    with gr.TabItem('Source image'):
                        with gr.Row():
                            source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)

                with gr.Tabs(elem_id="lipsyncer_driven_audio"):
                    with gr.TabItem('Driving Methods'):
                        gr.Markdown("Possible driving combinations: <br> 1. Audio only  2. Audio/idle mode + reference video (pose, blink, pose+blink)  3. Idle mode only  4. Reference video only (all)")

                        with gr.Row():
                            driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
                            driven_audio_no = gr.Audio(label="Use idle mode, no audio is required", source="upload", type="filepath", visible=False)

                            with gr.Column():
                                use_idle_mode = gr.Checkbox(label="Use Idle Animation")
                                length_of_audio = gr.Number(value=5, label="The length (seconds) of the generated video.")
                                use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no])  # todo

                        with gr.Row():
                            ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref").style(width=512)

                            with gr.Column():
                                use_ref_video = gr.Checkbox(label="Use Reference Video")
                                ref_info = gr.Radio(['pose', 'blink', 'pose+blink', 'all'], value='pose', label='Reference Video', info="What to borrow from the reference video? ('all' transfers everything, i.e. video-driving mode)")

                            ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video])  # todo

            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="lipsyncer_checkbox"):
                    with gr.TabItem('Settings'):
                        with gr.Column(variant='panel'):
                            # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512)
                            # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512)
                            with gr.Row():
                                pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Pose style", value=0)
                                exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="Expression scale", value=1)
                                blink_every = gr.Checkbox(label="Use eye blink", value=True)

                            with gr.Row():
                                size_of_image = gr.Radio([256, 512], value=256, label='Face model resolution', info="Use the 256 or the 512 model?")
                                preprocess_type = gr.Radio(['crop', 'resize', 'full', 'extcrop', 'extfull'], value='crop', label='Preprocess', info="How to handle the input image?")

                            with gr.Row():
                                is_still_mode = gr.Checkbox(label="Still Mode (less head motion; works with preprocess `full`)")
                                facerender = gr.Radio(['facevid2vid', 'pirender'], value='facevid2vid', label='Face renderer', info="Which face renderer to use?")

                            with gr.Row():
                                batch_size = gr.Slider(label="Batch size in generation", step=1, maximum=10, value=1)
                                enhancer = gr.Checkbox(label="GFPGAN as face enhancer")

                            submit = gr.Button('Generate', elem_id="lipsyncer_generate", variant='primary')

                with gr.Tabs(elem_id="lipsyncer_generated"):
                    gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)

        submit.click(
            fn=lip_syncer.test,
            inputs=[source_image,
                    driven_audio,
                    preprocess_type,
                    is_still_mode,
                    enhancer,
                    batch_size,
                    size_of_image,
                    pose_style,
                    facerender,
                    exp_weight,
                    use_ref_video,
                    ref_video,
                    ref_info,
                    use_idle_mode,
                    length_of_audio,
                    blink_every],
            outputs=[gen_video]
        )

        with gr.Row():
            # Each example row maps to [source_image, driven_audio, preprocess_type, is_still_mode, enhancer].
            examples = [
                ['examples/source_image/full_body_1.png', 'examples/driven_audio/bus.wav',            'crop',   True,  False],
                ['examples/source_image/full_body_2.png', 'examples/driven_audio/japanese.wav',       'crop',   False, False],
                ['examples/source_image/full3.png',       'examples/driven_audio/deyu.wav',           'crop',   False, True],
                ['examples/source_image/full4.jpeg',      'examples/driven_audio/eluosi.wav',         'full',   False, True],
                ['examples/source_image/full4.jpeg',      'examples/driven_audio/imagine.wav',        'full',   True,  True],
                ['examples/source_image/full_body_1.png', 'examples/driven_audio/bus.wav',            'full',   True,  False],
                ['examples/source_image/art_13.png',      'examples/driven_audio/fayu.wav',           'resize', True,  False],
                ['examples/source_image/art_5.png',       'examples/driven_audio/news.wav',           'resize', False, False],
                ['examples/source_image/art_5.png',       'examples/driven_audio/RD_Radio31_000.wav', 'resize', True,  True],
            ]
            gr.Examples(examples=examples,
                        inputs=[source_image,
                                driven_audio,
                                preprocess_type,
                                is_still_mode,
                                enhancer],
                        outputs=[gen_video],
                        fn=lip_syncer.test,
                        # Cache example outputs only on Hugging Face Spaces, where SYSTEM=spaces is set.
                        cache_examples=os.getenv('SYSTEM') == 'spaces')

    return lipsyncer_interface


if __name__ == "__main__":
    demo = lipsyncer_demo()
    demo.queue(max_size=10)
    demo.launch(debug=True)
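For checking the interface wiring before the model checkpoints are available, a stand-in for src.gradio_demo.LipSyncer can be dropped in. The sketch below is hypothetical and not part of this commit: DummyLipSyncer and the placeholder return path are illustrative, and the argument order of test() is only assumed to mirror the inputs list passed to submit.click above.

# Hypothetical stand-in for src.gradio_demo.LipSyncer (UI testing only).
class DummyLipSyncer:
    def __init__(self, lazy_load=True):
        self.lazy_load = lazy_load

    def test(self, source_image, driven_audio, preprocess_type, is_still_mode,
             enhancer, batch_size, size_of_image, pose_style, facerender,
             exp_weight, use_ref_video, ref_video, ref_info, use_idle_mode,
             length_of_audio, blink_every):
        # Skip inference and return an existing video path so the output
        # component has something to display; the path is a placeholder.
        return ref_video if use_ref_video else 'examples/placeholder.mp4'

Swapping LipSyncer(lazy_load=True) for DummyLipSyncer() inside lipsyncer_demo() lets the tabs, visibility toggles, and example rows be exercised before ./checkpoints is populated.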