PascalLiu committed
Commit f2e3b88 (parent: 53c99f8)

finish image and video

Files changed (1): app.py (+53 -15)
app.py CHANGED
@@ -132,7 +132,7 @@ def drive_im(source_image, driving_image, adapt_scale):
     return img_as_ubyte(prediction[0])
 
 
-def drive_vi(source_image, driving_video, mode, find_best_frame, relative, adapt_scale):
+def drive_vi(source_image, driving_video, mode, find_best_frame, best_frame, relative, adapt_scale):
     reader = imageio.get_reader(driving_video)
     fps = reader.get_meta_data()['fps']
     driving_video = []
@@ -150,8 +150,9 @@ def drive_vi(source_image, driving_video, mode, find_best_frame, relative, adapt
     source_image = resize(source_image, (256, 256))[..., :3]
     driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]
 
-    if find_best_frame:
-        i = find_best_frame_func(source_image, driving_video, cpu=cpu)
+    i = 0
+    if find_best_frame != "specific ref frame" or best_frame > 0:
+        i = best_frame if find_best_frame == "specific ref frame" else find_best_frame_func(source_image, driving_video, cpu=cpu)
         print("Best frame: " + str(i))
         driving_forward = driving_video[i:]
         driving_backward = driving_video[:(i + 1)][::-1]
@@ -165,7 +166,7 @@ def drive_vi(source_image, driving_video, mode, find_best_frame, relative, adapt
                                      adapt_movement_scale=adapt_scale, cpu=cpu)
     result_video_path = "result_video.mp4"
     imageio.mimsave(result_video_path, [img_as_ubyte(frame) for frame in predictions], fps=fps)
-    return result_video_path
+    return result_video_path, i
 
 
 config = "config/vox-256.yaml"
@@ -174,7 +175,7 @@ cpu = True # decided by the deploying environment
 
 description = "We propose a Face Neural Volume Rendering (FNeVR) network for more realistic face animation, by taking the merits of 2D motion warping on facial expression transformation and 3D volume rendering on high-quality image synthesis in a unified framework.<br>[Paper](https://arxiv.org/abs/2209.10340) and [Code](https://github.com/zengbohan0217/FNeVR)"
 im_description = "We can animate a face portrait by a single image in this tab.<br>Please input the origin face and the driving face which provides pose and expression information, then we can obtain the virtual generated face.<br>We can select \"adaptive scale\" parameter for better optic flow estimation using adaptive movement scale based on convex hull of keypoints."
-vi_description = "We can animate a face portrait by a video in this tab.<br>Please input the origin face and the driving video which provides pose and expression information, then we can obtain the virtual generated video.<br>Please select inference mode (reenactment for different identities and reconstruction for the same identities).<br>We can select \"find best frame\" parameter to generate video from the frame that is the most alligned with source image, select \"relative motion\" paramter to use relative keypoint coordinates for preserving global object geometry, and select \"adaptive scale\" parameter for better optic flow estimation using adaptive movement scale based on convex hull of keypoints."
+vi_description = "We can animate a face portrait by a video in this tab.<br>Please input the origin face and the driving video which provides pose and expression information, then we can obtain the virtual generated video.<br>Please select inference mode (reenactment for different identities and reconstruction for the same identities).<br>We can select \"relative motion\" paramter to use relative keypoint coordinates for preserving global object geometry, select \"adaptive scale\" parameter for better optic flow estimation using adaptive movement scale based on convex hull of keypoints, and select \"find best ref frame\" parameter to generate video from the frame that is the most alligned with source image."
 acknowledgements = "This work was supported by “the Fundamental Research Funds for the Central Universities”, and the National Natural Science Foundation of China under Grant 62076016, Beijing Natural Science Foundation-Xiaomi Innovation Joint Fund L223024. Besides, we gratefully acknowledge the support of [MindSpore](https://www.mindspore.cn), CANN (Compute Architecture for Neural Networks) and Ascend AI processor used for this research.<br>Our FNeVR implementation is inspired by [FOMM](https://github.com/AliaksandrSiarohin/first-order-model) and [DECA](https://github.com/YadiraF/DECA). We appreciate the authors of these papers for making their codes available to the public."
 
 generator, kp_detector = load_checkpoints(config_path=config, checkpoint_path=checkpoint, cpu=cpu)
@@ -202,14 +203,28 @@ with gr.Blocks(title="Demostration of FNeVR") as demo:
                 gr.Markdown("#### Parameter")
                 inp3 = gr.Checkbox(value=True, label="adaptive scale")
 
-                btn = gr.Button(value="Animate")
+                btn1 = gr.Button(value="Animate")
             with gr.Column():
                 gr.Markdown("#### Output")
                 outp = gr.Image(label="Generated face")
 
+        with gr.Row():
+            with gr.Column():
+                btn2 = gr.Button(value="Reset")
+            with gr.Column():
+                btn3 = gr.Button(value="Cancel")
+
         gr.Examples([["sup-mat/driving.png", "sup-mat/source.png"]], [inp2, inp1])
 
-        btn.click(fn=drive_im, inputs=[inp1, inp2, inp3], outputs=outp)
+        def reset_output():
+            return outp.update(value=None)
+
+        def reset_all():
+            return inp1.update(value=None), inp2.update(value=None), inp3.update(value=True), outp.update(value=None)
+
+        run = btn1.click(fn=drive_im, inputs=[inp1, inp2, inp3], outputs=outp)
+        btn2.click(fn=reset_all, outputs=[inp1, inp2, inp3, outp])
+        btn3.click(fn=reset_output, outputs=[outp], cancels=[run])
     with gr.Tab("Driving by video"):
         gr.Markdown(vi_description)
 
@@ -220,24 +235,47 @@ with gr.Blocks(title="Demostration of FNeVR") as demo:
                 inp1 = gr.Image(label="Origin face")
 
                 gr.Markdown("#### Parameters")
-                inp3 = gr.Radio(choices=['reenactment', 'reconstruction'], value="reenactment", label="mode (if \"reconstruction\" selected, origin face is the first frame of driving video)")
-                inp4 = gr.Checkbox(value=True, label="find best frame (more time consumed)")
-                inp5 = gr.Checkbox(value=True, label="relative motion")
-                inp6 = gr.Checkbox(value=True, label="adaptive scale")
+                inp3 = gr.Radio(choices=["reenactment", "reconstruction"], value="reenactment", label="mode (if \"reconstruction\" selected, origin face is the first frame of driving video)")
+                inp6 = gr.Checkbox(value=True, label="relative motion")
+                inp7 = gr.Checkbox(value=True, label="adaptive scale")
+                inp4 = gr.Radio(choices=["find best ref frame (more time consumed)", "specific ref frame"], value="find best ref frame (more time consumed)", label="set ref frame (used by relative motion and adaptive scale)")
+                inp5 = gr.Number(label="specific ref frame (default: 0)", value=0, precision=0, visible=False)
+
+                def reset_ref(inp4):
+                    return inp5.update(visible=True) if inp4 == "specific ref frame" else inp5.update(value=0, visible=False)
+
+                inp4.change(fn=reset_ref, inputs=inp4, outputs=inp5)
 
-                btn = gr.Button(value="Animate")
+                btn1 = gr.Button(value="Animate")
             with gr.Column():
                 gr.Markdown("#### Output")
-                outp = gr.Video(label="Generated video")
+                outp1 = gr.Video(label="Generated video")
+                outp2 = gr.Number(label="Ref frame", value=0, precision=0)
+
+                # file = gr.File(value="result_video.mp4", visible=False)
+
+        with gr.Row():
+            with gr.Column():
+                btn2 = gr.Button(value="Reset")
+            with gr.Column():
+                btn3 = gr.Button(value="Cancel")
+
+        gr.Examples([["sup-mat/driving.mp4", "sup-mat/source_for_video.png", "specific ref frame", 53]], [inp2, inp1, inp4, inp5])
 
-        gr.Examples([["sup-mat/driving.mp4", "sup-mat/source_for_video.png"]], [inp2, inp1])
+        def reset_output():
+            return outp1.update(value=None), outp2.update(value=0)
 
-        btn.click(fn=drive_vi, inputs=[inp1, inp2, inp3, inp4, inp5, inp6], outputs=outp)
+        def reset_all():
+            return inp1.update(value=None), inp2.update(value=None), inp3.update(value="reenactment"), inp4.update(value="find best ref frame (more time consumed)"), inp5.update(value=0), inp6.update(value=True), inp7.update(value=True), outp1.update(value=None), outp2.update(value=0)
 
+        run = btn1.click(fn=drive_vi, inputs=[inp1, inp2, inp3, inp4, inp5, inp6, inp7], outputs=[outp1, outp2])
+        btn2.click(fn=reset_all, outputs=[inp1, inp2, inp3, inp4, inp5, inp6, inp7, outp1, outp2])
+        btn3.click(fn=reset_output, outputs=[outp1, outp2], cancels=[run])
     with gr.Tab("Real time animation"):
         gr.Markdown("#### Real time animation")
 
     gr.Markdown("## Acknowledgements")
     gr.Markdown(acknowledgements)
 
+demo.queue()
 demo.launch()
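
The Cancel buttons rely on Gradio's event cancellation: the event returned by `btn1.click(...)` is kept in `run` and passed as `cancels=[run]` to the Cancel button's click handler, and cancellation only works when the queue is enabled, which is why `demo.queue()` is added before `demo.launch()`. Below is a minimal, self-contained sketch of the same pattern, assuming Gradio 3.x; `slow_task` and the component names are placeholders, not part of this commit.

```python
import time
import gradio as gr

def slow_task(x):
    # Stand-in for a long-running job such as drive_vi
    time.sleep(30)
    return f"done: {x}"

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    start = gr.Button("Start")
    stop = gr.Button("Cancel")

    # Keep a handle to the click event so it can be cancelled later
    run = start.click(fn=slow_task, inputs=inp, outputs=out)
    # fn=None: the button only cancels the running event, it computes nothing
    stop.click(fn=None, inputs=None, outputs=None, cancels=[run])

# Cancellation requires the queue to be enabled
demo.queue()
demo.launch()
```

The Reset handlers in the commit follow the same single-callback idea, returning `component.update(value=...)` objects for every input and output so one click clears the whole tab.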