vinthony committed on
Commit
0ce42bd
1 Parent(s): 6bc96b5
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +5 -1
  2. app.py +53 -6
  3. checkpoints/BFM_Fitting.zip +0 -3
  4. checkpoints/BFM_Fitting/01_MorphableModel.mat +0 -3
  5. checkpoints/BFM_Fitting/01_MorphableModel.mat +1 -0
  6. checkpoints/BFM_Fitting/BFM09_model_info.mat +0 -3
  7. checkpoints/BFM_Fitting/BFM09_model_info.mat +1 -0
  8. checkpoints/BFM_Fitting/BFM_exp_idx.mat +0 -0
  9. checkpoints/BFM_Fitting/BFM_exp_idx.mat +1 -0
  10. checkpoints/BFM_Fitting/BFM_front_idx.mat +0 -0
  11. checkpoints/BFM_Fitting/BFM_front_idx.mat +1 -0
  12. checkpoints/BFM_Fitting/Exp_Pca.bin +0 -3
  13. checkpoints/BFM_Fitting/Exp_Pca.bin +1 -0
  14. checkpoints/BFM_Fitting/facemodel_info.mat +0 -0
  15. checkpoints/BFM_Fitting/facemodel_info.mat +1 -0
  16. checkpoints/BFM_Fitting/select_vertex_id.mat +0 -0
  17. checkpoints/BFM_Fitting/select_vertex_id.mat +1 -0
  18. checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +0 -0
  19. checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +1 -0
  20. checkpoints/BFM_Fitting/std_exp.txt +0 -1
  21. checkpoints/BFM_Fitting/std_exp.txt +1 -0
  22. checkpoints/auido2exp_00300-model.pth +0 -3
  23. checkpoints/auido2exp_00300-model.pth +1 -0
  24. checkpoints/auido2pose_00140-model.pth +0 -3
  25. checkpoints/auido2pose_00140-model.pth +1 -0
  26. checkpoints/epoch_20.pth +0 -3
  27. checkpoints/epoch_20.pth +1 -0
  28. checkpoints/facevid2vid_00189-model.pth.tar +0 -3
  29. checkpoints/facevid2vid_00189-model.pth.tar +1 -0
  30. checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +0 -3
  31. checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +1 -0
  32. checkpoints/hub/checkpoints/s3fd-619a316812.pth +0 -3
  33. checkpoints/hub/checkpoints/s3fd-619a316812.pth +1 -0
  34. checkpoints/mapping_00229-model.pth.tar +0 -3
  35. checkpoints/mapping_00229-model.pth.tar +1 -0
  36. checkpoints/shape_predictor_68_face_landmarks.dat +0 -3
  37. checkpoints/shape_predictor_68_face_landmarks.dat +1 -0
  38. checkpoints/wav2lip.pth +0 -3
  39. checkpoints/wav2lip.pth +1 -0
  40. docs/sadtalker_logo.png +0 -0
  41. examples/source_image/full3.png +0 -0
  42. examples/source_image/full4.jpeg +0 -0
  43. inference.py +0 -134
  44. modules/__pycache__/gfpgan_inference.cpython-38.pyc +0 -0
  45. modules/__pycache__/gfpgan_inference.cpython-39.pyc +0 -0
  46. modules/__pycache__/sadtalker_test.cpython-38.pyc +0 -0
  47. modules/__pycache__/sadtalker_test.cpython-39.pyc +0 -0
  48. modules/__pycache__/text2speech.cpython-38.pyc +0 -0
  49. modules/__pycache__/text2speech.cpython-39.pyc +0 -0
  50. modules/gfpgan_inference.py +0 -36
.gitignore CHANGED
@@ -152,4 +152,8 @@ dmypy.json
152
  # Cython debug symbols
153
  cython_debug/
154
 
155
- results/
 
 
 
 
152
  # Cython debug symbols
153
  cython_debug/
154
 
155
+ results/
156
+ checkpoints/
157
+ gradio_cached_examples/
158
+ gfpgan/
159
+ start.sh
app.py CHANGED
@@ -3,15 +3,20 @@ import tempfile
3
  import gradio as gr
4
  from src.gradio_demo import SadTalker
5
  from src.utils.text2speech import TTSTalker
 
6
 
7
  def get_source_image(image):
8
  return image
9
 
10
-
 
 
11
 
12
  def sadtalker_demo():
13
 
14
- sad_talker = SadTalker()
 
 
15
  tts_talker = TTSTalker()
16
 
17
  with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
@@ -42,7 +47,8 @@ def sadtalker_demo():
42
  with gr.Tabs(elem_id="sadtalker_checkbox"):
43
  with gr.TabItem('Settings'):
44
  with gr.Column(variant='panel'):
45
- is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works on full body)")
 
46
  enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
47
  submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
48
 
@@ -54,42 +60,83 @@ def sadtalker_demo():
54
  [
55
  'examples/source_image/full_body_1.png',
56
  'examples/driven_audio/bus_chinese.wav',
 
57
  True,
58
  False
59
  ],
60
  [
61
  'examples/source_image/full_body_2.png',
62
- 'examples/driven_audio/itosinger1.wav',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  True,
64
  False
65
  ],
66
  [
67
  'examples/source_image/art_13.png',
68
  'examples/driven_audio/fayu.wav',
 
69
  True,
70
  False
71
  ],
72
  [
73
  'examples/source_image/art_5.png',
74
  'examples/driven_audio/chinese_news.wav',
75
- True,
 
76
  False
77
  ],
 
 
 
 
 
 
 
78
  ]
79
  gr.Examples(examples=examples,
80
  inputs=[
81
  source_image,
82
  driven_audio,
 
83
  is_still_mode,
84
  enhancer],
85
  outputs=[gen_video],
86
  fn=sad_talker.test,
87
- cache_examples=os.getenv('SYSTEM') == 'spaces')
88
 
89
  submit.click(
90
  fn=sad_talker.test,
91
  inputs=[source_image,
92
  driven_audio,
 
93
  is_still_mode,
94
  enhancer],
95
  outputs=[gen_video]
3
  import gradio as gr
4
  from src.gradio_demo import SadTalker
5
  from src.utils.text2speech import TTSTalker
6
+ from huggingface_hub import snapshot_download
7
 
8
  def get_source_image(image):
9
  return image
10
 
11
+ def download_model():
12
+ REPO_ID = 'vinthony/SadTalker'
13
+ snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
14
 
15
  def sadtalker_demo():
16
 
17
+ download_model()
18
+
19
+ sad_talker = SadTalker(lazy_load=True)
20
  tts_talker = TTSTalker()
21
 
22
  with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
47
  with gr.Tabs(elem_id="sadtalker_checkbox"):
48
  with gr.TabItem('Settings'):
49
  with gr.Column(variant='panel'):
50
+ preprocess_type = gr.Radio(['crop','resize','full'], value='crop', label='preprocess', info="How to handle input image?")
51
+ is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works with preprocess `full`)")
52
  enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
53
  submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
54
 
60
  [
61
  'examples/source_image/full_body_1.png',
62
  'examples/driven_audio/bus_chinese.wav',
63
+ 'crop',
64
  True,
65
  False
66
  ],
67
  [
68
  'examples/source_image/full_body_2.png',
69
+ 'examples/driven_audio/japanese.wav',
70
+ 'crop',
71
+ False,
72
+ False
73
+ ],
74
+ [
75
+ 'examples/source_image/full3.png',
76
+ 'examples/driven_audio/deyu.wav',
77
+ 'crop',
78
+ False,
79
+ True
80
+ ],
81
+ [
82
+ 'examples/source_image/full4.jpeg',
83
+ 'examples/driven_audio/eluosi.wav',
84
+ 'full',
85
+ False,
86
+ True
87
+ ],
88
+ [
89
+ 'examples/source_image/full4.jpeg',
90
+ 'examples/driven_audio/imagine.wav',
91
+ 'full',
92
+ True,
93
+ True
94
+ ],
95
+ [
96
+ 'examples/source_image/full_body_1.png',
97
+ 'examples/driven_audio/bus_chinese.wav',
98
+ 'full',
99
  True,
100
  False
101
  ],
102
  [
103
  'examples/source_image/art_13.png',
104
  'examples/driven_audio/fayu.wav',
105
+ 'resize',
106
  True,
107
  False
108
  ],
109
  [
110
  'examples/source_image/art_5.png',
111
  'examples/driven_audio/chinese_news.wav',
112
+ 'resize',
113
+ False,
114
  False
115
  ],
116
+ [
117
+ 'examples/source_image/art_5.png',
118
+ 'examples/driven_audio/RD_Radio31_000.wav',
119
+ 'resize',
120
+ True,
121
+ True
122
+ ],
123
  ]
124
  gr.Examples(examples=examples,
125
  inputs=[
126
  source_image,
127
  driven_audio,
128
+ preprocess_type,
129
  is_still_mode,
130
  enhancer],
131
  outputs=[gen_video],
132
  fn=sad_talker.test,
133
+ cache_examples=True) # os.getenv('SYSTEM') == 'spaces')
134
 
135
  submit.click(
136
  fn=sad_talker.test,
137
  inputs=[source_image,
138
  driven_audio,
139
+ preprocess_type,
140
  is_still_mode,
141
  enhancer],
142
  outputs=[gen_video]
checkpoints/BFM_Fitting.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:785f77f3de288568e76666cd419dcf40646d3f74eae6d4fa3b766c933087a9d8
3
- size 404051745
 
 
 
checkpoints/BFM_Fitting/01_MorphableModel.mat DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
3
- size 240875364
 
 
 
checkpoints/BFM_Fitting/01_MorphableModel.mat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
checkpoints/BFM_Fitting/BFM09_model_info.mat DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
3
- size 127170280
 
 
 
checkpoints/BFM_Fitting/BFM09_model_info.mat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
checkpoints/BFM_Fitting/BFM_exp_idx.mat DELETED
Binary file (91.9 kB)
checkpoints/BFM_Fitting/BFM_exp_idx.mat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/1146e4e9c3bef303a497383aa7974c014fe945c7
checkpoints/BFM_Fitting/BFM_front_idx.mat DELETED
Binary file (44.9 kB)
checkpoints/BFM_Fitting/BFM_front_idx.mat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b9d7b0953dd1dc5b1e28144610485409ac321f9b
checkpoints/BFM_Fitting/Exp_Pca.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
3
- size 51086404
 
 
 
checkpoints/BFM_Fitting/Exp_Pca.bin ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
checkpoints/BFM_Fitting/facemodel_info.mat DELETED
Binary file (739 kB)
checkpoints/BFM_Fitting/facemodel_info.mat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/3e516ec7297fa3248098f49ecea10579f4831c0a
checkpoints/BFM_Fitting/select_vertex_id.mat DELETED
Binary file (62.3 kB)
checkpoints/BFM_Fitting/select_vertex_id.mat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/5b8b220093d93b133acc94ffed159f31a74854cd
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat DELETED
Binary file (994 Bytes)
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/a0e23588302bc71fc899eef53ff06df5f4df4c1d
checkpoints/BFM_Fitting/std_exp.txt DELETED
@@ -1 +0,0 @@
1
- 453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19
 
checkpoints/BFM_Fitting/std_exp.txt ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/767b8de4ea1ca78b6f22b98ff2dee4fa345500bb
checkpoints/auido2exp_00300-model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
3
- size 34278319
 
 
 
checkpoints/auido2exp_00300-model.pth ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
checkpoints/auido2pose_00140-model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
3
- size 95916155
 
 
 
checkpoints/auido2pose_00140-model.pth ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
checkpoints/epoch_20.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
3
- size 288860037
 
 
 
checkpoints/epoch_20.pth ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
checkpoints/facevid2vid_00189-model.pth.tar DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
3
- size 2112619148
 
 
 
checkpoints/facevid2vid_00189-model.pth.tar ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
3
- size 96316515
 
 
 
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
checkpoints/hub/checkpoints/s3fd-619a316812.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
3
- size 89843225
 
 
 
checkpoints/hub/checkpoints/s3fd-619a316812.pth ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
checkpoints/mapping_00229-model.pth.tar DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
3
- size 155521183
 
 
 
checkpoints/mapping_00229-model.pth.tar ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
checkpoints/shape_predictor_68_face_landmarks.dat DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
3
- size 99693937
 
 
 
checkpoints/shape_predictor_68_face_landmarks.dat ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
checkpoints/wav2lip.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
3
- size 435807851
 
 
 
checkpoints/wav2lip.pth ADDED
@@ -0,0 +1 @@
 
1
+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
docs/sadtalker_logo.png ADDED
examples/source_image/full3.png ADDED
examples/source_image/full4.jpeg ADDED
inference.py DELETED
@@ -1,134 +0,0 @@
1
- import torch
2
- from time import strftime
3
- import os, sys, time
4
- from argparse import ArgumentParser
5
-
6
- from src.utils.preprocess import CropAndExtract
7
- from src.test_audio2coeff import Audio2Coeff
8
- from src.facerender.animate import AnimateFromCoeff
9
- from src.generate_batch import get_data
10
- from src.generate_facerender_batch import get_facerender_data
11
-
12
- def main(args):
13
- #torch.backends.cudnn.enabled = False
14
-
15
- pic_path = args.source_image
16
- audio_path = args.driven_audio
17
- save_dir = os.path.join(args.result_dir, strftime("%Y_%m_%d_%H.%M.%S"))
18
- os.makedirs(save_dir, exist_ok=True)
19
- pose_style = args.pose_style
20
- device = args.device
21
- batch_size = args.batch_size
22
- camera_yaw_list = args.camera_yaw
23
- camera_pitch_list = args.camera_pitch
24
- camera_roll_list = args.camera_roll
25
-
26
- current_code_path = sys.argv[0]
27
- current_root_path = os.path.split(current_code_path)[0]
28
-
29
- os.environ['TORCH_HOME']=os.path.join(current_root_path, args.checkpoint_dir)
30
-
31
- path_of_lm_croper = os.path.join(current_root_path, args.checkpoint_dir, 'shape_predictor_68_face_landmarks.dat')
32
- path_of_net_recon_model = os.path.join(current_root_path, args.checkpoint_dir, 'epoch_20.pth')
33
- dir_of_BFM_fitting = os.path.join(current_root_path, args.checkpoint_dir, 'BFM_Fitting')
34
- wav2lip_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'wav2lip.pth')
35
-
36
- audio2pose_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2pose_00140-model.pth')
37
- audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml')
38
-
39
- audio2exp_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2exp_00300-model.pth')
40
- audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml')
41
-
42
- free_view_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'facevid2vid_00189-model.pth.tar')
43
- mapping_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'mapping_00229-model.pth.tar')
44
- facerender_yaml_path = os.path.join(current_root_path, 'src', 'config', 'facerender.yaml')
45
-
46
- #init model
47
- print(path_of_net_recon_model)
48
- preprocess_model = CropAndExtract(path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device)
49
-
50
- print(audio2pose_checkpoint)
51
- print(audio2exp_checkpoint)
52
- audio_to_coeff = Audio2Coeff(audio2pose_checkpoint, audio2pose_yaml_path,
53
- audio2exp_checkpoint, audio2exp_yaml_path,
54
- wav2lip_checkpoint, device)
55
-
56
- print(free_view_checkpoint)
57
- print(mapping_checkpoint)
58
- animate_from_coeff = AnimateFromCoeff(free_view_checkpoint, mapping_checkpoint,
59
- facerender_yaml_path, device)
60
-
61
- #crop image and extract 3dmm from image
62
- first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
63
- os.makedirs(first_frame_dir, exist_ok=True)
64
- first_coeff_path, crop_pic_path = preprocess_model.generate(pic_path, first_frame_dir)
65
- if first_coeff_path is None:
66
- print("Can't get the coeffs of the input")
67
- return
68
-
69
- #audio2ceoff
70
- batch = get_data(first_coeff_path, audio_path, device)
71
- coeff_path = audio_to_coeff.generate(batch, save_dir, pose_style)
72
-
73
- # 3dface render
74
- if args.face3dvis:
75
- from src.face3d.visualize import gen_composed_video
76
- gen_composed_video(args, device, first_coeff_path, coeff_path, audio_path, os.path.join(save_dir, '3dface.mp4'))
77
-
78
- #coeff2video
79
- data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path,
80
- batch_size, camera_yaw_list, camera_pitch_list, camera_roll_list,
81
- expression_scale=args.expression_scale, still_mode=args.still)
82
-
83
- animate_from_coeff.generate(data, save_dir, enhancer=args.enhancer)
84
- video_name = data['video_name']
85
-
86
- if args.enhancer is not None:
87
- print(f'The generated video is named {video_name}_enhanced in {save_dir}')
88
- else:
89
- print(f'The generated video is named {video_name} in {save_dir}')
90
-
91
- return os.path.join(save_dir, video_name+'.mp4'), os.path.join(save_dir, video_name+'.mp4')
92
-
93
-
94
- if __name__ == '__main__':
95
-
96
- parser = ArgumentParser()
97
- parser.add_argument("--driven_audio", default='./examples/driven_audio/japanese.wav', help="path to driven audio")
98
- parser.add_argument("--source_image", default='./examples/source_image/art_0.png', help="path to source image")
99
- parser.add_argument("--checkpoint_dir", default='./checkpoints', help="path to output")
100
- parser.add_argument("--result_dir", default='./results', help="path to output")
101
- parser.add_argument("--pose_style", type=int, default=0, help="input pose style from [0, 46)")
102
- parser.add_argument("--batch_size", type=int, default=2, help="the batch size of facerender")
103
- parser.add_argument("--expression_scale", type=float, default=1., help="the batch size of facerender")
104
- parser.add_argument('--camera_yaw', nargs='+', type=int, default=[0], help="the camera yaw degree")
105
- parser.add_argument('--camera_pitch', nargs='+', type=int, default=[0], help="the camera pitch degree")
106
- parser.add_argument('--camera_roll', nargs='+', type=int, default=[0], help="the camera roll degree")
107
- parser.add_argument('--enhancer', type=str, default=None, help="Face enhancer, [GFPGAN]")
108
- parser.add_argument("--cpu", dest="cpu", action="store_true")
109
- parser.add_argument("--face3dvis", action="store_true", help="generate 3d face and 3d landmarks")
110
- parser.add_argument("--still", action="store_true")
111
-
112
- # net structure and parameters
113
- parser.add_argument('--net_recon', type=str, default='resnet50', choices=['resnet18', 'resnet34', 'resnet50'], help='not use')
114
- parser.add_argument('--init_path', type=str, default=None, help='not Use')
115
- parser.add_argument('--use_last_fc',default=False, help='zero initialize the last fc')
116
- parser.add_argument('--bfm_folder', type=str, default='./checkpoints/BFM_Fitting/')
117
- parser.add_argument('--bfm_model', type=str, default='BFM_model_front.mat', help='bfm model')
118
-
119
- # default renderer parameters
120
- parser.add_argument('--focal', type=float, default=1015.)
121
- parser.add_argument('--center', type=float, default=112.)
122
- parser.add_argument('--camera_d', type=float, default=10.)
123
- parser.add_argument('--z_near', type=float, default=5.)
124
- parser.add_argument('--z_far', type=float, default=15.)
125
-
126
- args = parser.parse_args()
127
-
128
- if torch.cuda.is_available() and not args.cpu:
129
- args.device = "cuda"
130
- else:
131
- args.device = "cpu"
132
-
133
- main(args)
134
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modules/__pycache__/gfpgan_inference.cpython-38.pyc DELETED
Binary file (1.36 kB)
modules/__pycache__/gfpgan_inference.cpython-39.pyc DELETED
Binary file (1.4 kB)
modules/__pycache__/sadtalker_test.cpython-38.pyc DELETED
Binary file (3.63 kB)
modules/__pycache__/sadtalker_test.cpython-39.pyc DELETED
Binary file (3.98 kB)
modules/__pycache__/text2speech.cpython-38.pyc DELETED
Binary file (473 Bytes)
modules/__pycache__/text2speech.cpython-39.pyc DELETED
Binary file (477 Bytes)
modules/gfpgan_inference.py DELETED
@@ -1,36 +0,0 @@
1
- import os,sys
2
-
3
- def gfpgan(scale, origin_mp4_path):
4
- current_code_path = sys.argv[0]
5
- current_root_path = os.path.split(current_code_path)[0]
6
- print(current_root_path)
7
- gfpgan_code_path = current_root_path+'/repositories/GFPGAN/inference_gfpgan.py'
8
- print(gfpgan_code_path)
9
-
10
- #video2pic
11
- result_dir = os.path.split(origin_mp4_path)[0]
12
- video_name = os.path.split(origin_mp4_path)[1]
13
- video_name = video_name.split('.')[0]
14
- print(video_name)
15
- str_scale = str(scale).replace('.', '_')
16
- output_mp4_path = os.path.join(result_dir, video_name+'##'+str_scale+'.mp4')
17
- temp_output_mp4_path = os.path.join(result_dir, 'temp_'+video_name+'##'+str_scale+'.mp4')
18
-
19
- audio_name = video_name.split('##')[-1]
20
- audio_path = os.path.join(result_dir, audio_name+'.wav')
21
- temp_pic_dir1 = os.path.join(result_dir, video_name)
22
- temp_pic_dir2 = os.path.join(result_dir, video_name+'##'+str_scale)
23
- os.makedirs(temp_pic_dir1, exist_ok=True)
24
- os.makedirs(temp_pic_dir2, exist_ok=True)
25
- cmd1 = 'ffmpeg -i \"{}\" -start_number 0 \"{}\"/%06d.png -loglevel error -y'.format(origin_mp4_path, temp_pic_dir1)
26
- os.system(cmd1)
27
- cmd2 = f'python {gfpgan_code_path} -i {temp_pic_dir1} -o {temp_pic_dir2} -s {scale}'
28
- os.system(cmd2)
29
- cmd3 = f'ffmpeg -r 25 -f image2 -i {temp_pic_dir2}/%06d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p {temp_output_mp4_path}'
30
- os.system(cmd3)
31
- cmd4 = f'ffmpeg -y -i {temp_output_mp4_path} -i {audio_path} -vcodec copy {output_mp4_path}'
32
- os.system(cmd4)
33
- #shutil.rmtree(temp_pic_dir1)
34
- #shutil.rmtree(temp_pic_dir2)
35
-
36
- return output_mp4_path