diff --git a/app.py b/app.py index 44ffb9b5bfe538cbbabacf93652e5aaa45be50f5..e2ad7c6463381f98b4379a41b7cccebb57614033 100644 --- a/app.py +++ b/app.py @@ -1,90 +1,88 @@ import os, sys import tempfile import gradio as gr -from modules.text2speech import text2speech -from modules.sadtalker_test import SadTalker - -def get_driven_audio(audio): - if os.path.isfile(audio): - return audio - else: - save_path = tempfile.NamedTemporaryFile( - delete=False, - suffix=("." + "wav"), - ) - gen_audio = text2speech(audio, save_path.name) - return gen_audio, gen_audio +from src.gradio_demo import SadTalker +from src.utils.text2speech import TTSTalker def get_source_image(image): return image -def sadtalker_demo(result_dir='./tmp/'): + + +def sadtalker_demo(): sad_talker = SadTalker() + tts_talker = TTSTalker() + with gr.Blocks(analytics_enabled=False) as sadtalker_interface: - gr.Markdown("

😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023)

\ + gr.Markdown("

😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023)

\ Arxiv       \ Homepage       \ - Github
") + Github
") - with gr.Row(): + with gr.Row().style(equal_height=False): with gr.Column(variant='panel'): with gr.Tabs(elem_id="sadtalker_source_image"): with gr.TabItem('Upload image'): with gr.Row(): - source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256) + source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256) with gr.Tabs(elem_id="sadtalker_driven_audio"): - with gr.TabItem('Upload audio(wav/mp3 only currently)'): + with gr.TabItem('Upload OR TTS'): with gr.Column(variant='panel'): driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath") + + with gr.Column(variant='panel'): + input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.") + tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary') + tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio]) + with gr.Column(variant='panel'): with gr.Tabs(elem_id="sadtalker_checkbox"): with gr.TabItem('Settings'): with gr.Column(variant='panel'): - is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True) - is_resize_mode = gr.Checkbox(label="Resize Mode (⚠️ Resize mode need manually crop the image firstly, can handle larger image crop)").style(container=True) - is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True) + is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works on full body)") + enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer") submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary') with gr.Tabs(elem_id="sadtalker_genearted"): gen_video = gr.Video(label="Generated video", format="mp4").style(width=256) - gen_text = gr.Textbox(visible=False) - + with gr.Row(): examples = [ [ - 'examples/source_image/art_10.png', - 'examples/driven_audio/deyu.wav', + 'examples/source_image/full_body_1.png', + 'examples/driven_audio/bus_chinese.wav', True, - False, False ], [ - 'examples/source_image/art_1.png', - 'examples/driven_audio/fayu.wav', + 'examples/source_image/full_body_2.png', + 'examples/driven_audio/itosinger1.wav', True, + False + ], + [ + 'examples/source_image/art_13.png', + 'examples/driven_audio/fayu.wav', True, False ], [ - 'examples/source_image/art_9.png', - 'examples/driven_audio/itosinger1.wav', + 'examples/source_image/art_5.png', + 'examples/driven_audio/chinese_news.wav', True, - False, - True - ] + False + ], ] gr.Examples(examples=examples, inputs=[ source_image, driven_audio, is_still_mode, - is_resize_mode, - is_enhance_mode, - gr.Textbox(value=result_dir, visible=False)], - outputs=[gen_video, gen_text], + enhancer], + outputs=[gen_video], fn=sad_talker.test, cache_examples=os.getenv('SYSTEM') == 'spaces') @@ -93,10 +91,8 @@ def sadtalker_demo(result_dir='./tmp/'): inputs=[source_image, driven_audio, is_still_mode, - is_resize_mode, - is_enhance_mode, - gr.Textbox(value=result_dir, visible=False)], - outputs=[gen_video, gen_text] + enhancer], + outputs=[gen_video] ) return sadtalker_interface @@ -104,8 +100,7 @@ def sadtalker_demo(result_dir='./tmp/'): if __name__ == "__main__": - sadtalker_result_dir = os.path.join('./', 'results') - demo = sadtalker_demo(sadtalker_result_dir) + demo = sadtalker_demo() demo.launch() diff --git a/examples/driven_audio/bus_chinese.wav b/examples/driven_audio/bus_chinese.wav new file mode 100644 index 0000000000000000000000000000000000000000..888647738d72dfaee99b8d40bb0ddf6f7a1872e7 Binary files /dev/null and b/examples/driven_audio/bus_chinese.wav differ diff --git a/examples/source_image/full_body_1.png b/examples/source_image/full_body_1.png new file mode 100644 index 0000000000000000000000000000000000000000..4fca65c949b7c7e7f7ed9459c473314a38be791f Binary files /dev/null and b/examples/source_image/full_body_1.png differ diff --git a/examples/source_image/full_body_2.png b/examples/source_image/full_body_2.png new file mode 100644 index 0000000000000000000000000000000000000000..b7bc6228cb2f4e8c01af8d2f52bbbf62540e2412 Binary files /dev/null and b/examples/source_image/full_body_2.png differ diff --git a/examples/source_image/happy.png b/examples/source_image/happy.png new file mode 100644 index 0000000000000000000000000000000000000000..9d194ba9a03dfda0867703d54ea6233819c46a73 Binary files /dev/null and b/examples/source_image/happy.png differ diff --git a/examples/source_image/happy1.png b/examples/source_image/happy1.png new file mode 100644 index 0000000000000000000000000000000000000000..b702974cca1a648ec70efee776e484284b527c90 Binary files /dev/null and b/examples/source_image/happy1.png differ diff --git a/examples/source_image/people_0.png b/examples/source_image/people_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8895eeb07a3e300b9bcfa3bb53e7a6a552182bc3 Binary files /dev/null and b/examples/source_image/people_0.png differ diff --git a/examples/source_image/sad.png b/examples/source_image/sad.png new file mode 100644 index 0000000000000000000000000000000000000000..6584467fdac971207883cdcd84b31da1dbc4dfa6 Binary files /dev/null and b/examples/source_image/sad.png differ diff --git a/examples/source_image/sad1.png b/examples/source_image/sad1.png new file mode 100644 index 0000000000000000000000000000000000000000..341e0cb70886995ecf72eebb4b8a4474ab7d287b Binary files /dev/null and b/examples/source_image/sad1.png differ diff --git a/modules/__pycache__/sadtalker_test.cpython-38.pyc b/modules/__pycache__/sadtalker_test.cpython-38.pyc index c54ce9b8728a52636f9cb9f9c47616709d04cfe4..a96311c6eee958b442fec8776d088b74e7b8b3a2 100644 Binary files a/modules/__pycache__/sadtalker_test.cpython-38.pyc and b/modules/__pycache__/sadtalker_test.cpython-38.pyc differ diff --git a/src/__pycache__/generate_batch.cpython-38.pyc b/src/__pycache__/generate_batch.cpython-38.pyc index c68dd09e49933b52115307195bf3aa446d924922..dc3eb4726e9835d34c08362da995941fef530b8f 100644 Binary files a/src/__pycache__/generate_batch.cpython-38.pyc and b/src/__pycache__/generate_batch.cpython-38.pyc differ diff --git a/src/__pycache__/generate_facerender_batch.cpython-38.pyc b/src/__pycache__/generate_facerender_batch.cpython-38.pyc index 6a30615ed3eaa5902a2fa553ed3ed17a9ae92a51..cc944270498549b70e901f5b1c764d1d832eb49e 100644 Binary files a/src/__pycache__/generate_facerender_batch.cpython-38.pyc and b/src/__pycache__/generate_facerender_batch.cpython-38.pyc differ diff --git a/src/__pycache__/test_audio2coeff.cpython-38.pyc b/src/__pycache__/test_audio2coeff.cpython-38.pyc index c2553cc97f50096d7c7005ad39274a8653cb6ad4..a6d261868c02b57145618adcd583481cf623e391 100644 Binary files a/src/__pycache__/test_audio2coeff.cpython-38.pyc and b/src/__pycache__/test_audio2coeff.cpython-38.pyc differ diff --git a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc b/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc index 460563d74a990c40a3c5bd6f3209acca6d86b550..de88551314f6c19ad1f5b5b33704f1303f51e029 100644 Binary files a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc and b/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc differ diff --git a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc b/src/audio2exp_models/__pycache__/networks.cpython-38.pyc index 766660615f22f94c740dd420ccef83ed442c4fac..d703bd9e8f3d0c27c16fa713bba3d0969e984ad3 100644 Binary files a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc and b/src/audio2exp_models/__pycache__/networks.cpython-38.pyc differ diff --git a/src/audio2exp_models/audio2exp.py b/src/audio2exp_models/audio2exp.py index 5f6e6b77b0ceb2089539caa440f7106c7b1e8aa2..9e79a929560592687a505e13188796e2b0ca8772 100644 --- a/src/audio2exp_models/audio2exp.py +++ b/src/audio2exp_models/audio2exp.py @@ -22,7 +22,8 @@ class Audio2Exp(nn.Module): current_mel_input = mel_input[:,i:i+10] - ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1)) #bs T 64 + #ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1)) #bs T 64 + ref = batch['ref'][:, :, :64][:, i:i+10] ratio = batch['ratio_gt'][:, i:i+10] #bs T audiox = current_mel_input.view(-1, 1, 80, 16) # bs*T 1 80 16 diff --git a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc b/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc index 20fa93168344012f0bdb77727b5b5669fac8a10b..5b2dcc996a73224e972148e252fb4e2deedd69a5 100644 Binary files a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc differ diff --git a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc b/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc index 97d9bdf072c5bd356cc312357646c6eae2b798d0..b0f11a59fea18ee93c30da5cd4c94d04897ea010 100644 Binary files a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc differ diff --git a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc b/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc index 0d9aaee3ad4caa8afc40f723d224eb5b25e8afcd..1aa0e494be950e6ca972390b27f2dddc8be6d193 100644 Binary files a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc and b/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc differ diff --git a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc b/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc index c7ebfcd0dd3538cedeb7eba984f94d9763b392c6..817b8836123ed1a3b5795d912d84c3ff54d7accc 100644 Binary files a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc and b/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc differ diff --git a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc b/src/audio2pose_models/__pycache__/networks.cpython-38.pyc index 239626089b91321b1c00cfba2dfe0a3ba1ccb0b9..d18f56064377373a8f4f400c59379b0b79d9f649 100644 Binary files a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc and b/src/audio2pose_models/__pycache__/networks.cpython-38.pyc differ diff --git a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc b/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc index 0e6b40591fd932ddb2cf686b72afd08c90de1a44..5aa2863a646a6eb8b44e0ebdebc5c21b562c2f39 100644 Binary files a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc and b/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc differ diff --git a/src/audio2pose_models/audio2pose.py b/src/audio2pose_models/audio2pose.py index 3a37179e221340662a817628df3d01ae9e34404f..1a8410d6ee7f7f1d50305f61332bfbdb9dc8bf0e 100644 --- a/src/audio2pose_models/audio2pose.py +++ b/src/audio2pose_models/audio2pose.py @@ -12,7 +12,7 @@ class Audio2Pose(nn.Module): self.latent_dim = cfg.MODEL.CVAE.LATENT_SIZE self.device = device - self.audio_encoder = AudioEncoder(wav2lip_checkpoint) + self.audio_encoder = AudioEncoder(wav2lip_checkpoint, device) self.audio_encoder.eval() for param in self.audio_encoder.parameters(): param.requires_grad = False @@ -20,10 +20,6 @@ class Audio2Pose(nn.Module): self.netG = CVAE(cfg) self.netD_motion = PoseSequenceDiscriminator(cfg) - self.gan_criterion = nn.MSELoss() - self.reg_criterion = nn.L1Loss(reduction='none') - self.pair_criterion = nn.PairwiseDistance() - self.cosine_loss = nn.CosineSimilarity(dim=1) def forward(self, x): @@ -81,6 +77,10 @@ class Audio2Pose(nn.Module): z = torch.randn(bs, self.latent_dim).to(ref.device) batch['z'] = z audio_emb = self.audio_encoder(indiv_mels_use[:, -1*self.seq_len:,:,:,:]) #bs seq_len 512 + if audio_emb.shape[1] != self.seq_len: + pad_dim = self.seq_len-audio_emb.shape[1] + pad_audio_emb = audio_emb[:, :1].repeat(1, pad_dim, 1) + audio_emb = torch.cat([pad_audio_emb, audio_emb], 1) batch['audio_emb'] = audio_emb batch = self.netG.test(batch) pose_motion_pred_list.append(batch['pose_motion_pred'][:,-1*re:,:]) diff --git a/src/audio2pose_models/audio_encoder.py b/src/audio2pose_models/audio_encoder.py index 0ce036df119f86ef28c3ac8d6c834264571c309a..ea9095ad762caf48ff0f97abf4a086f6f7fee7e7 100644 --- a/src/audio2pose_models/audio_encoder.py +++ b/src/audio2pose_models/audio_encoder.py @@ -19,7 +19,7 @@ class Conv2d(nn.Module): return self.act(out) class AudioEncoder(nn.Module): - def __init__(self, wav2lip_checkpoint): + def __init__(self, wav2lip_checkpoint, device): super(AudioEncoder, self).__init__() self.audio_encoder = nn.Sequential( @@ -41,8 +41,8 @@ class AudioEncoder(nn.Module): Conv2d(256, 512, kernel_size=3, stride=1, padding=0), Conv2d(512, 512, kernel_size=1, stride=1, padding=0),) - #### load the pre-trained audio_encoder\ - wav2lip_state_dict = torch.load(wav2lip_checkpoint)['state_dict'] + #### load the pre-trained audio_encoder + wav2lip_state_dict = torch.load(wav2lip_checkpoint, map_location=torch.device(device))['state_dict'] state_dict = self.audio_encoder.state_dict() for k,v in wav2lip_state_dict.items(): diff --git a/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc b/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc index 0469c877400338fae921f4aedf1159b03abbb101..25b9b1377b35ea7231f4d3b44d81aab8d44f4b5b 100644 Binary files a/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc and b/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc differ diff --git a/src/face3d/__pycache__/visualize.cpython-38.pyc b/src/face3d/__pycache__/visualize.cpython-38.pyc deleted file mode 100644 index a666447a57777ba5a4c6ed6642f234b79c45d372..0000000000000000000000000000000000000000 Binary files a/src/face3d/__pycache__/visualize.cpython-38.pyc and /dev/null differ diff --git a/src/face3d/models/__pycache__/__init__.cpython-38.pyc b/src/face3d/models/__pycache__/__init__.cpython-38.pyc index 886f0b184346c5530d0bf8d6f4b2300079511225..023f4afb376ad418cc6e3cdd9e821cfa0bcd33f3 100644 Binary files a/src/face3d/models/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/face3d/models/__pycache__/base_model.cpython-38.pyc b/src/face3d/models/__pycache__/base_model.cpython-38.pyc index e42691ec8e26c5c38baf6bd0172dff8110754da1..1076d15ca87eb8922a4fb3706a3aff777187b612 100644 Binary files a/src/face3d/models/__pycache__/base_model.cpython-38.pyc and b/src/face3d/models/__pycache__/base_model.cpython-38.pyc differ diff --git a/src/face3d/models/__pycache__/bfm.cpython-38.pyc b/src/face3d/models/__pycache__/bfm.cpython-38.pyc deleted file mode 100644 index 088a48bf9f0cabeb667c11c21000f0254c63ec81..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/__pycache__/bfm.cpython-38.pyc and /dev/null differ diff --git a/src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc b/src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc deleted file mode 100644 index 3e8de7975dee1099cb3e7698227df4e4062f86ee..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc and /dev/null differ diff --git a/src/face3d/models/__pycache__/losses.cpython-38.pyc b/src/face3d/models/__pycache__/losses.cpython-38.pyc deleted file mode 100644 index ffbf94d1f1e09d5ba0653c588b0cfaeb3df7b920..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/__pycache__/losses.cpython-38.pyc and /dev/null differ diff --git a/src/face3d/models/__pycache__/networks.cpython-38.pyc b/src/face3d/models/__pycache__/networks.cpython-38.pyc index 1a97b5cd3309786e87448c4478ae2d19a18e096b..e52b5dac3ce0e017ed844aed711ddfb94223be98 100644 Binary files a/src/face3d/models/__pycache__/networks.cpython-38.pyc and b/src/face3d/models/__pycache__/networks.cpython-38.pyc differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index c49397797cf06eaa01ef1327d25f0c145a511994..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 82f8ed2b49d5c718fe15c47d620156600f776765..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc index 83f6ad3ed4af3cc3d3cfa9067e345cdffb058638..a891077dd80e455e762875f37b16ff11e58441e7 100644 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index b1291676de1f08eaba633f000d015eab672e0036..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc deleted file mode 100644 index 6be617e2ecf266f566e6e5d4972465fcd0379ac5..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc deleted file mode 100644 index 0a085d7cb2aa24dabc85966931e3aa9db54310e3..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc index f59247d26d9210b5fd2960df842753a903a90b3d..e7d3278234555217f1055e02d930d1cd8731afa1 100644 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc deleted file mode 100644 index d8a633135905cc3c5fe7673c6d6ab584e0692ce7..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc deleted file mode 100644 index 6d9748f002ee2f953efa2391054329b6d32f9016..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc deleted file mode 100644 index 50b9f06989f4ca4f6f5bd7a1fdf1952f2035e974..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc and /dev/null differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc index d8edc64d28aa3e3fb8c26ba795d04a8ef35b1540..db57e8b41e4fe5bdbee04db62986c15c0e4bffb1 100644 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc differ diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc deleted file mode 100644 index 24ebbc749bfa90340e389e2c88bd1f8218c3e338..0000000000000000000000000000000000000000 Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc and /dev/null differ diff --git a/src/face3d/util/__pycache__/__init__.cpython-38.pyc b/src/face3d/util/__pycache__/__init__.cpython-38.pyc index 22771f3169f2da9a37c1bd619a0e5d05003492b9..2671705d02bed0a099b4a375070d0949c1450b7b 100644 Binary files a/src/face3d/util/__pycache__/__init__.cpython-38.pyc and b/src/face3d/util/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc b/src/face3d/util/__pycache__/load_mats.cpython-38.pyc index 8a48b59ca078ef709825d54c069f518c15103c4e..f44224c0f7c12afc3590f10b9f5ac570b6b668bb 100644 Binary files a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc and b/src/face3d/util/__pycache__/load_mats.cpython-38.pyc differ diff --git a/src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc b/src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc deleted file mode 100644 index 0ac5cc3eb7c6fd3141005a9cd53f604c49036717..0000000000000000000000000000000000000000 Binary files a/src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc and /dev/null differ diff --git a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc b/src/face3d/util/__pycache__/preprocess.cpython-38.pyc index 7900dafbd8b74629c391eb8972f615650d4461df..90eb37261ae38ab925f149db62d91a1d0078bfcf 100644 Binary files a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc and b/src/face3d/util/__pycache__/preprocess.cpython-38.pyc differ diff --git a/src/face3d/util/__pycache__/util.cpython-38.pyc b/src/face3d/util/__pycache__/util.cpython-38.pyc deleted file mode 100644 index 56d6f9217276ff22306a567df4861f802e61a82a..0000000000000000000000000000000000000000 Binary files a/src/face3d/util/__pycache__/util.cpython-38.pyc and /dev/null differ diff --git a/src/facerender/__pycache__/animate.cpython-38.pyc b/src/facerender/__pycache__/animate.cpython-38.pyc index 11fb3d0ee467093c0cb318003c52eb4c78f11cc9..1f8003ddb550fc6e235abccfb5f8481ee8c16afa 100644 Binary files a/src/facerender/__pycache__/animate.cpython-38.pyc and b/src/facerender/__pycache__/animate.cpython-38.pyc differ diff --git a/src/facerender/animate.py b/src/facerender/animate.py index be2d62ebaeffe06a8dee1e268d832690b1937320..1bd221ad4c99d911222fdf1eb087ebb626afc867 100644 --- a/src/facerender/animate.py +++ b/src/facerender/animate.py @@ -16,6 +16,8 @@ from src.facerender.modules.make_animation import make_animation from pydub import AudioSegment from src.utils.face_enhancer import enhancer as face_enhancer +from src.utils.paste_pic import paste_pic + class AnimateFromCoeff(): @@ -30,21 +32,26 @@ class AnimateFromCoeff(): **config['model_params']['common_params']) kp_extractor = KPDetector(**config['model_params']['kp_detector_params'], **config['model_params']['common_params']) + he_estimator = HEEstimator(**config['model_params']['he_estimator_params'], + **config['model_params']['common_params']) mapping = MappingNet(**config['model_params']['mapping_params']) generator.to(device) kp_extractor.to(device) + he_estimator.to(device) mapping.to(device) for param in generator.parameters(): param.requires_grad = False for param in kp_extractor.parameters(): param.requires_grad = False + for param in he_estimator.parameters(): + param.requires_grad = False for param in mapping.parameters(): param.requires_grad = False if free_view_checkpoint is not None: - self.load_cpk_facevid2vid(free_view_checkpoint, kp_detector=kp_extractor, generator=generator) + self.load_cpk_facevid2vid(free_view_checkpoint, kp_detector=kp_extractor, generator=generator, he_estimator=he_estimator) else: raise AttributeError("Checkpoint should be specified for video head pose estimator.") @@ -55,10 +62,12 @@ class AnimateFromCoeff(): self.kp_extractor = kp_extractor self.generator = generator + self.he_estimator = he_estimator self.mapping = mapping self.kp_extractor.eval() self.generator.eval() + self.he_estimator.eval() self.mapping.eval() self.device = device @@ -107,26 +116,35 @@ class AnimateFromCoeff(): return checkpoint['epoch'] - def generate(self, x, video_save_dir, enhancer=None, original_size=None): + def generate(self, x, video_save_dir, pic_path, crop_info, enhancer=None, full_img_enhancer=None): source_image=x['source_image'].type(torch.FloatTensor) source_semantics=x['source_semantics'].type(torch.FloatTensor) - target_semantics=x['target_semantics_list'].type(torch.FloatTensor) - yaw_c_seq = x['yaw_c_seq'].type(torch.FloatTensor) - pitch_c_seq = x['pitch_c_seq'].type(torch.FloatTensor) - roll_c_seq = x['roll_c_seq'].type(torch.FloatTensor) + target_semantics=x['target_semantics_list'].type(torch.FloatTensor) source_image=source_image.to(self.device) source_semantics=source_semantics.to(self.device) target_semantics=target_semantics.to(self.device) - yaw_c_seq = x['yaw_c_seq'].to(self.device) - pitch_c_seq = x['pitch_c_seq'].to(self.device) - roll_c_seq = x['roll_c_seq'].to(self.device) + if 'yaw_c_seq' in x: + yaw_c_seq = x['yaw_c_seq'].type(torch.FloatTensor) + yaw_c_seq = x['yaw_c_seq'].to(self.device) + else: + yaw_c_seq = None + if 'pitch_c_seq' in x: + pitch_c_seq = x['pitch_c_seq'].type(torch.FloatTensor) + pitch_c_seq = x['pitch_c_seq'].to(self.device) + else: + pitch_c_seq = None + if 'roll_c_seq' in x: + roll_c_seq = x['roll_c_seq'].type(torch.FloatTensor) + roll_c_seq = x['roll_c_seq'].to(self.device) + else: + roll_c_seq = None frame_num = x['frame_num'] predictions_video = make_animation(source_image, source_semantics, target_semantics, - self.generator, self.kp_extractor, self.mapping, - yaw_c_seq, pitch_c_seq, roll_c_seq, use_exp = True,) + self.generator, self.kp_extractor, self.he_estimator, self.mapping, + yaw_c_seq, pitch_c_seq, roll_c_seq, use_exp = True) predictions_video = predictions_video.reshape((-1,)+predictions_video.shape[2:]) predictions_video = predictions_video[:frame_num] @@ -139,6 +157,7 @@ class AnimateFromCoeff(): result = img_as_ubyte(video) ### the generated video is 256x256, so we keep the aspect ratio, + original_size = crop_info[0] if original_size: result = [ cv2.resize(result_i,(256, int(256.0 * original_size[1]/original_size[0]) )) for result_i in result ] @@ -157,7 +176,9 @@ class AnimateFromCoeff(): imageio.mimsave(enhanced_path, enhanced_images, fps=float(25)) - av_path = os.path.join(video_save_dir, video_name) + av_path = os.path.join(video_save_dir, video_name) + return_path = av_path + audio_path = x['audio_path'] audio_name = os.path.splitext(os.path.split(audio_path)[-1])[0] new_audio_path = os.path.join(video_save_dir, audio_name+'.wav') @@ -171,12 +192,28 @@ class AnimateFromCoeff(): cmd = r'ffmpeg -y -i "%s" -i "%s" -vcodec copy "%s"' % (path, new_audio_path, av_path) os.system(cmd) + print(f'The generated video is named {video_name} in {video_save_dir}') if enhancer: + return_path = av_path_enhancer cmd = r'ffmpeg -y -i "%s" -i "%s" -vcodec copy "%s"' % (enhanced_path, new_audio_path, av_path_enhancer) os.system(cmd) os.remove(enhanced_path) + print(f'The generated video is named {video_name_enhancer} in {video_save_dir}') + + if len(crop_info) == 3: + video_name_full = x['video_name'] + '_full.mp4' + full_video_path = os.path.join(video_save_dir, video_name_full) + return_path = full_video_path + if enhancer: + paste_pic(av_path_enhancer, pic_path, crop_info, new_audio_path, full_video_path) + else: + paste_pic(path, pic_path, crop_info, new_audio_path, full_video_path) + print(f'The generated video is named {video_name_full} in {video_save_dir}') + os.remove(path) os.remove(new_audio_path) + return return_path + diff --git a/src/facerender/modules/__pycache__/animate_model.cpython-38.pyc b/src/facerender/modules/__pycache__/animate_model.cpython-38.pyc deleted file mode 100644 index 1ecb83e033911eb82d582e097c513ea0fd4cb69a..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/animate_model.cpython-38.pyc and /dev/null differ diff --git a/src/facerender/modules/__pycache__/animate_model.cpython-39.pyc b/src/facerender/modules/__pycache__/animate_model.cpython-39.pyc deleted file mode 100644 index 8e9a594ddff05d41ed7fea66e42b37558869332a..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/animate_model.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc b/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc index 5178c3763bc9f6fcff3a8a410deff7d3c30060db..7558dbc6512fceb2147fd1fae031212d07e4449d 100644 Binary files a/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc and b/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc differ diff --git a/src/facerender/modules/__pycache__/dense_motion.cpython-39.pyc b/src/facerender/modules/__pycache__/dense_motion.cpython-39.pyc deleted file mode 100644 index 9a6cec5db6525ef350d0fcd52efe814b0d3f1e6d..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/dense_motion.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/modules/__pycache__/generator.cpython-38.pyc b/src/facerender/modules/__pycache__/generator.cpython-38.pyc index 8d132f05d36e505f21c864d4c95931472ba58051..11aa36c10f79820e84d8a275234b85b0371cc050 100644 Binary files a/src/facerender/modules/__pycache__/generator.cpython-38.pyc and b/src/facerender/modules/__pycache__/generator.cpython-38.pyc differ diff --git a/src/facerender/modules/__pycache__/generator.cpython-39.pyc b/src/facerender/modules/__pycache__/generator.cpython-39.pyc deleted file mode 100644 index ac9587fe99d8905d8ac99d60025ed1a8d5bacf1b..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/generator.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc b/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc index ccc5d4543365bfc022a06a72d6ed9d388249279a..e0bd1dcd3e98a316628449370f08dc8bd2dde4b9 100644 Binary files a/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc and b/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc differ diff --git a/src/facerender/modules/__pycache__/keypoint_detector.cpython-39.pyc b/src/facerender/modules/__pycache__/keypoint_detector.cpython-39.pyc deleted file mode 100644 index e609a2ce2bea049dcc08e711684347032da88e1a..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/keypoint_detector.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc b/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc index 1b54bcc293d742f70db165849b9764666b0f9a8b..76e338a936f0354c81abaa5fc677c5622db16eb3 100644 Binary files a/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc and b/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc differ diff --git a/src/facerender/modules/__pycache__/mapping.cpython-38.pyc b/src/facerender/modules/__pycache__/mapping.cpython-38.pyc index 7e1a2baa2bfab28fe7e3904f94a644633124b56c..b464c917a4d3feb94fa629b3390c000af89ceb9a 100644 Binary files a/src/facerender/modules/__pycache__/mapping.cpython-38.pyc and b/src/facerender/modules/__pycache__/mapping.cpython-38.pyc differ diff --git a/src/facerender/modules/__pycache__/mapping5.cpython-38.pyc b/src/facerender/modules/__pycache__/mapping5.cpython-38.pyc deleted file mode 100644 index ae35fb77f8552d2aa9cb263cba6ca9d37bbee9a7..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/mapping5.cpython-38.pyc and /dev/null differ diff --git a/src/facerender/modules/__pycache__/mapping5.cpython-39.pyc b/src/facerender/modules/__pycache__/mapping5.cpython-39.pyc deleted file mode 100644 index fa6b6db40007f95fca648909a638810273b2c050..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/mapping5.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/modules/__pycache__/util.cpython-38.pyc b/src/facerender/modules/__pycache__/util.cpython-38.pyc index 1e1c92955be38c880c52cc70b8051fd8ef4fa63a..4f4d1a6d0e3797390e942821e1e2c238e1c8a8d2 100644 Binary files a/src/facerender/modules/__pycache__/util.cpython-38.pyc and b/src/facerender/modules/__pycache__/util.cpython-38.pyc differ diff --git a/src/facerender/modules/__pycache__/util.cpython-39.pyc b/src/facerender/modules/__pycache__/util.cpython-39.pyc deleted file mode 100644 index 8764b93cb4e5964b831caf9ff376b70105f3dc5d..0000000000000000000000000000000000000000 Binary files a/src/facerender/modules/__pycache__/util.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/modules/dense_motion.py b/src/facerender/modules/dense_motion.py index 30c13060be8e82979771514b4ec51e5de23f49fa..a286ead2e84ed1961335d34a3b50ab38f25e4495 100644 --- a/src/facerender/modules/dense_motion.py +++ b/src/facerender/modules/dense_motion.py @@ -102,6 +102,10 @@ class DenseMotionNetwork(nn.Module): mask = F.softmax(mask, dim=1) out_dict['mask'] = mask mask = mask.unsqueeze(2) # (bs, num_kp+1, 1, d, h, w) + + zeros_mask = torch.zeros_like(mask) + mask = torch.where(mask < 1e-3, zeros_mask, mask) + sparse_motion = sparse_motion.permute(0, 1, 5, 2, 3, 4) # (bs, num_kp+1, 3, d, h, w) deformation = (sparse_motion * mask).sum(dim=1) # (bs, 3, d, h, w) deformation = deformation.permute(0, 2, 3, 4, 1) # (bs, d, h, w, 3) diff --git a/src/facerender/modules/make_animation.py b/src/facerender/modules/make_animation.py index 2b2382d82d26043145184b339103aac64abdaa62..e7887a3fed50d294948dd0a7d4c4956583b5f705 100644 --- a/src/facerender/modules/make_animation.py +++ b/src/facerender/modules/make_animation.py @@ -62,29 +62,33 @@ def get_rotation_matrix(yaw, pitch, roll): return rot_mat -def keypoint_transformation(kp_canonical, he): +def keypoint_transformation(kp_canonical, he, wo_exp=False): kp = kp_canonical['value'] # (bs, k, 3) yaw, pitch, roll= he['yaw'], he['pitch'], he['roll'] yaw = headpose_pred_to_degree(yaw) pitch = headpose_pred_to_degree(pitch) roll = headpose_pred_to_degree(roll) - if 'yaw_c' in he: - yaw = yaw + he['yaw_c'] - if 'pitch_c' in he: - pitch = pitch + he['pitch_c'] - if 'roll_c' in he: - roll = roll + he['roll_c'] + if 'yaw_in' in he: + yaw = he['yaw_in'] + if 'pitch_in' in he: + pitch = he['pitch_in'] + if 'roll_in' in he: + roll = he['roll_in'] rot_mat = get_rotation_matrix(yaw, pitch, roll) # (bs, 3, 3) t, exp = he['t'], he['exp'] + if wo_exp: + exp = exp*0 # keypoint rotation kp_rotated = torch.einsum('bmp,bkp->bkm', rot_mat, kp) # keypoint translation - t = t.unsqueeze_(1).repeat(1, kp.shape[1], 1) + t[:, 0] = t[:, 0]*0 + t[:, 2] = t[:, 2]*0 + t = t.unsqueeze(1).repeat(1, kp.shape[1], 1) kp_t = kp_rotated + t # add expression deviation @@ -96,7 +100,7 @@ def keypoint_transformation(kp_canonical, he): def make_animation(source_image, source_semantics, target_semantics, - generator, kp_detector, mapping, + generator, kp_detector, he_estimator, mapping, yaw_c_seq=None, pitch_c_seq=None, roll_c_seq=None, use_exp=True): with torch.no_grad(): @@ -109,14 +113,12 @@ def make_animation(source_image, source_semantics, target_semantics, for frame_idx in tqdm(range(target_semantics.shape[1]), 'Face Renderer:'): target_semantics_frame = target_semantics[:, frame_idx] he_driving = mapping(target_semantics_frame) - if not use_exp: - he_driving['exp'] = he_driving['exp']*0 if yaw_c_seq is not None: - he_driving['yaw_c'] = yaw_c_seq[:, frame_idx] + he_driving['yaw_in'] = yaw_c_seq[:, frame_idx] if pitch_c_seq is not None: - he_driving['pitch_c'] = pitch_c_seq[:, frame_idx] + he_driving['pitch_in'] = pitch_c_seq[:, frame_idx] if roll_c_seq is not None: - he_driving['roll_c'] = roll_c_seq[:, frame_idx] + he_driving['roll_in'] = roll_c_seq[:, frame_idx] kp_driving = keypoint_transformation(kp_canonical, he_driving) @@ -124,6 +126,14 @@ def make_animation(source_image, source_semantics, target_semantics, #kp_driving_initial=kp_driving_initial) kp_norm = kp_driving out = generator(source_image, kp_source=kp_source, kp_driving=kp_norm) + ''' + source_image_new = out['prediction'].squeeze(1) + kp_canonical_new = kp_detector(source_image_new) + he_source_new = he_estimator(source_image_new) + kp_source_new = keypoint_transformation(kp_canonical_new, he_source_new, wo_exp=True) + kp_driving_new = keypoint_transformation(kp_canonical_new, he_driving, wo_exp=True) + out = generator(source_image_new, kp_source=kp_source_new, kp_driving=kp_driving_new) + ''' predictions.append(out['prediction']) predictions_ts = torch.stack(predictions, dim=1) return predictions_ts diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 8327a281a1c119814499648bdec814cf753ba0ba..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 4e9c9671abd49037eb51d66e7bb6046177433a27..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc index 03d5fdb5ff0e14c08894b394b8c1cae7e1f324c4..a08f1284e68bb6251119739bc46a2dab9f5a171b 100644 Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 9c0d18c3cec16bbeccbc825186b14c60550563a1..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc deleted file mode 100644 index 24a89a661e425c0b49c5d616759928e701eab005..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-37.pyc deleted file mode 100644 index d7658dccf719cd85ac0c6e6f6b190ffe6f32c5ed..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-37.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc index 20a4560fc425087d5d63c70cc08fd12c2d8a7ea1..f1a96eace36b537e5cfc85be1be94616151aca85 100644 Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc differ diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-39.pyc deleted file mode 100644 index d1c07e4d0f03cd52a105f009d16f079559a5f97e..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-36.pyc deleted file mode 100644 index 7602415a703e1bd2b6008a9bf6dde9778d4349ae..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-36.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-37.pyc deleted file mode 100644 index 1ce98838a834f854dbbc7a8d2f4f1295802e97f3..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-37.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc index eb7252b8ad1b6aec2f5566979db0494f71a63d91..e6578b03a7060d9b9b31681e6f7ef27e4251f52e 100644 Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc differ diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-39.pyc deleted file mode 100644 index b84f093a8aef9c2b92f0beead2318296163c9e1f..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-39.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-36.pyc deleted file mode 100644 index 4a53e2cdf5b5c2d0f7fc9f6c928fe116d629a6c8..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-36.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-37.pyc deleted file mode 100644 index b91c03d671fb5a9334bd4791f6e1f55d397f2e62..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-37.pyc and /dev/null differ diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc index 30c9811579d75333db1b60fe4622f682013f719b..90f775d27997dc8659edde9eb763d0f8b4007ace 100644 Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc differ diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-39.pyc deleted file mode 100644 index 561b184da4d393c548f7eb0b3076c765d4bf3745..0000000000000000000000000000000000000000 Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-39.pyc and /dev/null differ diff --git a/src/generate_batch.py b/src/generate_batch.py index 2d9e19b6aa4c19c13caf0a208e1189cd6c19f796..8bf580e49427527bfd1c2ff533de45ee91e3872e 100644 --- a/src/generate_batch.py +++ b/src/generate_batch.py @@ -48,7 +48,7 @@ def generate_blink_seq_randomly(num_frames): break return ratio -def get_data(first_coeff_path, audio_path, device): +def get_data(first_coeff_path, audio_path, device, ref_eyeblink_coeff_path): syncnet_mel_step_size = 16 fps = 25 @@ -56,10 +56,6 @@ def get_data(first_coeff_path, audio_path, device): pic_name = os.path.splitext(os.path.split(first_coeff_path)[-1])[0] audio_name = os.path.splitext(os.path.split(audio_path)[-1])[0] - source_semantics_path = first_coeff_path - source_semantics_dict = scio.loadmat(source_semantics_path) - ref_coeff = source_semantics_dict['coeff_3dmm'][:1,:70] #1 70 - wav = audio.load_wav(audio_path, 16000) wav_length, num_frames = parse_audio_length(len(wav), 16000, 25) wav = crop_pad_audio(wav, wav_length) @@ -76,7 +72,27 @@ def get_data(first_coeff_path, audio_path, device): m = spec[seq, :] indiv_mels.append(m.T) indiv_mels = np.asarray(indiv_mels) # T 80 16 + ratio = generate_blink_seq_randomly(num_frames) # T + source_semantics_path = first_coeff_path + source_semantics_dict = scio.loadmat(source_semantics_path) + ref_coeff = source_semantics_dict['coeff_3dmm'][:1,:70] #1 70 + ref_coeff = np.repeat(ref_coeff, num_frames, axis=0) + + if ref_eyeblink_coeff_path is not None: + ratio[:num_frames] = 0 + refeyeblink_coeff_dict = scio.loadmat(ref_eyeblink_coeff_path) + refeyeblink_coeff = refeyeblink_coeff_dict['coeff_3dmm'][:,:64] + refeyeblink_num_frames = refeyeblink_coeff.shape[0] + if refeyeblink_num_frames None: + model_name = TTS.list_models()[0] + self.tts = TTS(model_name) + + def test(self, text, language='en'): + + tempf = tempfile.NamedTemporaryFile( + delete = False, + suffix = ('.'+'wav'), + ) + + self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name) + + return tempf.name \ No newline at end of file