import torch
import yaml
import os
import safetensors
from safetensors.torch import save_file
from yacs.config import CfgNode as CN
import sys

# NOTE(review): machine-specific absolute path; it must be appended before the
# ``src.*`` imports below so that they can be resolved.
sys.path.append('/apdcephfs/private_shadowcun/SadTalker')

from src.face3d.models import networks

from src.facerender.modules.keypoint_detector import HEEstimator, KPDetector
from src.facerender.modules.mapping import MappingNet
from src.facerender.modules.generator import OcclusionAwareGenerator, OcclusionAwareSPADEGenerator

from src.audio2pose_models.audio2pose import Audio2Pose
from src.audio2exp_models.networks import SimpleWrapperV2
from src.test_audio2coeff import load_cpk

size = 256

############ face vid2vid
config_path = os.path.join('src', 'config', 'facerender.yaml')
current_root_path = '.'

# 3D face reconstruction network: build it, then restore its weights on CPU.
path_of_net_recon_model = os.path.join(current_root_path, 'checkpoints', 'epoch_20.pth')
net_recon = networks.define_net_recon(net_recon='resnet50', use_last_fc=False, init_path='')
checkpoint = torch.load(path_of_net_recon_model, map_location='cpu')
net_recon.load_state_dict(checkpoint['net_recon'])

with open(config_path) as f:
    config = yaml.safe_load(f)

# Every face-render sub-network is configured from the same YAML file.
generator = OcclusionAwareSPADEGenerator(**config['model_params']['generator_params'],
                                         **config['model_params']['common_params'])
kp_extractor = KPDetector(**config['model_params']['kp_detector_params'],
                          **config['model_params']['common_params'])
he_estimator = HEEstimator(**config['model_params']['he_estimator_params'],
                           **config['model_params']['common_params'])
mapping = MappingNet(**config['model_params']['mapping_params'])


def load_cpk_facevid2vid(checkpoint_path, generator=None, discriminator=None,
                         kp_detector=None, he_estimator=None, optimizer_generator=None,
                         optimizer_discriminator=None, optimizer_kp_detector=None,
                         optimizer_he_estimator=None, device="cpu"):
    """Restore face-vid2vid modules and optimizers from a ``torch.save`` checkpoint.

    Only the objects passed as non-``None`` are restored; each one is loaded
    from the checkpoint entry of the matching name (``'generator'``,
    ``'kp_detector'``, ``'he_estimator'``, ``'discriminator'``,
    ``'optimizer_generator'``, ...).

    Args:
        checkpoint_path: Path of the checkpoint file read with ``torch.load``.
        generator, discriminator, kp_detector, he_estimator: Optional modules
            whose ``load_state_dict`` is called with the matching entry.
        optimizer_generator, optimizer_discriminator, optimizer_kp_detector,
        optimizer_he_estimator: Optional optimizers restored the same way.
        device: Device name used as ``map_location`` when reading the file.

    Returns:
        The value stored under ``'epoch'`` in the checkpoint.

    Raises:
        KeyError: If a required entry (anything except the discriminator and
            its optimizer, which are best-effort) is missing.
    """
    checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
    if generator is not None:
        generator.load_state_dict(checkpoint['generator'])
    if kp_detector is not None:
        kp_detector.load_state_dict(checkpoint['kp_detector'])
    if he_estimator is not None:
        he_estimator.load_state_dict(checkpoint['he_estimator'])
    if discriminator is not None:
        # Best effort: a checkpoint without a usable discriminator is accepted.
        # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit still propagate.
        try:
            discriminator.load_state_dict(checkpoint['discriminator'])
        except Exception:
            print ('No discriminator in the state-dict. Dicriminator will be randomly initialized')
    if optimizer_generator is not None:
        optimizer_generator.load_state_dict(checkpoint['optimizer_generator'])
    if optimizer_discriminator is not None:
        # A missing entry raises KeyError, which is the case the message
        # describes, so it is handled together with RuntimeError.
        try:
            optimizer_discriminator.load_state_dict(checkpoint['optimizer_discriminator'])
        except (KeyError, RuntimeError):
            print ('No discriminator optimizer in the state-dict. Optimizer will be not initialized')
    if optimizer_kp_detector is not None:
        optimizer_kp_detector.load_state_dict(checkpoint['optimizer_kp_detector'])
    if optimizer_he_estimator is not None:
        optimizer_he_estimator.load_state_dict(checkpoint['optimizer_he_estimator'])

    return checkpoint['epoch']


def _strip_prefix_state(flat_state, prefix):
    """Return the entries of *flat_state* whose key starts with *prefix*, with the prefix removed."""
    offset = len(prefix)
    return {key[offset:]: tensor for key, tensor in flat_state.items() if key.startswith(prefix)}


def load_cpk_facevid2vid_safetensor(checkpoint_path, generator=None,
                                    kp_detector=None, he_estimator=None,
                                    device="cpu"):
    """Restore face-vid2vid modules from a flat safetensors checkpoint.

    The file is expected to hold one flat mapping whose keys are prefixed with
    the owning sub-module name: ``generator.``, ``kp_extractor.`` (loaded into
    *kp_detector*) and ``he_estimator.``.  Keys are selected by prefix and only
    the leading prefix is stripped, so an unrelated key that merely contains
    one of these names elsewhere is not picked up by mistake.

    Args:
        checkpoint_path: Path of the ``.safetensors`` file.
        generator: Optional module receiving the ``generator.*`` tensors.
        kp_detector: Optional module receiving the ``kp_extractor.*`` tensors.
        he_estimator: Optional module receiving the ``he_estimator.*`` tensors.
        device: Device the tensors are loaded onto.

    Returns:
        None.
    """
    checkpoint = safetensors.torch.load_file(checkpoint_path, device=device)

    if generator is not None:
        generator.load_state_dict(_strip_prefix_state(checkpoint, 'generator.'))
    if kp_detector is not None:
        kp_detector.load_state_dict(_strip_prefix_state(checkpoint, 'kp_extractor.'))
    if he_estimator is not None:
        he_estimator.load_state_dict(_strip_prefix_state(checkpoint, 'he_estimator.'))

    return None


# NOTE(review): absolute path, unlike the other checkpoints which are resolved
# against ``current_root_path``.
free_view_checkpoint = '/apdcephfs/private_shadowcun/SadTalker/checkpoints/facevid2vid_'+str(size)+'-model.pth.tar'
load_cpk_facevid2vid(free_view_checkpoint, kp_detector=kp_extractor, generator=generator, he_estimator=he_estimator)

wav2lip_checkpoint = os.path.join(current_root_path, 'checkpoints', 'wav2lip.pth')
audio2pose_checkpoint = os.path.join(current_root_path, 'checkpoints', 'auido2pose_00140-model.pth')
audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml')

audio2exp_checkpoint = os.path.join(current_root_path, 'checkpoints', 'auido2exp_00300-model.pth')
audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml')

# Read the audio2pose config inside a context manager so the file handle is
# closed as soon as the config has been parsed.
with open(audio2pose_yaml_path) as fcfg_pose:
    cfg_pose = CN.load_cfg(fcfg_pose)
cfg_pose.freeze()
audio2pose_model = Audio2Pose(cfg_pose, wav2lip_checkpoint)
audio2pose_model.eval()
load_cpk(audio2pose_checkpoint, model=audio2pose_model, device='cpu')

# load audio2exp_model
netG = SimpleWrapperV2()
netG.eval()
load_cpk(audio2exp_checkpoint, model=netG, device='cpu')


class SadTalker(torch.nn.Module):
    """Container module that groups the sub-networks under fixed attribute names.

    It defines no ``forward``; it exists so that a single ``state_dict()`` call
    yields every sub-network's tensors, keyed by the attribute names assigned
    below (``kp_extractor``, ``generator``, ``audio2exp``, ``audio2pose``,
    ``face_3drecon``).
    """

    def __init__(self, kp_extractor, generator, netG, audio2pose, face_3drecon):
        """Register the given sub-networks as child modules.

        Args:
            kp_extractor: Stored as ``self.kp_extractor``.
            generator: Stored as ``self.generator``.
            netG: Stored as ``self.audio2exp``.
            audio2pose: Stored as ``self.audio2pose``.
            face_3drecon: Stored as ``self.face_3drecon``.
        """
        super().__init__()
        self.kp_extractor = kp_extractor
        self.generator = generator
        self.audio2exp = netG
        self.audio2pose = audio2pose
        self.face_3drecon = face_3drecon


# NOTE: ``he_estimator`` and ``mapping`` are not passed to the container, so
# their weights are not part of the exported file.
model = SadTalker(kp_extractor, generator, netG, audio2pose_model, net_recon)

# here, we want to convert it to safetensor
# The output path is computed once and reused for the reload check below.
safetensor_path = "checkpoints/SadTalker_V0.0.2_"+str(size)+".safetensors"
save_file(model.state_dict(), safetensor_path)

### test
# Reload the file that was just written into the face-render modules; the
# head-pose estimator is skipped because it was not exported.
load_cpk_facevid2vid_safetensor(safetensor_path, kp_detector=kp_extractor, generator=generator, he_estimator=None)