diff --git a/src/__pycache__/generate_batch.cpython-38.pyc b/src/__pycache__/generate_batch.cpython-38.pyc
deleted file mode 100644
index 13a47c4eaf86393717647c3b4bd49f49fe406242..0000000000000000000000000000000000000000
Binary files a/src/__pycache__/generate_batch.cpython-38.pyc and /dev/null differ
diff --git a/src/__pycache__/generate_facerender_batch.cpython-38.pyc b/src/__pycache__/generate_facerender_batch.cpython-38.pyc
deleted file mode 100644
index 9425f1e83ef0a81ea1e02a7ec86bfc74d9352bfc..0000000000000000000000000000000000000000
Binary files a/src/__pycache__/generate_facerender_batch.cpython-38.pyc and /dev/null differ
diff --git a/src/__pycache__/test_audio2coeff.cpython-38.pyc b/src/__pycache__/test_audio2coeff.cpython-38.pyc
deleted file mode 100644
index 84aa2f1d6af91014a85c6d7e112d60790a6725a0..0000000000000000000000000000000000000000
Binary files a/src/__pycache__/test_audio2coeff.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc b/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc
deleted file mode 100644
index a9567482f0b8e81ad6c9ceccf0f8a622a66546e9..0000000000000000000000000000000000000000
Binary files a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc b/src/audio2exp_models/__pycache__/networks.cpython-38.pyc
deleted file mode 100644
index 407ea976836d9d8802732a6376ec87d41fc80ff2..0000000000000000000000000000000000000000
Binary files a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc b/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc
deleted file mode 100644
index f13dd8f582471f5d65cbf8735a8c79ccbef7518b..0000000000000000000000000000000000000000
Binary files a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc b/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc
deleted file mode 100644
index 69f3c22097826e74218bb37f8a55225973e2793c..0000000000000000000000000000000000000000
Binary files a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc b/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc
deleted file mode 100644
index 5dded44216fd8aab03974eed3d8162bce0c0a635..0000000000000000000000000000000000000000
Binary files a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc b/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc
deleted file mode 100644
index f2f2e1836674553ab8d46cac670ed8216261bebc..0000000000000000000000000000000000000000
Binary files a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc b/src/audio2pose_models/__pycache__/networks.cpython-38.pyc
deleted file mode 100644
index 7aeff8e03a138118d6f78e5998350382f3ba9f9e..0000000000000000000000000000000000000000
Binary files a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc and /dev/null differ
diff --git a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc b/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc
deleted file mode 100644
index c4145a9c9a8bb39c4b43d12b9157f948d4aada9e..0000000000000000000000000000000000000000
Binary files a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc and /dev/null differ
diff --git a/src/src/config/facerender_pirender.yaml b/src/config/facerender_pirender.yaml
similarity index 100%
rename from src/src/config/facerender_pirender.yaml
rename to src/config/facerender_pirender.yaml
diff --git a/src/face3d/models/__pycache__/__init__.cpython-38.pyc b/src/face3d/models/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 99831554825b8fec7ca21767da2c9c2941d4637e..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/__pycache__/base_model.cpython-38.pyc b/src/face3d/models/__pycache__/base_model.cpython-38.pyc
deleted file mode 100644
index f6631694c51d5c339ff1300b92ef5e2b3186ac11..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/__pycache__/base_model.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/__pycache__/networks.cpython-38.pyc b/src/face3d/models/__pycache__/networks.cpython-38.pyc
deleted file mode 100644
index f0e1d09cc5a1840dcd5fedd012499e1ca4c51f5f..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/__pycache__/networks.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index f92b2e8edc070d6affbb2061185f0d0adb7335f9..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc
deleted file mode 100644
index ac2a1b5344105e501c3317f576430ef41e00efe8..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc
deleted file mode 100644
index ca44195689621e02e50f0dfe8142870855576322..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/util/__pycache__/__init__.cpython-38.pyc b/src/face3d/util/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index ea3beac4a69b5dc73e3e6728599e475db1325af4..0000000000000000000000000000000000000000
Binary files a/src/face3d/util/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc b/src/face3d/util/__pycache__/load_mats.cpython-38.pyc
deleted file mode 100644
index 12141aee9bbafb46c4157859824cc3979ecc2973..0000000000000000000000000000000000000000
Binary files a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc b/src/face3d/util/__pycache__/preprocess.cpython-38.pyc
deleted file mode 100644
index 6205c4ec683edf61196785e85b1133c478c7c9d8..0000000000000000000000000000000000000000
Binary files a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/__pycache__/animate.cpython-38.pyc b/src/facerender/__pycache__/animate.cpython-38.pyc
deleted file mode 100644
index 0f2bbd9cdc5a8465828d0550272e8e5f8492b0ef..0000000000000000000000000000000000000000
Binary files a/src/facerender/__pycache__/animate.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc b/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc
deleted file mode 100644
index 1785157c2c0f18d518a6a9741105e99ad169c378..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/generator.cpython-38.pyc b/src/facerender/modules/__pycache__/generator.cpython-38.pyc
deleted file mode 100644
index 12c4970aa1880f1f58bc5ed48f192852b7ed084a..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/generator.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc b/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc
deleted file mode 100644
index b210e7d99cde1b818e2771967806d2ec5f5adae4..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc b/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc
deleted file mode 100644
index b8f347e1a2404284681cae95939803889faba9dc..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/mapping.cpython-38.pyc b/src/facerender/modules/__pycache__/mapping.cpython-38.pyc
deleted file mode 100644
index 8ed3e34e45b30d40d07973d70c9db5a18b3468ba..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/mapping.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/util.cpython-38.pyc b/src/facerender/modules/__pycache__/util.cpython-38.pyc
deleted file mode 100644
index b86a22c2ebe31a47a3190b110a06e70413de6311..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/util.cpython-38.pyc and /dev/null differ
diff --git a/src/src/facerender/pirender/base_function.py b/src/facerender/pirender/base_function.py
similarity index 100%
rename from src/src/facerender/pirender/base_function.py
rename to src/facerender/pirender/base_function.py
diff --git a/src/src/facerender/pirender/config.py b/src/facerender/pirender/config.py
similarity index 100%
rename from src/src/facerender/pirender/config.py
rename to src/facerender/pirender/config.py
diff --git a/src/src/facerender/pirender/face_model.py b/src/facerender/pirender/face_model.py
similarity index 100%
rename from src/src/facerender/pirender/face_model.py
rename to src/facerender/pirender/face_model.py
diff --git a/src/src/facerender/pirender_animate.py b/src/facerender/pirender_animate.py
similarity index 100%
rename from src/src/facerender/pirender_animate.py
rename to src/facerender/pirender_animate.py
diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 8bc00fa326e84c7058c64e4004b73c55d7845850..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc
deleted file mode 100644
index d2d5f83e4f70a222bc633c1b6497b65e76ac6a3f..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc
deleted file mode 100644
index e4ccff272091e0f7bc55608867b89d43a64f0be7..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc
deleted file mode 100644
index 4cfc6b318f20e14134fb40950e233f94d4f6cf1b..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc and /dev/null differ
diff --git a/src/generate_facerender_batch.py b/src/generate_facerender_batch.py
index a62b6edffa41529ba828905fb86ca302a01d37cc..a821a6ece2fcff83c288a0989097d863cfec3dd1 100644
--- a/src/generate_facerender_batch.py
+++ b/src/generate_facerender_batch.py
@@ -7,7 +7,7 @@ import scipy.io as scio
 
 def get_facerender_data(coeff_path, pic_path, first_coeff_path, audio_path,
                         batch_size, input_yaw_list=None, input_pitch_list=None, input_roll_list=None,
-                        expression_scale=1.0, still_mode = False, preprocess='crop', size = 256):
+                        expression_scale=1.0, still_mode = False, preprocess='crop', size = 256, facemodel='facevid2vid'):
 
     semantic_radius = 13
     video_name = os.path.splitext(os.path.split(coeff_path)[-1])[0]
@@ -27,10 +27,9 @@ def get_facerender_data(coeff_path, pic_path, first_coeff_path, audio_path,
     source_semantics_dict = scio.loadmat(first_coeff_path)
     generated_dict = scio.loadmat(coeff_path)
 
-    if 'full' not in preprocess.lower():
+    if 'full' not in preprocess.lower() and facemodel != 'pirender':
         source_semantics = source_semantics_dict['coeff_3dmm'][:1,:70]         #1 70
         generated_3dmm = generated_dict['coeff_3dmm'][:,:70]
-
     else:
         source_semantics = source_semantics_dict['coeff_3dmm'][:1,:73]         #1 70
         generated_3dmm = generated_dict['coeff_3dmm'][:,:70]
@@ -43,7 +42,7 @@
     # target
     generated_3dmm[:, :64] = generated_3dmm[:, :64] * expression_scale
 
-    if 'full' in preprocess.lower():
+    if 'full' in preprocess.lower() or facemodel == 'pirender':
         generated_3dmm = np.concatenate([generated_3dmm, np.repeat(source_semantics[:,70:], generated_3dmm.shape[0], axis=0)], axis=1)
 
     if still_mode:
diff --git a/src/gradio_demo.py b/src/gradio_demo.py
index 9c9ae056f019d463f03c88eb75a19c9ef037c2c2..9a2399fc44704b544ef39bb908d32a21da9fae17 100644
--- a/src/gradio_demo.py
+++ b/src/gradio_demo.py
@@ -1,5 +1,6 @@
 import torch, uuid
-import os, sys, shutil
+import os, sys, shutil, platform
+from src.facerender.pirender_animate import AnimateFromCoeff_PIRender
 from src.utils.preprocess import CropAndExtract
 from src.test_audio2coeff import Audio2Coeff
 from src.facerender.animate import AnimateFromCoeff
@@ -20,8 +21,10 @@ class SadTalker():
 
     def __init__(self, checkpoint_path='checkpoints', config_path='src/config', lazy_load=False):
 
-        if torch.cuda.is_available() :
+        if torch.cuda.is_available():
             device = "cuda"
+        elif platform.system() == 'Darwin': # macos
+            device = "mps"
         else:
             device = "cpu"
 
@@ -35,7 +38,9 @@ class SadTalker():
 
     def test(self, source_image, driven_audio, preprocess='crop',
         still_mode=False,  use_enhancer=False, batch_size=1, size=256,
-        pose_style = 0, exp_scale=1.0,
+        pose_style = 0,
+        facerender='facevid2vid',
+        exp_scale=1.0,
         use_ref_video = False,
         ref_video = None,
         ref_info = None,
@@ -48,7 +53,15 @@ class SadTalker():
 
         self.audio_to_coeff = Audio2Coeff(self.sadtalker_paths, self.device)
         self.preprocess_model = CropAndExtract(self.sadtalker_paths, self.device)
-        self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device)
+
+        if facerender == 'facevid2vid' and self.device != 'mps':
+            self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device)
+        elif facerender == 'pirender' or self.device == 'mps':
+            self.animate_from_coeff = AnimateFromCoeff_PIRender(self.sadtalker_paths, self.device)
+            facerender = 'pirender'
+        else:
+            raise(RuntimeError('Unknown model: {}'.format(facerender)))
+
         time_tag = str(uuid.uuid4())
         save_dir = os.path.join(result_dir, time_tag)
 
@@ -131,11 +144,13 @@ class SadTalker():
         if use_ref_video and ref_info == 'all':
             coeff_path = ref_video_coeff_path # self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)
         else:
-            batch = get_data(first_coeff_path, audio_path, self.device, ref_eyeblink_coeff_path=ref_eyeblink_coeff_path, still=still_mode, idlemode=use_idle_mode, length_of_audio=length_of_audio, use_blink=use_blink) # longer audio?
+            batch = get_data(first_coeff_path, audio_path, self.device, ref_eyeblink_coeff_path=ref_eyeblink_coeff_path, still=still_mode, \
+                             idlemode=use_idle_mode, length_of_audio=length_of_audio, use_blink=use_blink) # longer audio?
             coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)
 
         #coeff2video
-        data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode, preprocess=preprocess, size=size, expression_scale = exp_scale)
+        data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode, \
+                                   preprocess=preprocess, size=size, expression_scale = exp_scale, facemodel=facerender)
         return_path = self.animate_from_coeff.generate(data, save_dir, pic_path, crop_info, enhancer='gfpgan' if use_enhancer else None, preprocess=preprocess, img_size=size)
         video_name = data['video_name']
         print(f'The generated video is named {video_name} in {save_dir}')
diff --git a/src/src/audio2exp_models/audio2exp.py b/src/src/audio2exp_models/audio2exp.py
deleted file mode 100644
index 9e79a929560592687a505e13188796e2b0ca8772..0000000000000000000000000000000000000000
--- a/src/src/audio2exp_models/audio2exp.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from tqdm import tqdm
-import torch
-from torch import nn
-
-
-class Audio2Exp(nn.Module):
-    def __init__(self, netG, cfg, device, prepare_training_loss=False):
-        super(Audio2Exp, self).__init__()
-        self.cfg = cfg
-        self.device = device
-        self.netG = netG.to(device)
-
-    def test(self, batch):
-
-        mel_input = batch['indiv_mels']                         # bs T 1 80 16
-        bs = mel_input.shape[0]
-        T = mel_input.shape[1]
-
-        exp_coeff_pred = []
-
-        for i in tqdm(range(0, T, 10),'audio2exp:'): # every 10 frames
-
-            current_mel_input = mel_input[:,i:i+10]
-
-            #ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1))           #bs T 64
-            ref = batch['ref'][:, :, :64][:, i:i+10]
-            ratio = batch['ratio_gt'][:, i:i+10]                               #bs T
-
-            audiox = current_mel_input.view(-1, 1, 80, 16)                  # bs*T 1 80 16
-
-            curr_exp_coeff_pred = self.netG(audiox, ref, ratio)         # bs T 64
-
-            exp_coeff_pred += [curr_exp_coeff_pred]
-
-        # BS x T x 64
-        results_dict = {
-            'exp_coeff_pred': torch.cat(exp_coeff_pred, axis=1)
-            }
-        return results_dict
-
-
diff --git a/src/src/audio2exp_models/networks.py b/src/src/audio2exp_models/networks.py
deleted file mode 100644
index f052e18101f5446a527ae354b3621e7d0d4991cc..0000000000000000000000000000000000000000
--- a/src/src/audio2exp_models/networks.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import torch
-import torch.nn.functional as F
-from torch import nn
-
-class Conv2d(nn.Module):
-    def __init__(self, cin, cout, kernel_size, stride, padding, residual=False, use_act = True, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.conv_block = nn.Sequential(
-                            nn.Conv2d(cin, cout, kernel_size, stride, padding),
-                            nn.BatchNorm2d(cout)
-                            )
-        self.act = nn.ReLU()
-        self.residual = residual
-        self.use_act = use_act
-
-    def forward(self, x):
-        out = self.conv_block(x)
-        if self.residual:
-            out += x
-
-        if self.use_act:
-            return self.act(out)
-        else:
-            return out
-
-class SimpleWrapperV2(nn.Module):
-    def __init__(self) -> None:
-        super().__init__()
-        self.audio_encoder = nn.Sequential(
-            Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
-            Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True),
-            Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(32, 64, kernel_size=3, stride=(3, 1), padding=1),
-            Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True),
-            Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(64, 128, kernel_size=3, stride=3, padding=1),
-            Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True),
-            Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(128, 256, kernel_size=3, stride=(3, 2), padding=1),
-            Conv2d(256, 256, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(256, 512, kernel_size=3, stride=1, padding=0),
-            Conv2d(512, 512, kernel_size=1, stride=1, padding=0),
-        )
-
-        #### load the pre-trained audio_encoder
-        #self.audio_encoder = self.audio_encoder.to(device)
-        '''
-        wav2lip_state_dict = torch.load('/apdcephfs_cq2/share_1290939/wenxuazhang/checkpoints/wav2lip.pth')['state_dict']
-        state_dict = self.audio_encoder.state_dict()
-
-        for k,v in wav2lip_state_dict.items():
-            if 'audio_encoder' in k:
-                print('init:', k)
-                state_dict[k.replace('module.audio_encoder.', '')] = v
-        self.audio_encoder.load_state_dict(state_dict)
-        '''
-
-        self.mapping1 = nn.Linear(512+64+1, 64)
-        #self.mapping2 = nn.Linear(30, 64)
-        #nn.init.constant_(self.mapping1.weight, 0.)
-        nn.init.constant_(self.mapping1.bias, 0.)
-
-    def forward(self, x, ref, ratio):
-        x = self.audio_encoder(x).view(x.size(0), -1)
-        ref_reshape = ref.reshape(x.size(0), -1)
-        ratio = ratio.reshape(x.size(0), -1)
-
-        y = self.mapping1(torch.cat([x, ref_reshape, ratio], dim=1))
-        out = y.reshape(ref.shape[0], ref.shape[1], -1) #+ ref # resudial
-        return out
diff --git a/src/src/audio2pose_models/audio2pose.py b/src/src/audio2pose_models/audio2pose.py
deleted file mode 100644
index 2b8cd1427038460a7679260a424d2f01d2bcf2c5..0000000000000000000000000000000000000000
--- a/src/src/audio2pose_models/audio2pose.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import torch
-from torch import nn
-from src.audio2pose_models.cvae import CVAE
-from src.audio2pose_models.discriminator import PoseSequenceDiscriminator
-from src.audio2pose_models.audio_encoder import AudioEncoder
-
-class Audio2Pose(nn.Module):
-    def __init__(self, cfg, wav2lip_checkpoint, device='cuda'):
-        super().__init__()
-        self.cfg = cfg
-        self.seq_len = cfg.MODEL.CVAE.SEQ_LEN
-        self.latent_dim = cfg.MODEL.CVAE.LATENT_SIZE
-        self.device = device
-
-        self.audio_encoder = AudioEncoder(wav2lip_checkpoint, device)
-        self.audio_encoder.eval()
-        for param in self.audio_encoder.parameters():
-            param.requires_grad = False
-
-        self.netG = CVAE(cfg)
-        self.netD_motion = PoseSequenceDiscriminator(cfg)
-
-
-    def forward(self, x):
-
-        batch = {}
-        coeff_gt = x['gt'].cuda().squeeze(0)           #bs frame_len+1 73
-        batch['pose_motion_gt'] = coeff_gt[:, 1:, 64:70] - coeff_gt[:, :1, 64:70]  #bs frame_len 6
-        batch['ref'] = coeff_gt[:, 0, 64:70]  #bs 6
-        batch['class'] = x['class'].squeeze(0).cuda()  # bs
-        indiv_mels= x['indiv_mels'].cuda().squeeze(0)  # bs seq_len+1 80 16
-
-        # forward
-        audio_emb_list = []
-        audio_emb = self.audio_encoder(indiv_mels[:, 1:, :, :].unsqueeze(2)) #bs seq_len 512
-        batch['audio_emb'] = audio_emb
-        batch = self.netG(batch)
-
-        pose_motion_pred = batch['pose_motion_pred']           # bs frame_len 6
-        pose_gt = coeff_gt[:, 1:, 64:70].clone()               # bs frame_len 6
-        pose_pred = coeff_gt[:, :1, 64:70] + pose_motion_pred  # bs frame_len 6
-
-        batch['pose_pred'] = pose_pred
-        batch['pose_gt'] = pose_gt
-
-        return batch
-
-    def test(self, x):
-
-        batch = {}
-        ref = x['ref']   #bs 1 70
-        batch['ref'] = x['ref'][:,0,-6:]
-        batch['class'] = x['class']
-        bs = ref.shape[0]
-
-        indiv_mels= x['indiv_mels']               # bs T 1 80 16
-        indiv_mels_use = indiv_mels[:, 1:]        # we regard the ref as the first frame
-        num_frames = x['num_frames']
-        num_frames = int(num_frames) - 1
-
-        #
-        div = num_frames//self.seq_len
-        re = num_frames%self.seq_len
-        audio_emb_list = []
-        pose_motion_pred_list = [torch.zeros(batch['ref'].unsqueeze(1).shape, dtype=batch['ref'].dtype,
-                                                device=batch['ref'].device)]
-
-        for i in range(div):
-            z = torch.randn(bs, self.latent_dim).to(ref.device)
-            batch['z'] = z
-            audio_emb = self.audio_encoder(indiv_mels_use[:, i*self.seq_len:(i+1)*self.seq_len,:,:,:]) #bs seq_len 512
-            batch['audio_emb'] = audio_emb
-            batch = self.netG.test(batch)
-            pose_motion_pred_list.append(batch['pose_motion_pred'])  #list of bs seq_len 6
-
-        if re != 0:
-            z = torch.randn(bs, self.latent_dim).to(ref.device)
-            batch['z'] = z
-            audio_emb = self.audio_encoder(indiv_mels_use[:, -1*self.seq_len:,:,:,:]) #bs seq_len 512
-            if audio_emb.shape[1] != self.seq_len:
-                pad_dim = self.seq_len-audio_emb.shape[1]
-                pad_audio_emb = audio_emb[:, :1].repeat(1, pad_dim, 1)
-                audio_emb = torch.cat([pad_audio_emb, audio_emb], 1)
-            batch['audio_emb'] = audio_emb
-            batch = self.netG.test(batch)
-            pose_motion_pred_list.append(batch['pose_motion_pred'][:,-1*re:,:])
-
-        pose_motion_pred = torch.cat(pose_motion_pred_list, dim = 1)
-        batch['pose_motion_pred'] = pose_motion_pred
-
-        pose_pred = ref[:, :1, -6:] + pose_motion_pred  # bs T 6
-
-        batch['pose_pred'] = pose_pred
-        return batch
diff --git a/src/src/audio2pose_models/audio_encoder.py b/src/src/audio2pose_models/audio_encoder.py
deleted file mode 100644
index 6279d2014a2e786a6c549f084339e18d00e50331..0000000000000000000000000000000000000000
--- a/src/src/audio2pose_models/audio_encoder.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import torch
-from torch import nn
-from torch.nn import functional as F
-
-class Conv2d(nn.Module):
-    def __init__(self, cin, cout, kernel_size, stride, padding, residual=False, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.conv_block = nn.Sequential(
-                            nn.Conv2d(cin, cout, kernel_size, stride, padding),
-                            nn.BatchNorm2d(cout)
-                            )
-        self.act = nn.ReLU()
-        self.residual = residual
-
-    def forward(self, x):
-        out = self.conv_block(x)
-        if self.residual:
-            out += x
-        return self.act(out)
-
-class AudioEncoder(nn.Module):
-    def __init__(self, wav2lip_checkpoint, device):
-        super(AudioEncoder, self).__init__()
-
-        self.audio_encoder = nn.Sequential(
-            Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
-            Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True),
-            Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(32, 64, kernel_size=3, stride=(3, 1), padding=1),
-            Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True),
-            Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(64, 128, kernel_size=3, stride=3, padding=1),
-            Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True),
-            Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(128, 256, kernel_size=3, stride=(3, 2), padding=1),
-            Conv2d(256, 256, kernel_size=3, stride=1, padding=1, residual=True),
-
-            Conv2d(256, 512, kernel_size=3, stride=1, padding=0),
-            Conv2d(512, 512, kernel_size=1, stride=1, padding=0),)
-
-        #### load the pre-trained audio_encoder, we do not need to load wav2lip model here.
-        # wav2lip_state_dict = torch.load(wav2lip_checkpoint, map_location=torch.device(device))['state_dict']
-        # state_dict = self.audio_encoder.state_dict()
-
-        # for k,v in wav2lip_state_dict.items():
-        #     if 'audio_encoder' in k:
-        #         state_dict[k.replace('module.audio_encoder.', '')] = v
-        # self.audio_encoder.load_state_dict(state_dict)
-
-
-    def forward(self, audio_sequences):
-        # audio_sequences = (B, T, 1, 80, 16)
-        B = audio_sequences.size(0)
-
-        audio_sequences = torch.cat([audio_sequences[:, i] for i in range(audio_sequences.size(1))], dim=0)
-
-        audio_embedding = self.audio_encoder(audio_sequences) # B, 512, 1, 1
-        dim = audio_embedding.shape[1]
-        audio_embedding = audio_embedding.reshape((B, -1, dim, 1, 1))
-
-        return audio_embedding.squeeze(-1).squeeze(-1) #B seq_len+1 512
diff --git a/src/src/audio2pose_models/cvae.py b/src/src/audio2pose_models/cvae.py
deleted file mode 100644
index d017ce865a03bae40dfe066dbcd82e29839d89dc..0000000000000000000000000000000000000000
--- a/src/src/audio2pose_models/cvae.py
+++ /dev/null
@@ -1,149 +0,0 @@
-import torch
-import torch.nn.functional as F
-from torch import nn
-from src.audio2pose_models.res_unet import ResUnet
-
-def class2onehot(idx, class_num):
-
-    assert torch.max(idx).item() < class_num
-    onehot = torch.zeros(idx.size(0), class_num).to(idx.device)
-    onehot.scatter_(1, idx, 1)
-    return onehot
-
-class CVAE(nn.Module):
-    def __init__(self, cfg):
-        super().__init__()
-        encoder_layer_sizes = cfg.MODEL.CVAE.ENCODER_LAYER_SIZES
-        decoder_layer_sizes = cfg.MODEL.CVAE.DECODER_LAYER_SIZES
-        latent_size = cfg.MODEL.CVAE.LATENT_SIZE
-        num_classes = cfg.DATASET.NUM_CLASSES
-        audio_emb_in_size = cfg.MODEL.CVAE.AUDIO_EMB_IN_SIZE
-        audio_emb_out_size = cfg.MODEL.CVAE.AUDIO_EMB_OUT_SIZE
-        seq_len = cfg.MODEL.CVAE.SEQ_LEN
-
-        self.latent_size = latent_size
-
-        self.encoder = ENCODER(encoder_layer_sizes, latent_size, num_classes,
-                                audio_emb_in_size, audio_emb_out_size, seq_len)
-        self.decoder = DECODER(decoder_layer_sizes, latent_size, num_classes,
-                                audio_emb_in_size, audio_emb_out_size, seq_len)
-    def reparameterize(self, mu, logvar):
-        std = torch.exp(0.5 * logvar)
-        eps = torch.randn_like(std)
-        return mu + eps * std
-
-    def forward(self, batch):
-        batch = self.encoder(batch)
-        mu = batch['mu']
-        logvar = batch['logvar']
-        z = self.reparameterize(mu, logvar)
-        batch['z'] = z
-        return self.decoder(batch)
-
-    def test(self, batch):
-        '''
-        class_id = batch['class']
-        z = torch.randn([class_id.size(0), self.latent_size]).to(class_id.device)
-        batch['z'] = z
-        '''
-        return self.decoder(batch)
-
-class ENCODER(nn.Module):
-    def __init__(self, layer_sizes, latent_size, num_classes,
-                audio_emb_in_size, audio_emb_out_size, seq_len):
-        super().__init__()
-
-        self.resunet = ResUnet()
-        self.num_classes = num_classes
-        self.seq_len = seq_len
-
-        self.MLP = nn.Sequential()
-        layer_sizes[0] += latent_size + seq_len*audio_emb_out_size + 6
-        for i, (in_size, out_size) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
-            self.MLP.add_module(
-                name="L{:d}".format(i), module=nn.Linear(in_size, out_size))
-            self.MLP.add_module(name="A{:d}".format(i), module=nn.ReLU())
-
-        self.linear_means = nn.Linear(layer_sizes[-1], latent_size)
-        self.linear_logvar = nn.Linear(layer_sizes[-1], latent_size)
-        self.linear_audio = nn.Linear(audio_emb_in_size, audio_emb_out_size)
-
-        self.classbias = nn.Parameter(torch.randn(self.num_classes, latent_size))
-
-    def forward(self, batch):
-        class_id = batch['class']
-        pose_motion_gt = batch['pose_motion_gt']    #bs seq_len 6
-        ref = batch['ref']                          #bs 6
-        bs = pose_motion_gt.shape[0]
-        audio_in = batch['audio_emb']               # bs seq_len audio_emb_in_size
-
-        #pose encode
-        pose_emb = self.resunet(pose_motion_gt.unsqueeze(1))  #bs 1 seq_len 6
-        pose_emb = pose_emb.reshape(bs, -1)                   #bs seq_len*6
-
-        #audio mapping
-        print(audio_in.shape)
-        audio_out = self.linear_audio(audio_in)     # bs seq_len audio_emb_out_size
-        audio_out = audio_out.reshape(bs, -1)
-
-        class_bias = self.classbias[class_id]       #bs latent_size
-        x_in = torch.cat([ref, pose_emb, audio_out, class_bias], dim=-1)  #bs seq_len*(audio_emb_out_size+6)+latent_size
-        x_out = self.MLP(x_in)
-
-        mu = self.linear_means(x_out)
-        logvar = self.linear_means(x_out)           #bs latent_size
-
-        batch.update({'mu':mu, 'logvar':logvar})
-        return batch
-
-class DECODER(nn.Module):
-    def __init__(self, layer_sizes, latent_size, num_classes,
-                audio_emb_in_size, audio_emb_out_size, seq_len):
-        super().__init__()
-
-        self.resunet = ResUnet()
-        self.num_classes = num_classes
-        self.seq_len = seq_len
-
-        self.MLP = nn.Sequential()
-        input_size = latent_size + seq_len*audio_emb_out_size + 6
-        for i, (in_size, out_size) in enumerate(zip([input_size]+layer_sizes[:-1], layer_sizes)):
-            self.MLP.add_module(
-                name="L{:d}".format(i), module=nn.Linear(in_size, out_size))
-            if i+1 < len(layer_sizes):
-                self.MLP.add_module(name="A{:d}".format(i), module=nn.ReLU())
-            else:
-                self.MLP.add_module(name="sigmoid", module=nn.Sigmoid())
-
-        self.pose_linear = nn.Linear(6, 6)
-        self.linear_audio = nn.Linear(audio_emb_in_size, audio_emb_out_size)
-
-        self.classbias = nn.Parameter(torch.randn(self.num_classes, latent_size))
-
-    def forward(self, batch):
-
-        z = batch['z']                      #bs latent_size
-        bs = z.shape[0]
-        class_id = batch['class']
-        ref = batch['ref']                  #bs 6
-        audio_in = batch['audio_emb']       # bs seq_len audio_emb_in_size
-        #print('audio_in: ', audio_in[:, :, :10])
-
-        audio_out = self.linear_audio(audio_in)   # bs seq_len audio_emb_out_size
-        #print('audio_out: ', audio_out[:, :, :10])
-        audio_out = audio_out.reshape([bs, -1])   # bs seq_len*audio_emb_out_size
-        class_bias = self.classbias[class_id]     #bs latent_size
-
-        z = z + class_bias
-        x_in = torch.cat([ref, z, audio_out], dim=-1)
-        x_out = self.MLP(x_in)                    # bs layer_sizes[-1]
-        x_out = x_out.reshape((bs, self.seq_len, -1))
-
-        #print('x_out: ', x_out)
-
-        pose_emb = self.resunet(x_out.unsqueeze(1))    #bs 1 seq_len 6
-
-        pose_motion_pred = self.pose_linear(pose_emb.squeeze(1))  #bs seq_len 6
-
-        batch.update({'pose_motion_pred':pose_motion_pred})
-        return batch
diff --git a/src/src/audio2pose_models/discriminator.py b/src/src/audio2pose_models/discriminator.py
deleted file mode 100644
index 339c38e4812ff38a810f0f3a1c01812f6d5d78db..0000000000000000000000000000000000000000
--- a/src/src/audio2pose_models/discriminator.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import torch
-import torch.nn.functional as F
-from torch import nn
-
-class ConvNormRelu(nn.Module):
-    def __init__(self, conv_type='1d', in_channels=3, out_channels=64, downsample=False,
-                 kernel_size=None, stride=None, padding=None, norm='BN', leaky=False):
-        super().__init__()
-        if kernel_size is None:
-            if downsample:
-                kernel_size, stride, padding = 4, 2, 1
-            else:
-                kernel_size, stride, padding = 3, 1, 1
-
-        if conv_type == '2d':
-            self.conv = nn.Conv2d(
-                in_channels,
-                out_channels,
-                kernel_size,
-                stride,
-                padding,
-                bias=False,
-            )
-            if norm == 'BN':
-                self.norm = nn.BatchNorm2d(out_channels)
-            elif norm == 'IN':
-                self.norm = nn.InstanceNorm2d(out_channels)
-            else:
-                raise NotImplementedError
-        elif conv_type == '1d':
-            self.conv = nn.Conv1d(
-                in_channels,
-                out_channels,
-                kernel_size,
-                stride,
-                padding,
-                bias=False,
-            )
-            if norm == 'BN':
-                self.norm = nn.BatchNorm1d(out_channels)
-            elif norm == 'IN':
-                self.norm = nn.InstanceNorm1d(out_channels)
-            else:
-                raise NotImplementedError
-        nn.init.kaiming_normal_(self.conv.weight)
-
-        self.act = nn.LeakyReLU(negative_slope=0.2, inplace=False) if leaky else nn.ReLU(inplace=True)
-
-    def forward(self, x):
-        x = self.conv(x)
-        if isinstance(self.norm, nn.InstanceNorm1d):
-            x = self.norm(x.permute((0, 2, 1))).permute((0, 2, 1))  # normalize on [C]
-        else:
-            x = self.norm(x)
-        x = self.act(x)
-        return x
-
-
-class PoseSequenceDiscriminator(nn.Module):
-    def __init__(self, cfg):
-        super().__init__()
-        self.cfg = cfg
-        leaky = self.cfg.MODEL.DISCRIMINATOR.LEAKY_RELU
-
-        self.seq = nn.Sequential(
-            ConvNormRelu('1d', cfg.MODEL.DISCRIMINATOR.INPUT_CHANNELS, 256, downsample=True, leaky=leaky),  # B, 256, 64
-            ConvNormRelu('1d', 256, 512, downsample=True, leaky=leaky),  # B, 512, 32
-            ConvNormRelu('1d', 512, 1024, kernel_size=3, stride=1, padding=1, leaky=leaky),  # B, 1024, 16
-            nn.Conv1d(1024, 1, kernel_size=3, stride=1, padding=1, bias=True)  # B, 1, 16
-        )
-
-    def forward(self, x):
-        x = x.reshape(x.size(0), x.size(1), -1).transpose(1, 2)
-        x = self.seq(x)
-        x = x.squeeze(1)
-        return x
\ No newline at end of file
diff --git a/src/src/audio2pose_models/networks.py b/src/src/audio2pose_models/networks.py
deleted file mode 100644
index 8aa0b1390e7b4bb0e16057ac94d2fe84f48421af..0000000000000000000000000000000000000000
--- a/src/src/audio2pose_models/networks.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import torch.nn as nn
-import torch
-
-
-class ResidualConv(nn.Module):
-    def __init__(self, input_dim, output_dim, stride, padding):
-        super(ResidualConv, self).__init__()
-
-        self.conv_block = nn.Sequential(
-            nn.BatchNorm2d(input_dim),
-            nn.ReLU(),
-            nn.Conv2d(
-                input_dim, output_dim, kernel_size=3, stride=stride, padding=padding
-            ),
-            nn.BatchNorm2d(output_dim),
-            nn.ReLU(),
-            nn.Conv2d(output_dim, output_dim, kernel_size=3, padding=1),
-        )
-        self.conv_skip = nn.Sequential(
-            nn.Conv2d(input_dim, output_dim, kernel_size=3, stride=stride, padding=1),
-            nn.BatchNorm2d(output_dim),
-        )
-
-    def forward(self, x):
-
-        return self.conv_block(x) + self.conv_skip(x)
-
-
-class Upsample(nn.Module):
-    def __init__(self, input_dim, output_dim, kernel, stride):
-        super(Upsample, self).__init__()
-
-        self.upsample = nn.ConvTranspose2d(
-            input_dim, output_dim, kernel_size=kernel, stride=stride
-        )
-
-    def forward(self, x):
-        return self.upsample(x)
-
-
-class Squeeze_Excite_Block(nn.Module):
-    def __init__(self, channel, reduction=16):
-        super(Squeeze_Excite_Block, self).__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2d(1)
-        self.fc = nn.Sequential(
-            nn.Linear(channel, channel // reduction, bias=False),
-            nn.ReLU(inplace=True),
-            nn.Linear(channel // reduction, channel, bias=False),
-            nn.Sigmoid(),
-        )
-
-    def forward(self, x):
-        b, c, _, _ = x.size()
-        y = self.avg_pool(x).view(b, c)
-        y = self.fc(y).view(b, c, 1, 1)
-        return x * y.expand_as(x)
-
-
-class ASPP(nn.Module):
-    def __init__(self, in_dims, out_dims, rate=[6, 12, 18]):
-        super(ASPP, self).__init__()
-
-        self.aspp_block1 = nn.Sequential(
-            nn.Conv2d(
-                in_dims, out_dims, 3, stride=1, padding=rate[0], dilation=rate[0]
-            ),
-            nn.ReLU(inplace=True),
-            nn.BatchNorm2d(out_dims),
-        )
-        self.aspp_block2 = nn.Sequential(
-            nn.Conv2d(
-                in_dims, out_dims, 3, stride=1, padding=rate[1], dilation=rate[1]
-            ),
-            nn.ReLU(inplace=True),
-            nn.BatchNorm2d(out_dims),
-        )
-        self.aspp_block3 = nn.Sequential(
-            nn.Conv2d(
-                in_dims, out_dims, 3, stride=1, padding=rate[2], dilation=rate[2]
-            ),
-            nn.ReLU(inplace=True),
-            nn.BatchNorm2d(out_dims),
-        )
-
-        self.output = nn.Conv2d(len(rate) * out_dims, out_dims, 1)
-        self._init_weights()
-
-    def forward(self, x):
-        x1 = self.aspp_block1(x)
-        x2 = self.aspp_block2(x)
-        x3 = self.aspp_block3(x)
-        out = torch.cat([x1, x2, x3], dim=1)
-        return self.output(out)
-
-    def _init_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight)
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
-
-class Upsample_(nn.Module):
-    def __init__(self, scale=2):
-        super(Upsample_, self).__init__()
-
-        self.upsample = nn.Upsample(mode="bilinear", scale_factor=scale)
-
-    def forward(self, x):
-        return self.upsample(x)
-
-
-class AttentionBlock(nn.Module):
-    def __init__(self, input_encoder, input_decoder, output_dim):
-        super(AttentionBlock, self).__init__()
-
-        self.conv_encoder = nn.Sequential(
-            nn.BatchNorm2d(input_encoder),
-            nn.ReLU(),
-            nn.Conv2d(input_encoder, output_dim, 3, padding=1),
-            nn.MaxPool2d(2, 2),
-        )
-
-        self.conv_decoder = nn.Sequential(
-            nn.BatchNorm2d(input_decoder),
-            nn.ReLU(),
-            nn.Conv2d(input_decoder, output_dim, 3, padding=1),
-        )
-
-        self.conv_attn = nn.Sequential(
-            nn.BatchNorm2d(output_dim),
-            nn.ReLU(),
-            nn.Conv2d(output_dim, 1, 1),
-        )
-
-    def forward(self, x1, x2):
-        out = self.conv_encoder(x1) + self.conv_decoder(x2)
-        out = self.conv_attn(out)
-        return out * x2
\ No newline at end of file
diff --git a/src/src/audio2pose_models/res_unet.py b/src/src/audio2pose_models/res_unet.py
deleted file mode 100644
index f2611e1d1a9bf233507427b34928fca60e094224..0000000000000000000000000000000000000000
--- a/src/src/audio2pose_models/res_unet.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import torch
-import torch.nn as nn
-from src.audio2pose_models.networks import ResidualConv, Upsample
-
-
-class ResUnet(nn.Module):
-    def __init__(self, channel=1, filters=[32, 64, 128, 256]):
-        super(ResUnet, self).__init__()
-
-        self.input_layer = nn.Sequential(
-            nn.Conv2d(channel, filters[0], kernel_size=3, padding=1),
-            nn.BatchNorm2d(filters[0]),
-            nn.ReLU(),
-            nn.Conv2d(filters[0], filters[0], kernel_size=3, padding=1),
-        )
-        self.input_skip = nn.Sequential(
-            nn.Conv2d(channel, filters[0], kernel_size=3, padding=1)
-        )
-
-        self.residual_conv_1 = ResidualConv(filters[0], filters[1], stride=(2,1), padding=1)
-        self.residual_conv_2 = ResidualConv(filters[1], filters[2], stride=(2,1), padding=1)
-
-        self.bridge = ResidualConv(filters[2], filters[3], stride=(2,1), padding=1)
-
-        self.upsample_1 = Upsample(filters[3], filters[3], kernel=(2,1), stride=(2,1))
-        self.up_residual_conv1 = ResidualConv(filters[3] + filters[2], filters[2], stride=1, padding=1)
-
-        self.upsample_2 = Upsample(filters[2], filters[2], kernel=(2,1), stride=(2,1))
-        self.up_residual_conv2 = ResidualConv(filters[2] + filters[1], filters[1], stride=1, padding=1)
-
-        self.upsample_3 = Upsample(filters[1], filters[1], kernel=(2,1), stride=(2,1))
-        self.up_residual_conv3 = ResidualConv(filters[1] + filters[0], filters[0], stride=1, padding=1)
-
-        self.output_layer = nn.Sequential(
-            nn.Conv2d(filters[0], 1, 1, 1),
-            nn.Sigmoid(),
-        )
-
-    def forward(self, x):
-        # Encode
-        x1 = self.input_layer(x) + self.input_skip(x)
-        x2 = self.residual_conv_1(x1)
-        x3 = self.residual_conv_2(x2)
-        # Bridge
-        x4 = self.bridge(x3)
-
-        # Decode
-        x4 = self.upsample_1(x4)
-        x5 = torch.cat([x4, x3], dim=1)
-
-        x6 = self.up_residual_conv1(x5)
-
-        x6 = self.upsample_2(x6)
-        x7 = torch.cat([x6, x2], dim=1)
-
-        x8 = self.up_residual_conv2(x7)
-
-        x8 = self.upsample_3(x8)
-        x9 = torch.cat([x8, x1], dim=1)
-
-        x10 = self.up_residual_conv3(x9)
-
-        output = self.output_layer(x10)
-
-        return output
\ No newline at end of file
diff --git a/src/src/config/auido2exp.yaml b/src/src/config/auido2exp.yaml
deleted file mode 100644
index 7369dbf350476e14a1d600507f1f8b7d8aa6ecd3..0000000000000000000000000000000000000000
--- a/src/src/config/auido2exp.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-DATASET:
-  TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/train.txt
-  EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/val.txt
-  TRAIN_BATCH_SIZE: 32
-  EVAL_BATCH_SIZE: 32
-  EXP: True
-  EXP_DIM: 64
-  FRAME_LEN: 32
-  COEFF_LEN: 73
-  NUM_CLASSES: 46
-  AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
-  COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav2lip_3dmm
-  LMDB_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
-  DEBUG: True
-  NUM_REPEATS: 2
-  T: 40
-
-
-MODEL:
-  FRAMEWORK: V2
-  AUDIOENCODER:
-    LEAKY_RELU: True
-    NORM: 'IN'
-  DISCRIMINATOR:
-    LEAKY_RELU: False
-    INPUT_CHANNELS: 6
-  CVAE:
-    AUDIO_EMB_IN_SIZE: 512
-    AUDIO_EMB_OUT_SIZE: 128
-    SEQ_LEN: 32
-    LATENT_SIZE: 256
-    ENCODER_LAYER_SIZES: [192, 1024]
-    DECODER_LAYER_SIZES: [1024, 192]
-
-
-TRAIN:
-  MAX_EPOCH: 300
-  GENERATOR:
-    LR: 2.0e-5
-  DISCRIMINATOR:
-    LR: 1.0e-5
-  LOSS:
-    W_FEAT: 0
-    W_COEFF_EXP: 2
-    W_LM: 1.0e-2
-    W_LM_MOUTH: 0
-    W_REG: 0
-    W_SYNC: 0
-    W_COLOR: 0
-    W_EXPRESSION: 0
-    W_LIPREADING: 0.01
-    W_LIPREADING_VV: 0
-    W_EYE_BLINK: 4
-
-TAG:
-  NAME: small_dataset
-
-
diff --git a/src/src/config/auido2pose.yaml b/src/src/config/auido2pose.yaml
deleted file mode 100644
index bc61f94d12f406f2d8d02545e55b61075051484d..0000000000000000000000000000000000000000
--- a/src/src/config/auido2pose.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-DATASET:
-  TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/train_33.txt
-  EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/val.txt
-  TRAIN_BATCH_SIZE: 64
-  EVAL_BATCH_SIZE: 1
-  EXP: True
-  EXP_DIM: 64
-  FRAME_LEN: 32
-  COEFF_LEN: 73
-  NUM_CLASSES: 46
-  AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
-  COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
-  DEBUG: True
-
-
-MODEL:
-  AUDIOENCODER:
-    LEAKY_RELU: True
-    NORM: 'IN'
-  DISCRIMINATOR:
-    LEAKY_RELU: False
-    INPUT_CHANNELS: 6
-  CVAE:
-    AUDIO_EMB_IN_SIZE: 512
-    AUDIO_EMB_OUT_SIZE: 6
-    SEQ_LEN: 32
-    LATENT_SIZE: 64
-    ENCODER_LAYER_SIZES: [192, 128]
-    DECODER_LAYER_SIZES: [128, 192]
-
-
-TRAIN:
-  MAX_EPOCH: 150
-  GENERATOR:
-    LR: 1.0e-4
-  DISCRIMINATOR:
-    LR: 1.0e-4
-  LOSS:
-    LAMBDA_REG: 1
-    LAMBDA_LANDMARKS: 0
-    LAMBDA_VERTICES: 0
-    LAMBDA_GAN_MOTION: 0.7
-    LAMBDA_GAN_COEFF: 0
-    LAMBDA_KL: 1
-
-TAG:
-  NAME: cvae_UNET_useAudio_usewav2lipAudioEncoder
-
-
diff --git a/src/src/config/facerender.yaml b/src/src/config/facerender.yaml
deleted file mode 100644
index 9494ef82dfa16b16b7aa0b848ebdd6b23e739e2a..0000000000000000000000000000000000000000
--- a/src/src/config/facerender.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-model_params:
-  common_params:
-    num_kp: 15
-    image_channel: 3
-    feature_channel: 32
-    estimate_jacobian: False   # True
-  kp_detector_params:
-     temperature: 0.1
-     block_expansion: 32
-     max_features: 1024
-     scale_factor: 0.25         # 0.25
-     num_blocks: 5
-     reshape_channel: 16384  # 16384 = 1024 * 16
-     reshape_depth: 16
-  he_estimator_params:
-     block_expansion: 64
-     max_features: 2048
-     num_bins: 66
-  generator_params:
-    block_expansion: 64
-    max_features: 512
-    num_down_blocks: 2
-    reshape_channel: 32
-    reshape_depth: 16         # 512 = 32 * 16
-    num_resblocks: 6
-    estimate_occlusion_map: True
-    dense_motion_params:
-      block_expansion: 32
-      max_features: 1024
-      num_blocks: 5
-      reshape_depth: 16
-      compress: 4
-  discriminator_params:
-    scales: [1]
-    block_expansion: 32
-    max_features: 512
-    num_blocks: 4
-    sn: True
-  mapping_params:
-    coeff_nc: 70
-    descriptor_nc: 1024
-    layer: 3
-    num_kp: 15
-    num_bins: 66
-
diff --git a/src/src/config/facerender_still.yaml b/src/src/config/facerender_still.yaml
deleted file mode 100644
index 6b4d66dade3e655ac4cfc25a994ca28e53821d80..0000000000000000000000000000000000000000
--- a/src/src/config/facerender_still.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-model_params:
-  common_params:
-    num_kp: 15
-    image_channel: 3
-    feature_channel: 32
-    estimate_jacobian: False   # True
-  kp_detector_params:
-     temperature: 0.1
-     block_expansion: 32
-     max_features: 1024
-     scale_factor: 0.25         # 0.25
-     num_blocks: 5
-     reshape_channel: 16384  # 16384 = 1024 * 16
-     reshape_depth: 16
-  he_estimator_params:
-     block_expansion: 64
-     max_features: 2048
-     num_bins: 66
-  generator_params:
-    block_expansion: 64
-    max_features: 512
-    num_down_blocks: 2
-    reshape_channel: 32
-    reshape_depth: 16         # 512 = 32 * 16
-    num_resblocks: 6
-    estimate_occlusion_map: True
-    dense_motion_params:
-      block_expansion: 32
-      max_features: 1024
-      num_blocks: 5
-      reshape_depth: 16
-      compress: 4
-  discriminator_params:
-    scales: [1]
-    block_expansion: 32
-    max_features: 512
-    num_blocks: 4
-    sn: True
-  mapping_params:
-    coeff_nc: 73
-    descriptor_nc: 1024
-    layer: 3
-    num_kp: 15
-    num_bins: 66
-
diff --git a/src/src/config/similarity_Lm3D_all.mat b/src/src/config/similarity_Lm3D_all.mat
deleted file mode 100644
index a0e23588302bc71fc899eef53ff06df5f4df4c1d..0000000000000000000000000000000000000000
Binary files a/src/src/config/similarity_Lm3D_all.mat and /dev/null differ
diff --git a/src/src/face3d/data/__init__.py b/src/src/face3d/data/__init__.py
deleted file mode 100644
index 9a9761c518a1b07c5996165869742af0a52c82bc..0000000000000000000000000000000000000000
--- a/src/src/face3d/data/__init__.py
+++ /dev/null
@@ -1,116 +0,0 @@
-"""This package includes all the modules related to data loading and preprocessing
-
- To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
- You need to implement four functions:
-    -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
-    -- <__len__>: return the size of dataset.
-    -- <__getitem__>: get a data point from data loader.
-    -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
-
-Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
-See our template dataset class 'template_dataset.py' for more details.
-"""
-import numpy as np
-import importlib
-import torch.utils.data
-from face3d.data.base_dataset import BaseDataset
-
-
-def find_dataset_using_name(dataset_name):
-    """Import the module "data/[dataset_name]_dataset.py".
-
-    In the file, the class called DatasetNameDataset() will
-    be instantiated. It has to be a subclass of BaseDataset,
-    and it is case-insensitive.
- """ - dataset_filename = "data." + dataset_name + "_dataset" - datasetlib = importlib.import_module(dataset_filename) - - dataset = None - target_dataset_name = dataset_name.replace('_', '') + 'dataset' - for name, cls in datasetlib.__dict__.items(): - if name.lower() == target_dataset_name.lower() \ - and issubclass(cls, BaseDataset): - dataset = cls - - if dataset is None: - raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) - - return dataset - - -def get_option_setter(dataset_name): - """Return the static method of the dataset class.""" - dataset_class = find_dataset_using_name(dataset_name) - return dataset_class.modify_commandline_options - - -def create_dataset(opt, rank=0): - """Create a dataset given the option. - - This function wraps the class CustomDatasetDataLoader. - This is the main interface between this package and 'train.py'/'test.py' - - Example: - >>> from data import create_dataset - >>> dataset = create_dataset(opt) - """ - data_loader = CustomDatasetDataLoader(opt, rank=rank) - dataset = data_loader.load_data() - return dataset - -class CustomDatasetDataLoader(): - """Wrapper class of Dataset class that performs multi-threaded data loading""" - - def __init__(self, opt, rank=0): - """Initialize this class - - Step 1: create a dataset instance given the name [dataset_mode] - Step 2: create a multi-threaded data loader. - """ - self.opt = opt - dataset_class = find_dataset_using_name(opt.dataset_mode) - self.dataset = dataset_class(opt) - self.sampler = None - print("rank %d %s dataset [%s] was created" % (rank, self.dataset.name, type(self.dataset).__name__)) - if opt.use_ddp and opt.isTrain: - world_size = opt.world_size - self.sampler = torch.utils.data.distributed.DistributedSampler( - self.dataset, - num_replicas=world_size, - rank=rank, - shuffle=not opt.serial_batches - ) - self.dataloader = torch.utils.data.DataLoader( - self.dataset, - sampler=self.sampler, - num_workers=int(opt.num_threads / world_size), - batch_size=int(opt.batch_size / world_size), - drop_last=True) - else: - self.dataloader = torch.utils.data.DataLoader( - self.dataset, - batch_size=opt.batch_size, - shuffle=(not opt.serial_batches) and opt.isTrain, - num_workers=int(opt.num_threads), - drop_last=True - ) - - def set_epoch(self, epoch): - self.dataset.current_epoch = epoch - if self.sampler is not None: - self.sampler.set_epoch(epoch) - - def load_data(self): - return self - - def __len__(self): - """Return the number of data in the dataset""" - return min(len(self.dataset), self.opt.max_dataset_size) - - def __iter__(self): - """Return a batch of data""" - for i, data in enumerate(self.dataloader): - if i * self.opt.batch_size >= self.opt.max_dataset_size: - break - yield data diff --git a/src/src/face3d/data/base_dataset.py b/src/src/face3d/data/base_dataset.py deleted file mode 100644 index 1bd57d082d519f512d7114b4f867b6695fb7de06..0000000000000000000000000000000000000000 --- a/src/src/face3d/data/base_dataset.py +++ /dev/null @@ -1,125 +0,0 @@ -"""This module implements an abstract base class (ABC) 'BaseDataset' for datasets. - -It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses. 
-""" -import random -import numpy as np -import torch.utils.data as data -from PIL import Image -import torchvision.transforms as transforms -from abc import ABC, abstractmethod - - -class BaseDataset(data.Dataset, ABC): - """This class is an abstract base class (ABC) for datasets. - - To create a subclass, you need to implement the following four functions: - -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). - -- <__len__>: return the size of dataset. - -- <__getitem__>: get a data point. - -- : (optionally) add dataset-specific options and set default options. - """ - - def __init__(self, opt): - """Initialize the class; save the options in the class - - Parameters: - opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions - """ - self.opt = opt - # self.root = opt.dataroot - self.current_epoch = 0 - - @staticmethod - def modify_commandline_options(parser, is_train): - """Add new dataset-specific options, and rewrite default values for existing options. - - Parameters: - parser -- original option parser - is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. - - Returns: - the modified parser. - """ - return parser - - @abstractmethod - def __len__(self): - """Return the total number of images in the dataset.""" - return 0 - - @abstractmethod - def __getitem__(self, index): - """Return a data point and its metadata information. - - Parameters: - index - - a random integer for data indexing - - Returns: - a dictionary of data with their names. It ususally contains the data itself and its metadata information. - """ - pass - - -def get_transform(grayscale=False): - transform_list = [] - if grayscale: - transform_list.append(transforms.Grayscale(1)) - transform_list += [transforms.ToTensor()] - return transforms.Compose(transform_list) - -def get_affine_mat(opt, size): - shift_x, shift_y, scale, rot_angle, flip = 0., 0., 1., 0., False - w, h = size - - if 'shift' in opt.preprocess: - shift_pixs = int(opt.shift_pixs) - shift_x = random.randint(-shift_pixs, shift_pixs) - shift_y = random.randint(-shift_pixs, shift_pixs) - if 'scale' in opt.preprocess: - scale = 1 + opt.scale_delta * (2 * random.random() - 1) - if 'rot' in opt.preprocess: - rot_angle = opt.rot_angle * (2 * random.random() - 1) - rot_rad = -rot_angle * np.pi/180 - if 'flip' in opt.preprocess: - flip = random.random() > 0.5 - - shift_to_origin = np.array([1, 0, -w//2, 0, 1, -h//2, 0, 0, 1]).reshape([3, 3]) - flip_mat = np.array([-1 if flip else 1, 0, 0, 0, 1, 0, 0, 0, 1]).reshape([3, 3]) - shift_mat = np.array([1, 0, shift_x, 0, 1, shift_y, 0, 0, 1]).reshape([3, 3]) - rot_mat = np.array([np.cos(rot_rad), np.sin(rot_rad), 0, -np.sin(rot_rad), np.cos(rot_rad), 0, 0, 0, 1]).reshape([3, 3]) - scale_mat = np.array([scale, 0, 0, 0, scale, 0, 0, 0, 1]).reshape([3, 3]) - shift_to_center = np.array([1, 0, w//2, 0, 1, h//2, 0, 0, 1]).reshape([3, 3]) - - affine = shift_to_center @ scale_mat @ rot_mat @ shift_mat @ flip_mat @ shift_to_origin - affine_inv = np.linalg.inv(affine) - return affine, affine_inv, flip - -def apply_img_affine(img, affine_inv, method=Image.BICUBIC): - return img.transform(img.size, Image.AFFINE, data=affine_inv.flatten()[:6], resample=Image.BICUBIC) - -def apply_lm_affine(landmark, affine, flip, size): - _, h = size - lm = landmark.copy() - lm[:, 1] = h - 1 - lm[:, 1] - lm = np.concatenate((lm, np.ones([lm.shape[0], 1])), -1) - lm = lm @ np.transpose(affine) - lm[:, :2] = 
lm[:, :2] / lm[:, 2:] - lm = lm[:, :2] - lm[:, 1] = h - 1 - lm[:, 1] - if flip: - lm_ = lm.copy() - lm_[:17] = lm[16::-1] - lm_[17:22] = lm[26:21:-1] - lm_[22:27] = lm[21:16:-1] - lm_[31:36] = lm[35:30:-1] - lm_[36:40] = lm[45:41:-1] - lm_[40:42] = lm[47:45:-1] - lm_[42:46] = lm[39:35:-1] - lm_[46:48] = lm[41:39:-1] - lm_[48:55] = lm[54:47:-1] - lm_[55:60] = lm[59:54:-1] - lm_[60:65] = lm[64:59:-1] - lm_[65:68] = lm[67:64:-1] - lm = lm_ - return lm diff --git a/src/src/face3d/data/flist_dataset.py b/src/src/face3d/data/flist_dataset.py deleted file mode 100644 index c0b6945c80aa756074a5d3c02b9443b15ddcfc57..0000000000000000000000000000000000000000 --- a/src/src/face3d/data/flist_dataset.py +++ /dev/null @@ -1,125 +0,0 @@ -"""This script defines the custom dataset for Deep3DFaceRecon_pytorch -""" - -import os.path -from data.base_dataset import BaseDataset, get_transform, get_affine_mat, apply_img_affine, apply_lm_affine -from data.image_folder import make_dataset -from PIL import Image -import random -import util.util as util -import numpy as np -import json -import torch -from scipy.io import loadmat, savemat -import pickle -from util.preprocess import align_img, estimate_norm -from util.load_mats import load_lm3d - - -def default_flist_reader(flist): - """ - flist format: impath label\nimpath label\n ...(same to caffe's filelist) - """ - imlist = [] - with open(flist, 'r') as rf: - for line in rf.readlines(): - impath = line.strip() - imlist.append(impath) - - return imlist - -def jason_flist_reader(flist): - with open(flist, 'r') as fp: - info = json.load(fp) - return info - -def parse_label(label): - return torch.tensor(np.array(label).astype(np.float32)) - - -class FlistDataset(BaseDataset): - """ - It requires one directories to host training images '/path/to/data/train' - You can train the model with the dataset flag '--dataroot /path/to/data'. - """ - - def __init__(self, opt): - """Initialize this dataset class. - - Parameters: - opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions - """ - BaseDataset.__init__(self, opt) - - self.lm3d_std = load_lm3d(opt.bfm_folder) - - msk_names = default_flist_reader(opt.flist) - self.msk_paths = [os.path.join(opt.data_root, i) for i in msk_names] - - self.size = len(self.msk_paths) - self.opt = opt - - self.name = 'train' if opt.isTrain else 'val' - if '_' in opt.flist: - self.name += '_' + opt.flist.split(os.sep)[-1].split('_')[0] - - - def __getitem__(self, index): - """Return a data point and its metadata information. 
- - Parameters: - index (int) -- a random integer for data indexing - - Returns a dictionary that contains A, B, A_paths and B_paths - img (tensor) -- an image in the input domain - msk (tensor) -- its corresponding attention mask - lm (tensor) -- its corresponding 3d landmarks - im_paths (str) -- image paths - aug_flag (bool) -- a flag used to tell whether its raw or augmented - """ - msk_path = self.msk_paths[index % self.size] # make sure index is within then range - img_path = msk_path.replace('mask/', '') - lm_path = '.'.join(msk_path.replace('mask', 'landmarks').split('.')[:-1]) + '.txt' - - raw_img = Image.open(img_path).convert('RGB') - raw_msk = Image.open(msk_path).convert('RGB') - raw_lm = np.loadtxt(lm_path).astype(np.float32) - - _, img, lm, msk = align_img(raw_img, raw_lm, self.lm3d_std, raw_msk) - - aug_flag = self.opt.use_aug and self.opt.isTrain - if aug_flag: - img, lm, msk = self._augmentation(img, lm, self.opt, msk) - - _, H = img.size - M = estimate_norm(lm, H) - transform = get_transform() - img_tensor = transform(img) - msk_tensor = transform(msk)[:1, ...] - lm_tensor = parse_label(lm) - M_tensor = parse_label(M) - - - return {'imgs': img_tensor, - 'lms': lm_tensor, - 'msks': msk_tensor, - 'M': M_tensor, - 'im_paths': img_path, - 'aug_flag': aug_flag, - 'dataset': self.name} - - def _augmentation(self, img, lm, opt, msk=None): - affine, affine_inv, flip = get_affine_mat(opt, img.size) - img = apply_img_affine(img, affine_inv) - lm = apply_lm_affine(lm, affine, flip, img.size) - if msk is not None: - msk = apply_img_affine(msk, affine_inv, method=Image.BILINEAR) - return img, lm, msk - - - - - def __len__(self): - """Return the total number of images in the dataset. - """ - return self.size diff --git a/src/src/face3d/data/image_folder.py b/src/src/face3d/data/image_folder.py deleted file mode 100644 index efadc2ecbe2fb4b53b78230aba25ec505eff0e55..0000000000000000000000000000000000000000 --- a/src/src/face3d/data/image_folder.py +++ /dev/null @@ -1,66 +0,0 @@ -"""A modified image folder class - -We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py) -so that this class can load images from both current directory and its subdirectories. 
-""" -import numpy as np -import torch.utils.data as data - -from PIL import Image -import os -import os.path - -IMG_EXTENSIONS = [ - '.jpg', '.JPG', '.jpeg', '.JPEG', - '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', - '.tif', '.TIF', '.tiff', '.TIFF', -] - - -def is_image_file(filename): - return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) - - -def make_dataset(dir, max_dataset_size=float("inf")): - images = [] - assert os.path.isdir(dir) or os.path.islink(dir), '%s is not a valid directory' % dir - - for root, _, fnames in sorted(os.walk(dir, followlinks=True)): - for fname in fnames: - if is_image_file(fname): - path = os.path.join(root, fname) - images.append(path) - return images[:min(max_dataset_size, len(images))] - - -def default_loader(path): - return Image.open(path).convert('RGB') - - -class ImageFolder(data.Dataset): - - def __init__(self, root, transform=None, return_paths=False, - loader=default_loader): - imgs = make_dataset(root) - if len(imgs) == 0: - raise(RuntimeError("Found 0 images in: " + root + "\n" - "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) - - self.root = root - self.imgs = imgs - self.transform = transform - self.return_paths = return_paths - self.loader = loader - - def __getitem__(self, index): - path = self.imgs[index] - img = self.loader(path) - if self.transform is not None: - img = self.transform(img) - if self.return_paths: - return img, path - else: - return img - - def __len__(self): - return len(self.imgs) diff --git a/src/src/face3d/data/template_dataset.py b/src/src/face3d/data/template_dataset.py deleted file mode 100644 index bfdf16be2a8a834b204c45d88c86857b37b9bd25..0000000000000000000000000000000000000000 --- a/src/src/face3d/data/template_dataset.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Dataset class template - -This module provides a template for users to implement custom datasets. -You can specify '--dataset_mode template' to use this dataset. -The class name should be consistent with both the filename and its dataset_mode option. -The filename should be _dataset.py -The class name should be Dataset.py -You need to implement the following functions: - -- : Add dataset-specific options and rewrite default values for existing options. - -- <__init__>: Initialize this dataset class. - -- <__getitem__>: Return a data point and its metadata information. - -- <__len__>: Return the number of images. -""" -from data.base_dataset import BaseDataset, get_transform -# from data.image_folder import make_dataset -# from PIL import Image - - -class TemplateDataset(BaseDataset): - """A template dataset class for you to implement custom datasets.""" - @staticmethod - def modify_commandline_options(parser, is_train): - """Add new dataset-specific options, and rewrite default values for existing options. - - Parameters: - parser -- original option parser - is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. - - Returns: - the modified parser. - """ - parser.add_argument('--new_dataset_option', type=float, default=1.0, help='new dataset option') - parser.set_defaults(max_dataset_size=10, new_dataset_option=2.0) # specify dataset-specific default values - return parser - - def __init__(self, opt): - """Initialize this dataset class. - - Parameters: - opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions - - A few things can be done here. 
- - save the options (this has been done in BaseDataset) - - get image paths and meta information of the dataset. - - define the image transformation. - """ - # save the option and dataset root - BaseDataset.__init__(self, opt) - # get the image paths of your dataset; - self.image_paths = [] # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root - # define the default transform function. You can use <base_dataset.get_transform>; you can also define your custom transform function - self.transform = get_transform(opt) - - def __getitem__(self, index): - """Return a data point and its metadata information. - - Parameters: - index -- a random integer for data indexing - - Returns: - a dictionary of data with their names. It usually contains the data itself and its metadata information. - - Step 1: get a random image path: e.g., path = self.image_paths[index] - Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB'). - Step 3: convert your data to a PyTorch tensor. You can use helper functions such as self.transform. e.g., data = self.transform(image) - Step 4: return a data point as a dictionary. - """ - path = 'temp' # needs to be a string - data_A = None # needs to be a tensor - data_B = None # needs to be a tensor - return {'data_A': data_A, 'data_B': data_B, 'path': path} - - def __len__(self): - """Return the total number of images.""" - return len(self.image_paths) diff --git a/src/src/face3d/extract_kp_videos.py b/src/src/face3d/extract_kp_videos.py deleted file mode 100644 index 21616a3b4b5077ffdce99621395237b4edcff58c..0000000000000000000000000000000000000000 --- a/src/src/face3d/extract_kp_videos.py +++ /dev/null @@ -1,108 +0,0 @@ -import os -import cv2 -import time -import glob -import argparse -import face_alignment -import numpy as np -from PIL import Image -from tqdm import tqdm -from itertools import cycle - -from torch.multiprocessing import Pool, Process, set_start_method - -class KeypointExtractor(): - def __init__(self, device): - self.detector = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, - device=device) - - def extract_keypoint(self, images, name=None, info=True): - if isinstance(images, list): - keypoints = [] - if info: - i_range = tqdm(images, desc='landmark detection:') - else: - i_range = images - - for image in i_range: - current_kp = self.extract_keypoint(image) - if np.mean(current_kp) == -1 and keypoints: - keypoints.append(keypoints[-1]) - else: - keypoints.append(current_kp[None]) - - keypoints = np.concatenate(keypoints, 0) - np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) - return keypoints - else: - while True: - try: - keypoints = self.detector.get_landmarks_from_image(np.array(images))[0] - break - except RuntimeError as e: - if str(e).startswith('CUDA'): - print("Warning: out of memory, sleep for 1s") - time.sleep(1) - else: - print(e) - break - except TypeError: - print('No face detected in this image') - shape = [68, 2] - keypoints = -1.
* np.ones(shape) - break - if name is not None: - np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) - return keypoints - -def read_video(filename): - frames = [] - cap = cv2.VideoCapture(filename) - while cap.isOpened(): - ret, frame = cap.read() - if ret: - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - frame = Image.fromarray(frame) - frames.append(frame) - else: - break - cap.release() - return frames - -def run(data): - filename, opt, device = data - os.environ['CUDA_VISIBLE_DEVICES'] = device - kp_extractor = KeypointExtractor(device='cuda') # the constructor requires a device; CUDA_VISIBLE_DEVICES already pins the GPU - images = read_video(filename) - name = filename.split('/')[-2:] - os.makedirs(os.path.join(opt.output_dir, name[-2]), exist_ok=True) - kp_extractor.extract_keypoint( - images, - name=os.path.join(opt.output_dir, name[-2], name[-1]) - ) - -if __name__ == '__main__': - set_start_method('spawn') - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input_dir', type=str, help='the folder of the input files') - parser.add_argument('--output_dir', type=str, help='the folder of the output files') - parser.add_argument('--device_ids', type=str, default='0,1') - parser.add_argument('--workers', type=int, default=4) - - opt = parser.parse_args() - filenames = list() - VIDEO_EXTENSIONS_LOWERCASE = {'mp4'} - VIDEO_EXTENSIONS = VIDEO_EXTENSIONS_LOWERCASE.union({f.upper() for f in VIDEO_EXTENSIONS_LOWERCASE}) - extensions = VIDEO_EXTENSIONS - - for ext in extensions: - filenames += sorted(glob.glob(f'{opt.input_dir}/*.{ext}')) # accumulate across extensions instead of overwriting - print('Total number of videos:', len(filenames)) - pool = Pool(opt.workers) - args_list = cycle([opt]) - device_ids = opt.device_ids.split(",") - device_ids = cycle(device_ids) - for data in tqdm(pool.imap_unordered(run, zip(filenames, args_list, device_ids))): - pass diff --git a/src/src/face3d/extract_kp_videos_safe.py b/src/src/face3d/extract_kp_videos_safe.py deleted file mode 100644 index ba3830b84bee98e02a7d0681803cc4b1719787c2..0000000000000000000000000000000000000000 --- a/src/src/face3d/extract_kp_videos_safe.py +++ /dev/null @@ -1,151 +0,0 @@ -import os -import cv2 -import time -import glob -import argparse -import numpy as np -from PIL import Image -import torch -from tqdm import tqdm -from itertools import cycle -from torch.multiprocessing import Pool, Process, set_start_method - -from facexlib.alignment import landmark_98_to_68 -from facexlib.detection import init_detection_model - -from facexlib.utils import load_file_from_url -from facexlib.alignment.awing_arch import FAN - -def init_alignment_model(model_name, half=False, device='cuda', model_rootpath=None): - if model_name == 'awing_fan': - model = FAN(num_modules=4, num_landmarks=98, device=device) - model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/alignment_WFLW_4HG.pth' - else: - raise NotImplementedError(f'{model_name} is not implemented.') - - model_path = load_file_from_url( - url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath) - model.load_state_dict(torch.load(model_path, map_location=device)['state_dict'], strict=True) - model.eval() - model = model.to(device) - return model - - -class KeypointExtractor(): - def __init__(self, device='cuda'): - - ### gfpgan/weights - try: - import webui # running inside the webui extension - root_path = 'extensions/SadTalker/gfpgan/weights' - - except ImportError: - root_path = 'gfpgan/weights' - - self.detector =
init_alignment_model('awing_fan',device=device, model_rootpath=root_path) - self.det_net = init_detection_model('retinaface_resnet50', half=False,device=device, model_rootpath=root_path) - - def extract_keypoint(self, images, name=None, info=True): - if isinstance(images, list): - keypoints = [] - if info: - i_range = tqdm(images, desc='landmark detection:') - else: - i_range = images - - for image in i_range: - current_kp = self.extract_keypoint(image) - # current_kp = self.detector.get_landmarks(np.array(image)) - if np.mean(current_kp) == -1 and keypoints: - keypoints.append(keypoints[-1]) - else: - keypoints.append(current_kp[None]) - - keypoints = np.concatenate(keypoints, 0) - np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) - return keypoints - else: - while True: - try: - with torch.no_grad(): - # face detection -> face alignment. - img = np.array(images) - bboxes = self.det_net.detect_faces(images, 0.97) - - bboxes = bboxes[0] - img = img[int(bboxes[1]):int(bboxes[3]), int(bboxes[0]):int(bboxes[2]), :] - - keypoints = landmark_98_to_68(self.detector.get_landmarks(img)) # [0] - - #### map keypoints back to the original location - keypoints[:,0] += int(bboxes[0]) - keypoints[:,1] += int(bboxes[1]) - - break - except RuntimeError as e: - if str(e).startswith('CUDA'): - print("Warning: out of memory, sleep for 1s") - time.sleep(1) - else: - print(e) - break - except TypeError: - print('No face detected in this image') - shape = [68, 2] - keypoints = -1. * np.ones(shape) - break - if name is not None: - np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) - return keypoints - -def read_video(filename): - frames = [] - cap = cv2.VideoCapture(filename) - while cap.isOpened(): - ret, frame = cap.read() - if ret: - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - frame = Image.fromarray(frame) - frames.append(frame) - else: - break - cap.release() - return frames - -def run(data): - filename, opt, device = data - os.environ['CUDA_VISIBLE_DEVICES'] = device - kp_extractor = KeypointExtractor() - images = read_video(filename) - name = filename.split('/')[-2:] - os.makedirs(os.path.join(opt.output_dir, name[-2]), exist_ok=True) - kp_extractor.extract_keypoint( - images, - name=os.path.join(opt.output_dir, name[-2], name[-1]) - ) - -if __name__ == '__main__': - set_start_method('spawn') - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input_dir', type=str, help='the folder of the input files') - parser.add_argument('--output_dir', type=str, help='the folder of the output files') - parser.add_argument('--device_ids', type=str, default='0,1') - parser.add_argument('--workers', type=int, default=4) - - opt = parser.parse_args() - filenames = list() - VIDEO_EXTENSIONS_LOWERCASE = {'mp4'} - VIDEO_EXTENSIONS = VIDEO_EXTENSIONS_LOWERCASE.union({f.upper() for f in VIDEO_EXTENSIONS_LOWERCASE}) - extensions = VIDEO_EXTENSIONS - - for ext in extensions: - filenames += sorted(glob.glob(f'{opt.input_dir}/*.{ext}')) # accumulate across extensions instead of overwriting - print('Total number of videos:', len(filenames)) - pool = Pool(opt.workers) - args_list = cycle([opt]) - device_ids = opt.device_ids.split(",") - device_ids = cycle(device_ids) - for data in tqdm(pool.imap_unordered(run, zip(filenames, args_list, device_ids))): - pass diff --git a/src/src/face3d/models/__init__.py b/src/src/face3d/models/__init__.py deleted file mode 100644 index
5a7986c7ad2ec48f404adf81fea5aa06aaf1eeb4..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/__init__.py +++ /dev/null @@ -1,67 +0,0 @@ -"""This package contains modules related to objective functions, optimizations, and network architectures. - -To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. -You need to implement the following five functions: - -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). - -- <set_input>: unpack data from dataset and apply preprocessing. - -- <forward>: produce intermediate results. - -- <optimize_parameters>: calculate loss, gradients, and update network weights. - -- <modify_commandline_options>: (optionally) add model-specific options and set default options. - -In the function <__init__>, you need to define four lists: - -- self.loss_names (str list): specify the training losses that you want to plot and save. - -- self.model_names (str list): define networks used in our training. - -- self.visual_names (str list): specify the images that you want to display and save. - -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. - -Now you can use the model class by specifying flag '--model dummy'. -See our template model class 'template_model.py' for more details. -""" - -import importlib -from src.face3d.models.base_model import BaseModel - - -def find_model_using_name(model_name): - """Import the module "models/[model_name]_model.py". - - In the file, the class called DatasetNameModel() will - be instantiated. It has to be a subclass of BaseModel, - and it is case-insensitive. - """ - model_filename = "face3d.models." + model_name + "_model" - modellib = importlib.import_module(model_filename) - model = None - target_model_name = model_name.replace('_', '') + 'model' - for name, cls in modellib.__dict__.items(): - if name.lower() == target_model_name.lower() \ - and issubclass(cls, BaseModel): - model = cls - - if model is None: - print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name)) - exit(0) - - return model - - -def get_option_setter(model_name): - """Return the static method <modify_commandline_options> of the model class.""" - model_class = find_model_using_name(model_name) - return model_class.modify_commandline_options - - -def create_model(opt): - """Create a model given the option. - - This function wraps the class CustomDatasetDataLoader. - This is the main interface between this package and 'train.py'/'test.py' - - Example: - >>> from models import create_model - >>> model = create_model(opt) - """ - model = find_model_using_name(opt.model) - instance = model(opt) - print("model [%s] was created" % type(instance).__name__) - return instance diff --git a/src/src/face3d/models/arcface_torch/README.md b/src/src/face3d/models/arcface_torch/README.md deleted file mode 100644 index 2ee63a861229b68873561fa39bfa7c9a8b53b947..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/README.md +++ /dev/null @@ -1,164 +0,0 @@ -# Distributed Arcface Training in Pytorch - -This is a deep learning library that makes face recognition efficient and effective, and which can train tens of millions -of identities on a single server.
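A quick usage sketch for the `create_model` registry deleted just above; this is hypothetical glue, not code from the repo. The real `opt` object comes from the package's option parser and carries many more flags than shown, and `facerecon` is simply one model name that follows the `<name>_model.py` / `<Name>Model` convention.

```python
# Hypothetical usage of the deleted model registry (assumes the repo root is on
# sys.path so that both `src.face3d.models` and `face3d.models` resolve,
# matching the module's own mixed import style).
from types import SimpleNamespace
from src.face3d.models import create_model

# Stand-in for the real BaseOptions object, which defines many more flags.
opt = SimpleNamespace(model='facerecon', isTrain=False)

# Imports face3d.models.facerecon_model and instantiates the class whose
# lowercased name matches 'facereconmodel'.
model = create_model(opt)
```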
- -## Requirements - -- Install [pytorch](http://pytorch.org) (torch>=1.6.0); see our doc [install.md](docs/install.md). -- `pip install -r requirements.txt`. -- Download the dataset - from [https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_). - -## How to Train - -To train a model, run `train.py` with the path to the configs: - -### 1. Single node, 8 GPUs: - -```shell -python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r50 -``` - -### 2. Multiple nodes, each node 8 GPUs: - -Node 0: - -```shell -python -m torch.distributed.launch --nproc_per_node=8 --nnodes=2 --node_rank=0 --master_addr="ip1" --master_port=1234 train.py configs/ms1mv3_r50 -``` - -Node 1: - -```shell -python -m torch.distributed.launch --nproc_per_node=8 --nnodes=2 --node_rank=1 --master_addr="ip1" --master_port=1234 train.py configs/ms1mv3_r50 -``` - -### 3. Training resnet2060 with 8 GPUs: - -```shell -python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r2060.py -``` - -## Model Zoo - -- The models are available for non-commercial research purposes only. -- All models can be found here: -- [Baidu Yun Pan](https://pan.baidu.com/s/1CL-l4zWqsI1oDuEEYVhj-g): e8pw -- [onedrive](https://1drv.ms/u/s!AswpsDO2toNKq0lWY69vN58GR6mw?e=p9Ov5d) - -### Performance on [**ICCV2021-MFR**](http://iccv21-mfr.com/) - -The ICCV2021-MFR test set consists of non-celebrities, so we can ensure that it has very little overlap with publicly available face -recognition training sets such as MS1M and CASIA, which were mostly collected from online celebrities. -As a result, we can evaluate the performance of different algorithms fairly. - -For the **ICCV2021-MFR-ALL** set, TAR is measured on an all-to-all 1:1 protocol, with FAR less than 0.000001 (1e-6). The -globalised multi-racial test set contains 242,143 identities and 1,624,305 images. - -For the **ICCV2021-MFR-MASK** set, TAR is measured on a mask-to-nonmask 1:1 protocol, with FAR less than 0.0001 (1e-4). -The mask test set contains 6,964 identities, 6,964 masked images and 13,928 non-masked images. -In total there are 13,928 positive pairs and 96,983,824 negative pairs.
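As a concrete reading of the TAR/FAR protocol described above, here is a minimal sketch of computing TAR at a fixed FAR for a 1:1 verification run. The function name and the score/label layout are assumptions for illustration, not the benchmark's actual evaluation code.

```python
import numpy as np

def tar_at_far(scores, labels, far_target=1e-6):
    # scores: similarity per pair; labels: 1 = genuine pair, 0 = impostor pair.
    pos, neg = scores[labels == 1], scores[labels == 0]
    neg_sorted = np.sort(neg)[::-1]           # impostor scores, highest first
    k = max(int(far_target * len(neg)), 1)    # impostors we may falsely accept
    threshold = neg_sorted[k - 1]             # lowest score still accepted
    return float((pos >= threshold).mean())   # fraction of genuine pairs accepted
```

The deleted tables below report exactly this quantity: TAR at FAR 1e-6 for the ALL protocol and at FAR 1e-4 for the MASK protocol.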
- -| Datasets | backbone | Training throughput | Size / MB | **ICCV2021-MFR-MASK** | **ICCV2021-MFR-ALL** | -| :---: | :--- | :--- | :--- |:--- |:--- | -| MS1MV3 | r18 | - | 91 | **47.85** | **68.33** | -| Glint360k | r18 | 8536 | 91 | **53.32** | **72.07** | -| MS1MV3 | r34 | - | 130 | **58.72** | **77.36** | -| Glint360k | r34 | 6344 | 130 | **65.10** | **83.02** | -| MS1MV3 | r50 | 5500 | 166 | **63.85** | **80.53** | -| Glint360k | r50 | 5136 | 166 | **70.23** | **87.08** | -| MS1MV3 | r100 | - | 248 | **69.09** | **84.31** | -| Glint360k | r100 | 3332 | 248 | **75.57** | **90.66** | -| MS1MV3 | mobilefacenet | 12185 | 7.8 | **41.52** | **65.26** | -| Glint360k | mobilefacenet | 11197 | 7.8 | **44.52** | **66.48** | - -### Performance on IJB-C and Verification Datasets - -| Datasets | backbone | IJBC(1e-05) | IJBC(1e-04) | agedb30 | cfp_fp | lfw | log | -| :---: | :--- | :--- | :--- | :--- |:--- |:--- |:--- | -| MS1MV3 | r18 | 92.07 | 94.66 | 97.77 | 97.73 | 99.77 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r18_fp16/training.log)| -| MS1MV3 | r34 | 94.10 | 95.90 | 98.10 | 98.67 | 99.80 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r34_fp16/training.log)| -| MS1MV3 | r50 | 94.79 | 96.46 | 98.35 | 98.96 | 99.83 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r50_fp16/training.log)| -| MS1MV3 | r100 | 95.31 | 96.81 | 98.48 | 99.06 | 99.85 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r100_fp16/training.log)| -| MS1MV3 | **r2060**| 95.34 | 97.11 | 98.67 | 99.24 | 99.87 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r2060_fp16/training.log)| -| Glint360k |r18-0.1 | 93.16 | 95.33 | 97.72 | 97.73 | 99.77 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r18_fp16_0.1/training.log)| -| Glint360k |r34-0.1 | 95.16 | 96.56 | 98.33 | 98.78 | 99.82 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r34_fp16_0.1/training.log)| -| Glint360k |r50-0.1 | 95.61 | 96.97 | 98.38 | 99.20 | 99.83 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r50_fp16_0.1/training.log)| -| Glint360k |r100-0.1 | 95.88 | 97.32 | 98.48 | 99.29 | 99.82 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r100_fp16_0.1/training.log)| - -[comment]: <> (More details see [model.md](docs/modelzoo.md) in docs.) - - -## [Speed Benchmark](docs/speed_benchmark.md) - -**Arcface Torch** can train on large-scale face recognition training sets efficiently and quickly. When the number of -classes in the training set is greater than 300K and the training is sufficient, the partial FC sampling strategy achieves the same -accuracy with several times faster training and a smaller GPU memory footprint. -Partial FC is a sparse variant of the model parallel architecture for large-scale face recognition. Partial FC uses a -sparse softmax, where each batch dynamically samples a subset of class centers for training. In each iteration, only a -sparse part of the parameters is updated, which greatly reduces GPU memory and computation. With Partial FC, -we can scale the training set to 29 million identities, the largest to date. Partial FC also supports multi-machine distributed -training and mixed precision training.
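The sampling step described in this paragraph is compact enough to sketch. The following is an assumed single-GPU simplification that ignores the model-parallel sharding of the class centers; names and shapes are illustrative, not the repo's actual `partial_fc.py`.

```python
import torch

def sample_class_centers(weight, labels, sample_rate=0.1):
    # weight: (num_classes, dim) matrix of class centers; labels: (batch,) int64.
    num_classes = weight.size(0)
    num_sample = max(int(sample_rate * num_classes), 1)
    positive = labels.unique()  # classes present in this batch (sorted)
    if positive.numel() < num_sample:
        # Pad with randomly chosen negative centers up to the sampling budget.
        mask = torch.ones(num_classes, dtype=torch.bool, device=weight.device)
        mask[positive] = False
        negatives = mask.nonzero(as_tuple=False).squeeze(1)
        perm = torch.randperm(negatives.numel(), device=weight.device)
        picked = negatives[perm[:num_sample - positive.numel()]]
        index = torch.cat([positive, picked]).sort().values
    else:
        index = positive  # batch already covers the budget; keep positives only
    sub_weight = weight[index]                      # only these rows get gradients
    new_labels = torch.searchsorted(index, labels)  # remap labels into the subset
    return sub_weight, new_labels, index
```

Because only `sub_weight` enters the softmax, the gradient and optimizer state touch roughly a `sample_rate` fraction of the centers per step, which is where the memory and speed numbers in the tables below come from.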
- -![Image text](https://github.com/anxiangsir/insightface_arcface_log/blob/master/partial_fc_v2.png) - -More details see -[speed_benchmark.md](docs/speed_benchmark.md) in docs. - -### 1. Training speed of different parallel methods (samples / second), Tesla V100 32GB * 8. (Larger is better) - -`-` means training failed because of gpu memory limitations. - -| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | -| :--- | :--- | :--- | :--- | -|125000 | 4681 | 4824 | 5004 | -|1400000 | **1672** | 3043 | 4738 | -|5500000 | **-** | **1389** | 3975 | -|8000000 | **-** | **-** | 3565 | -|16000000 | **-** | **-** | 2679 | -|29000000 | **-** | **-** | **1855** | - -### 2. GPU memory cost of different parallel methods (MB per GPU), Tesla V100 32GB * 8. (Smaller is better) - -| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | -| :--- | :--- | :--- | :--- | -|125000 | 7358 | 5306 | 4868 | -|1400000 | 32252 | 11178 | 6056 | -|5500000 | **-** | 32188 | 9854 | -|8000000 | **-** | **-** | 12310 | -|16000000 | **-** | **-** | 19950 | -|29000000 | **-** | **-** | 32324 | - -## Evaluation ICCV2021-MFR and IJB-C - -More details see [eval.md](docs/eval.md) in docs. - -## Test - -We tested many versions of PyTorch. Please create an issue if you are having trouble. - -- [x] torch 1.6.0 -- [x] torch 1.7.1 -- [x] torch 1.8.0 -- [x] torch 1.9.0 - -## Citation - -``` -@inproceedings{deng2019arcface, - title={Arcface: Additive angular margin loss for deep face recognition}, - author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos}, - booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, - pages={4690--4699}, - year={2019} -} -@inproceedings{an2020partical_fc, - title={Partial FC: Training 10 Million Identities on a Single Machine}, - author={An, Xiang and Zhu, Xuhan and Xiao, Yang and Wu, Lan and Zhang, Ming and Gao, Yuan and Qin, Bin and - Zhang, Debing and Fu Ying}, - booktitle={Arxiv 2010.05222}, - year={2020} -} -``` diff --git a/src/src/face3d/models/arcface_torch/backbones/__init__.py b/src/src/face3d/models/arcface_torch/backbones/__init__.py deleted file mode 100644 index 55bd4c5d1889a1a998b52eb56793bbc1eef1b691..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/backbones/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200 -from .mobilefacenet import get_mbf - - -def get_model(name, **kwargs): - # resnet - if name == "r18": - return iresnet18(False, **kwargs) - elif name == "r34": - return iresnet34(False, **kwargs) - elif name == "r50": - return iresnet50(False, **kwargs) - elif name == "r100": - return iresnet100(False, **kwargs) - elif name == "r200": - return iresnet200(False, **kwargs) - elif name == "r2060": - from .iresnet2060 import iresnet2060 - return iresnet2060(False, **kwargs) - elif name == "mbf": - fp16 = kwargs.get("fp16", False) - num_features = kwargs.get("num_features", 512) - return get_mbf(fp16=fp16, num_features=num_features) - else: - raise ValueError() \ No newline at end of file diff --git a/src/src/face3d/models/arcface_torch/backbones/iresnet.py b/src/src/face3d/models/arcface_torch/backbones/iresnet.py deleted file mode 100644 index c6d3b9c240c24687d432197f976ee01fbf423216..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/backbones/iresnet.py +++ /dev/null @@ -1,187 +0,0 @@ -import torch -from torch import nn - -__all__ = 
['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200'] - - -def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, - out_planes, - kernel_size=3, - stride=stride, - padding=dilation, - groups=groups, - bias=False, - dilation=dilation) - - -def conv1x1(in_planes, out_planes, stride=1): - """1x1 convolution""" - return nn.Conv2d(in_planes, - out_planes, - kernel_size=1, - stride=stride, - bias=False) - - -class IBasicBlock(nn.Module): - expansion = 1 - def __init__(self, inplanes, planes, stride=1, downsample=None, - groups=1, base_width=64, dilation=1): - super(IBasicBlock, self).__init__() - if groups != 1 or base_width != 64: - raise ValueError('BasicBlock only supports groups=1 and base_width=64') - if dilation > 1: - raise NotImplementedError("Dilation > 1 not supported in BasicBlock") - self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,) - self.conv1 = conv3x3(inplanes, planes) - self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,) - self.prelu = nn.PReLU(planes) - self.conv2 = conv3x3(planes, planes, stride) - self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - identity = x - out = self.bn1(x) - out = self.conv1(out) - out = self.bn2(out) - out = self.prelu(out) - out = self.conv2(out) - out = self.bn3(out) - if self.downsample is not None: - identity = self.downsample(x) - out += identity - return out - - -class IResNet(nn.Module): - fc_scale = 7 * 7 - def __init__(self, - block, layers, dropout=0, num_features=512, zero_init_residual=False, - groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False): - super(IResNet, self).__init__() - self.fp16 = fp16 - self.inplanes = 64 - self.dilation = 1 - if replace_stride_with_dilation is None: - replace_stride_with_dilation = [False, False, False] - if len(replace_stride_with_dilation) != 3: - raise ValueError("replace_stride_with_dilation should be None " - "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) - self.groups = groups - self.base_width = width_per_group - self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05) - self.prelu = nn.PReLU(self.inplanes) - self.layer1 = self._make_layer(block, 64, layers[0], stride=2) - self.layer2 = self._make_layer(block, - 128, - layers[1], - stride=2, - dilate=replace_stride_with_dilation[0]) - self.layer3 = self._make_layer(block, - 256, - layers[2], - stride=2, - dilate=replace_stride_with_dilation[1]) - self.layer4 = self._make_layer(block, - 512, - layers[3], - stride=2, - dilate=replace_stride_with_dilation[2]) - self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,) - self.dropout = nn.Dropout(p=dropout, inplace=True) - self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features) - self.features = nn.BatchNorm1d(num_features, eps=1e-05) - nn.init.constant_(self.features.weight, 1.0) - self.features.weight.requires_grad = False - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.normal_(m.weight, 0, 0.1) - elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - if zero_init_residual: - for m in self.modules(): - if isinstance(m, IBasicBlock): - nn.init.constant_(m.bn2.weight, 0) - - def _make_layer(self, block, planes, blocks, stride=1, dilate=False): - downsample = None - previous_dilation = self.dilation - if 
dilate: - self.dilation *= stride - stride = 1 - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - conv1x1(self.inplanes, planes * block.expansion, stride), - nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ), - ) - layers = [] - layers.append( - block(self.inplanes, planes, stride, downsample, self.groups, - self.base_width, previous_dilation)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append( - block(self.inplanes, - planes, - groups=self.groups, - base_width=self.base_width, - dilation=self.dilation)) - - return nn.Sequential(*layers) - - def forward(self, x): - with torch.cuda.amp.autocast(self.fp16): - x = self.conv1(x) - x = self.bn1(x) - x = self.prelu(x) - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - x = self.bn2(x) - x = torch.flatten(x, 1) - x = self.dropout(x) - x = self.fc(x.float() if self.fp16 else x) - x = self.features(x) - return x - - -def _iresnet(arch, block, layers, pretrained, progress, **kwargs): - model = IResNet(block, layers, **kwargs) - if pretrained: - raise ValueError() - return model - - -def iresnet18(pretrained=False, progress=True, **kwargs): - return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained, - progress, **kwargs) - - -def iresnet34(pretrained=False, progress=True, **kwargs): - return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained, - progress, **kwargs) - - -def iresnet50(pretrained=False, progress=True, **kwargs): - return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained, - progress, **kwargs) - - -def iresnet100(pretrained=False, progress=True, **kwargs): - return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained, - progress, **kwargs) - - -def iresnet200(pretrained=False, progress=True, **kwargs): - return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained, - progress, **kwargs) - diff --git a/src/src/face3d/models/arcface_torch/backbones/iresnet2060.py b/src/src/face3d/models/arcface_torch/backbones/iresnet2060.py deleted file mode 100644 index 21d1122144d207637d2444cba1f68fe630c89f31..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/backbones/iresnet2060.py +++ /dev/null @@ -1,176 +0,0 @@ -import torch -from torch import nn - -assert torch.__version__ >= "1.8.1" -from torch.utils.checkpoint import checkpoint_sequential - -__all__ = ['iresnet2060'] - - -def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, - out_planes, - kernel_size=3, - stride=stride, - padding=dilation, - groups=groups, - bias=False, - dilation=dilation) - - -def conv1x1(in_planes, out_planes, stride=1): - """1x1 convolution""" - return nn.Conv2d(in_planes, - out_planes, - kernel_size=1, - stride=stride, - bias=False) - - -class IBasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None, - groups=1, base_width=64, dilation=1): - super(IBasicBlock, self).__init__() - if groups != 1 or base_width != 64: - raise ValueError('BasicBlock only supports groups=1 and base_width=64') - if dilation > 1: - raise NotImplementedError("Dilation > 1 not supported in BasicBlock") - self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05, ) - self.conv1 = conv3x3(inplanes, planes) - self.bn2 = nn.BatchNorm2d(planes, eps=1e-05, ) - self.prelu = nn.PReLU(planes) - self.conv2 = conv3x3(planes, planes, stride) - self.bn3 = nn.BatchNorm2d(planes, eps=1e-05, ) - 
self.downsample = downsample - self.stride = stride - - def forward(self, x): - identity = x - out = self.bn1(x) - out = self.conv1(out) - out = self.bn2(out) - out = self.prelu(out) - out = self.conv2(out) - out = self.bn3(out) - if self.downsample is not None: - identity = self.downsample(x) - out += identity - return out - - -class IResNet(nn.Module): - fc_scale = 7 * 7 - - def __init__(self, - block, layers, dropout=0, num_features=512, zero_init_residual=False, - groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False): - super(IResNet, self).__init__() - self.fp16 = fp16 - self.inplanes = 64 - self.dilation = 1 - if replace_stride_with_dilation is None: - replace_stride_with_dilation = [False, False, False] - if len(replace_stride_with_dilation) != 3: - raise ValueError("replace_stride_with_dilation should be None " - "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) - self.groups = groups - self.base_width = width_per_group - self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05) - self.prelu = nn.PReLU(self.inplanes) - self.layer1 = self._make_layer(block, 64, layers[0], stride=2) - self.layer2 = self._make_layer(block, - 128, - layers[1], - stride=2, - dilate=replace_stride_with_dilation[0]) - self.layer3 = self._make_layer(block, - 256, - layers[2], - stride=2, - dilate=replace_stride_with_dilation[1]) - self.layer4 = self._make_layer(block, - 512, - layers[3], - stride=2, - dilate=replace_stride_with_dilation[2]) - self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05, ) - self.dropout = nn.Dropout(p=dropout, inplace=True) - self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features) - self.features = nn.BatchNorm1d(num_features, eps=1e-05) - nn.init.constant_(self.features.weight, 1.0) - self.features.weight.requires_grad = False - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.normal_(m.weight, 0, 0.1) - elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - if zero_init_residual: - for m in self.modules(): - if isinstance(m, IBasicBlock): - nn.init.constant_(m.bn2.weight, 0) - - def _make_layer(self, block, planes, blocks, stride=1, dilate=False): - downsample = None - previous_dilation = self.dilation - if dilate: - self.dilation *= stride - stride = 1 - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - conv1x1(self.inplanes, planes * block.expansion, stride), - nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ), - ) - layers = [] - layers.append( - block(self.inplanes, planes, stride, downsample, self.groups, - self.base_width, previous_dilation)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append( - block(self.inplanes, - planes, - groups=self.groups, - base_width=self.base_width, - dilation=self.dilation)) - - return nn.Sequential(*layers) - - def checkpoint(self, func, num_seg, x): - if self.training: - return checkpoint_sequential(func, num_seg, x) - else: - return func(x) - - def forward(self, x): - with torch.cuda.amp.autocast(self.fp16): - x = self.conv1(x) - x = self.bn1(x) - x = self.prelu(x) - x = self.layer1(x) - x = self.checkpoint(self.layer2, 20, x) - x = self.checkpoint(self.layer3, 100, x) - x = self.layer4(x) - x = self.bn2(x) - x = torch.flatten(x, 1) - x = self.dropout(x) - x = self.fc(x.float() if self.fp16 else x) - x = self.features(x) - return 
x - - -def _iresnet(arch, block, layers, pretrained, progress, **kwargs): - model = IResNet(block, layers, **kwargs) - if pretrained: - raise ValueError() - return model - - -def iresnet2060(pretrained=False, progress=True, **kwargs): - return _iresnet('iresnet2060', IBasicBlock, [3, 128, 1024 - 128, 3], pretrained, progress, **kwargs) diff --git a/src/src/face3d/models/arcface_torch/backbones/mobilefacenet.py b/src/src/face3d/models/arcface_torch/backbones/mobilefacenet.py deleted file mode 100644 index 87731491d76f9ff61cc70e57bb3f18c54fae308c..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/backbones/mobilefacenet.py +++ /dev/null @@ -1,130 +0,0 @@ -''' -Adapted from https://github.com/cavalleria/cavaface.pytorch/blob/master/backbone/mobilefacenet.py -Original author cavalleria -''' - -import torch.nn as nn -from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Sequential, Module -import torch - - -class Flatten(Module): - def forward(self, x): - return x.view(x.size(0), -1) - - -class ConvBlock(Module): - def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): - super(ConvBlock, self).__init__() - self.layers = nn.Sequential( - Conv2d(in_c, out_c, kernel, groups=groups, stride=stride, padding=padding, bias=False), - BatchNorm2d(num_features=out_c), - PReLU(num_parameters=out_c) - ) - - def forward(self, x): - return self.layers(x) - - -class LinearBlock(Module): - def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): - super(LinearBlock, self).__init__() - self.layers = nn.Sequential( - Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False), - BatchNorm2d(num_features=out_c) - ) - - def forward(self, x): - return self.layers(x) - - -class DepthWise(Module): - def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1): - super(DepthWise, self).__init__() - self.residual = residual - self.layers = nn.Sequential( - ConvBlock(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)), - ConvBlock(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride), - LinearBlock(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1)) - ) - - def forward(self, x): - short_cut = None - if self.residual: - short_cut = x - x = self.layers(x) - if self.residual: - output = short_cut + x - else: - output = x - return output - - -class Residual(Module): - def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)): - super(Residual, self).__init__() - modules = [] - for _ in range(num_block): - modules.append(DepthWise(c, c, True, kernel, stride, padding, groups)) - self.layers = Sequential(*modules) - - def forward(self, x): - return self.layers(x) - - -class GDC(Module): - def __init__(self, embedding_size): - super(GDC, self).__init__() - self.layers = nn.Sequential( - LinearBlock(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)), - Flatten(), - Linear(512, embedding_size, bias=False), - BatchNorm1d(embedding_size)) - - def forward(self, x): - return self.layers(x) - - -class MobileFaceNet(Module): - def __init__(self, fp16=False, num_features=512): - super(MobileFaceNet, self).__init__() - scale = 2 - self.fp16 = fp16 - self.layers = nn.Sequential( - ConvBlock(3, 64 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1)), - ConvBlock(64 * scale, 64 * scale, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64), - DepthWise(64 * scale, 64 * scale, 
kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128), - Residual(64 * scale, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), - DepthWise(64 * scale, 128 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256), - Residual(128 * scale, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), - DepthWise(128 * scale, 128 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512), - Residual(128 * scale, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), - ) - self.conv_sep = ConvBlock(128 * scale, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)) - self.features = GDC(num_features) - self._initialize_weights() - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - if m.bias is not None: - m.bias.data.zero_() - - def forward(self, x): - with torch.cuda.amp.autocast(self.fp16): - x = self.layers(x) - x = self.conv_sep(x.float() if self.fp16 else x) - x = self.features(x) - return x - - -def get_mbf(fp16, num_features): - return MobileFaceNet(fp16, num_features) \ No newline at end of file diff --git a/src/src/face3d/models/arcface_torch/configs/3millions.py b/src/src/face3d/models/arcface_torch/configs/3millions.py deleted file mode 100644 index c9edc2f1414e35f93abfd3dfe11a61f1f406580e..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/3millions.py +++ /dev/null @@ -1,23 +0,0 @@ -from easydict import EasyDict as edict - -# configs for test speed - -config = edict() -config.loss = "arcface" -config.network = "r50" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "synthetic" -config.num_classes = 300 * 10000 -config.num_epoch = 30 -config.warmup_epoch = -1 -config.decay_epoch = [10, 16, 22] -config.val_targets = [] diff --git a/src/src/face3d/models/arcface_torch/configs/3millions_pfc.py b/src/src/face3d/models/arcface_torch/configs/3millions_pfc.py deleted file mode 100644 index 77caafdbb300d8109d5bfdb844f131710ef81f20..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/3millions_pfc.py +++ /dev/null @@ -1,23 +0,0 @@ -from easydict import EasyDict as edict - -# configs for test speed - -config = edict() -config.loss = "arcface" -config.network = "r50" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 0.1 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "synthetic" -config.num_classes = 300 * 10000 -config.num_epoch = 30 -config.warmup_epoch = -1 -config.decay_epoch = [10, 16, 22] -config.val_targets = [] diff --git a/src/src/face3d/models/arcface_torch/configs/__init__.py b/src/src/face3d/models/arcface_torch/configs/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/src/face3d/models/arcface_torch/configs/base.py 
b/src/src/face3d/models/arcface_torch/configs/base.py deleted file mode 100644 index 78e4b36a9142b649ec39a8c59331bb2557f2ad57..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/base.py +++ /dev/null @@ -1,56 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "arcface" -config.network = "r50" -config.resume = False -config.output = "ms1mv3_arcface_r50" - -config.dataset = "ms1m-retinaface-t1" -config.embedding_size = 512 -config.sample_rate = 1 -config.fp16 = False -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -if config.dataset == "emore": - config.rec = "/train_tmp/faces_emore" - config.num_classes = 85742 - config.num_image = 5822653 - config.num_epoch = 16 - config.warmup_epoch = -1 - config.decay_epoch = [8, 14, ] - config.val_targets = ["lfw", ] - -elif config.dataset == "ms1m-retinaface-t1": - config.rec = "/train_tmp/ms1m-retinaface-t1" - config.num_classes = 93431 - config.num_image = 5179510 - config.num_epoch = 25 - config.warmup_epoch = -1 - config.decay_epoch = [11, 17, 22] - config.val_targets = ["lfw", "cfp_fp", "agedb_30"] - -elif config.dataset == "glint360k": - config.rec = "/train_tmp/glint360k" - config.num_classes = 360232 - config.num_image = 17091657 - config.num_epoch = 20 - config.warmup_epoch = -1 - config.decay_epoch = [8, 12, 15, 18] - config.val_targets = ["lfw", "cfp_fp", "agedb_30"] - -elif config.dataset == "webface": - config.rec = "/train_tmp/faces_webface_112x112" - config.num_classes = 10572 - config.num_image = "forget" - config.num_epoch = 34 - config.warmup_epoch = -1 - config.decay_epoch = [20, 28, 32] - config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/glint360k_mbf.py b/src/src/face3d/models/arcface_torch/configs/glint360k_mbf.py deleted file mode 100644 index 46ae777cc97af41a531cba4e5d1ff31f2efcb468..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/glint360k_mbf.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "cosface" -config.network = "mbf" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 0.1 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 2e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/glint360k" -config.num_classes = 360232 -config.num_image = 17091657 -config.num_epoch = 20 -config.warmup_epoch = -1 -config.decay_epoch = [8, 12, 15, 18] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/glint360k_r100.py b/src/src/face3d/models/arcface_torch/configs/glint360k_r100.py deleted file mode 100644 index 93d0701c0094517cec147c382b005e8063938548..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/glint360k_r100.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "cosface" -config.network = "r100" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True 
-config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/glint360k" -config.num_classes = 360232 -config.num_image = 17091657 -config.num_epoch = 20 -config.warmup_epoch = -1 -config.decay_epoch = [8, 12, 15, 18] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/glint360k_r18.py b/src/src/face3d/models/arcface_torch/configs/glint360k_r18.py deleted file mode 100644 index 7a8db34cd547e8e667103c93585296e47a894e97..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/glint360k_r18.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "cosface" -config.network = "r18" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/glint360k" -config.num_classes = 360232 -config.num_image = 17091657 -config.num_epoch = 20 -config.warmup_epoch = -1 -config.decay_epoch = [8, 12, 15, 18] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/glint360k_r34.py b/src/src/face3d/models/arcface_torch/configs/glint360k_r34.py deleted file mode 100644 index fda2701758a839a7161d09c25f0ca3d26033baff..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/glint360k_r34.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "cosface" -config.network = "r34" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/glint360k" -config.num_classes = 360232 -config.num_image = 17091657 -config.num_epoch = 20 -config.warmup_epoch = -1 -config.decay_epoch = [8, 12, 15, 18] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/glint360k_r50.py b/src/src/face3d/models/arcface_torch/configs/glint360k_r50.py deleted file mode 100644 index 37e7922f1f63284e356dcc45a5f979f9c105f25e..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/glint360k_r50.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "cosface" -config.network = "r50" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/glint360k" -config.num_classes = 360232 -config.num_image = 17091657 -config.num_epoch = 20 -config.warmup_epoch = -1 -config.decay_epoch = [8, 12, 15, 18] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/ms1mv3_mbf.py b/src/src/face3d/models/arcface_torch/configs/ms1mv3_mbf.py deleted file 
mode 100644 index b8a00d6305eeda5a94788017afc1cda0d4a4cd2a..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/ms1mv3_mbf.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "arcface" -config.network = "mbf" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 2e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/ms1m-retinaface-t1" -config.num_classes = 93431 -config.num_image = 5179510 -config.num_epoch = 30 -config.warmup_epoch = -1 -config.decay_epoch = [10, 20, 25] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r18.py b/src/src/face3d/models/arcface_torch/configs/ms1mv3_r18.py deleted file mode 100644 index eb4e0d31f1aedf4590628d394e1606920fefb5c9..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r18.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "arcface" -config.network = "r18" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/ms1m-retinaface-t1" -config.num_classes = 93431 -config.num_image = 5179510 -config.num_epoch = 25 -config.warmup_epoch = -1 -config.decay_epoch = [10, 16, 22] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r2060.py b/src/src/face3d/models/arcface_torch/configs/ms1mv3_r2060.py deleted file mode 100644 index 23ad81e082c4b6390b67b164d0ceb84bb0635684..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r2060.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "arcface" -config.network = "r2060" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 64 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/ms1m-retinaface-t1" -config.num_classes = 93431 -config.num_image = 5179510 -config.num_epoch = 25 -config.warmup_epoch = -1 -config.decay_epoch = [10, 16, 22] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r34.py b/src/src/face3d/models/arcface_torch/configs/ms1mv3_r34.py deleted file mode 100644 index 5f78337a3d1f9eb6e9145eb5093618796c6842d2..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r34.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "arcface" -config.network = "r34" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 
-config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/ms1m-retinaface-t1" -config.num_classes = 93431 -config.num_image = 5179510 -config.num_epoch = 25 -config.warmup_epoch = -1 -config.decay_epoch = [10, 16, 22] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r50.py b/src/src/face3d/models/arcface_torch/configs/ms1mv3_r50.py deleted file mode 100644 index 08ba55dbbea6df0afffddbb3d1ed173efad99604..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/ms1mv3_r50.py +++ /dev/null @@ -1,26 +0,0 @@ -from easydict import EasyDict as edict - -# make training faster -# our RAM is 256G -# mount -t tmpfs -o size=140G tmpfs /train_tmp - -config = edict() -config.loss = "arcface" -config.network = "r50" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "/train_tmp/ms1m-retinaface-t1" -config.num_classes = 93431 -config.num_image = 5179510 -config.num_epoch = 25 -config.warmup_epoch = -1 -config.decay_epoch = [10, 16, 22] -config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/src/src/face3d/models/arcface_torch/configs/speed.py b/src/src/face3d/models/arcface_torch/configs/speed.py deleted file mode 100644 index 45e95237da65e44f35a172c25ac6dc4e313e4eae..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/configs/speed.py +++ /dev/null @@ -1,23 +0,0 @@ -from easydict import EasyDict as edict - -# configs for test speed - -config = edict() -config.loss = "arcface" -config.network = "r50" -config.resume = False -config.output = None -config.embedding_size = 512 -config.sample_rate = 1.0 -config.fp16 = True -config.momentum = 0.9 -config.weight_decay = 5e-4 -config.batch_size = 128 -config.lr = 0.1 # batch size is 512 - -config.rec = "synthetic" -config.num_classes = 100 * 10000 -config.num_epoch = 30 -config.warmup_epoch = -1 -config.decay_epoch = [10, 16, 22] -config.val_targets = [] diff --git a/src/src/face3d/models/arcface_torch/dataset.py b/src/src/face3d/models/arcface_torch/dataset.py deleted file mode 100644 index 96bbb8bb6da99122f350bc8e1a6390245840e32b..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/dataset.py +++ /dev/null @@ -1,124 +0,0 @@ -import numbers -import os -import queue as Queue -import threading - -import mxnet as mx -import numpy as np -import torch -from torch.utils.data import DataLoader, Dataset -from torchvision import transforms - - -class BackgroundGenerator(threading.Thread): - def __init__(self, generator, local_rank, max_prefetch=6): - super(BackgroundGenerator, self).__init__() - self.queue = Queue.Queue(max_prefetch) - self.generator = generator - self.local_rank = local_rank - self.daemon = True - self.start() - - def run(self): - torch.cuda.set_device(self.local_rank) - for item in self.generator: - self.queue.put(item) - self.queue.put(None) - - def next(self): - next_item = self.queue.get() - if next_item is None: - raise StopIteration - return next_item - - def __next__(self): - return self.next() - - def __iter__(self): - return self - - -class DataLoaderX(DataLoader): - - def __init__(self, local_rank, **kwargs): - super(DataLoaderX, self).__init__(**kwargs) - self.stream = 
torch.cuda.Stream(local_rank) - self.local_rank = local_rank - - def __iter__(self): - self.iter = super(DataLoaderX, self).__iter__() - self.iter = BackgroundGenerator(self.iter, self.local_rank) - self.preload() - return self - - def preload(self): - self.batch = next(self.iter, None) - if self.batch is None: - return None - # copy the staged batch to the GPU on a side stream so it overlaps compute - with torch.cuda.stream(self.stream): - for k in range(len(self.batch)): - self.batch[k] = self.batch[k].to(device=self.local_rank, non_blocking=True) - - def __next__(self): - # block until the prefetch copy has finished before handing the batch out - torch.cuda.current_stream().wait_stream(self.stream) - batch = self.batch - if batch is None: - raise StopIteration - self.preload() - return batch - - -class MXFaceDataset(Dataset): - # indexed MXNet RecordIO pack of aligned training faces - def __init__(self, root_dir, local_rank): - super(MXFaceDataset, self).__init__() - self.transform = transforms.Compose( - [transforms.ToPILImage(), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), - ]) - self.root_dir = root_dir - self.local_rank = local_rank - path_imgrec = os.path.join(root_dir, 'train.rec') - path_imgidx = os.path.join(root_dir, 'train.idx') - self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') - s = self.imgrec.read_idx(0) - header, _ = mx.recordio.unpack(s) - if header.flag > 0: - self.header0 = (int(header.label[0]), int(header.label[1])) - self.imgidx = np.array(range(1, int(header.label[0]))) - else: - self.imgidx = np.array(list(self.imgrec.keys)) - - def __getitem__(self, index): - idx = self.imgidx[index] - s = self.imgrec.read_idx(idx) - header, img = mx.recordio.unpack(s) - label = header.label - if not isinstance(label, numbers.Number): - label = label[0] - label = torch.tensor(label, dtype=torch.long) - sample = mx.image.imdecode(img).asnumpy() - if self.transform is not None: - sample = self.transform(sample) - return sample, label - - def __len__(self): - return len(self.imgidx) - - -class SyntheticDataset(Dataset): - def __init__(self, local_rank): - super(SyntheticDataset, self).__init__() - img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) - img = np.transpose(img, (2, 0, 1)) - img = torch.from_numpy(img).squeeze(0).float() - img = ((img / 255) - 0.5) / 0.5 - self.img = img - self.label = 1 - - def __getitem__(self, index): - return self.img, self.label - - def __len__(self): - return 1000000
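For readers skimming this removal: `DataLoaderX` wraps a regular `DataLoader` with a background thread plus a side CUDA stream, so batches are already on the GPU when the training loop asks for them. A minimal usage sketch, assuming single-GPU rank 0, the imports at the top of `dataset.py`, and the `ms1m-retinaface-t1` record pack referenced by the configs (real distributed training would also pass a sampler):

```python
# Hedged sketch, not part of the deleted file.
trainset = MXFaceDataset(root_dir='/train_tmp/ms1m-retinaface-t1', local_rank=0)
loader = DataLoaderX(local_rank=0, dataset=trainset, batch_size=128,
                     pin_memory=True, drop_last=True)
for img, label in loader:  # batches arrive already copied to cuda:0
    pass                   # forward/backward would go here
```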
diff --git a/src/src/face3d/models/arcface_torch/docs/eval.md b/src/src/face3d/models/arcface_torch/docs/eval.md deleted file mode 100644 index dd1d9e257367b6422680966198646c45e5a2671d..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/docs/eval.md +++ /dev/null @@ -1,31 +0,0 @@ -## Eval on ICCV2021-MFR - -coming soon. - - -## Eval IJBC -You can evaluate IJB-C with either PyTorch or ONNX. - - -1. Eval IJBC with ONNX -```shell -CUDA_VISIBLE_DEVICES=0 python onnx_ijbc.py --model-root ms1mv3_arcface_r50 --image-path IJB_release/IJBC --result-dir ms1mv3_arcface_r50 -``` - -2. Eval IJBC with PyTorch -```shell -CUDA_VISIBLE_DEVICES=0,1 python eval_ijbc.py \ ---model-prefix ms1mv3_arcface_r50/backbone.pth \ ---image-path IJB_release/IJBC \ ---result-dir ms1mv3_arcface_r50 \ ---batch-size 128 \ ---job ms1mv3_arcface_r50 \ ---target IJBC \ ---network iresnet50 -``` - -## Inference - -```shell -python inference.py --weight ms1mv3_arcface_r50/backbone.pth --network r50 -``` diff --git a/src/src/face3d/models/arcface_torch/docs/install.md b/src/src/face3d/models/arcface_torch/docs/install.md deleted file mode 100644 index 6314a40441285e9236438e468caf8b71a407531a..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/docs/install.md +++ /dev/null @@ -1,51 +0,0 @@ -## v1.8.0 -### Linux and Windows -```shell -# CUDA 11.0 -pip --default-timeout=100 install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html - -# CUDA 10.2 -pip --default-timeout=100 install torch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 - -# CPU only -pip --default-timeout=100 install torch==1.8.0+cpu torchvision==0.9.0+cpu torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html - -``` - - -## v1.7.1 -### Linux and Windows -```shell -# CUDA 11.0 -pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html - -# CUDA 10.2 -pip install torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 - -# CUDA 10.1 -pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html - -# CUDA 9.2 -pip install torch==1.7.1+cu92 torchvision==0.8.2+cu92 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html - -# CPU only -pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html -``` - - -## v1.6.0 - -### Linux and Windows -```shell -# CUDA 10.2 -pip install torch==1.6.0 torchvision==0.7.0 - -# CUDA 10.1 -pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html - -# CUDA 9.2 -pip install torch==1.6.0+cu92 torchvision==0.7.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html - -# CPU only -pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html -``` \ No newline at end of file diff --git a/src/src/face3d/models/arcface_torch/docs/modelzoo.md b/src/src/face3d/models/arcface_torch/docs/modelzoo.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/src/face3d/models/arcface_torch/docs/speed_benchmark.md b/src/src/face3d/models/arcface_torch/docs/speed_benchmark.md deleted file mode 100644 index 055aee0defe2c43a523ced48260242f0f99b7cea..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/docs/speed_benchmark.md +++ /dev/null @@ -1,93 +0,0 @@ -## Test Training Speed - -- Test Commands - -Use the following two commands to test Partial FC training performance. -The number of identities is **3 million** (synthetic data), mixed-precision training is enabled, the backbone is ResNet-50, and the -batch size is 1024.
-```shell -# Model Parallel -python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions -# Partial FC 0.1 -python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions_pfc -``` - -- GPU Memory - -``` -# (Model Parallel) gpustat -i -[0] Tesla V100-SXM2-32GB | 64'C, 94 % | 30338 / 32510 MB -[1] Tesla V100-SXM2-32GB | 60'C, 99 % | 28876 / 32510 MB -[2] Tesla V100-SXM2-32GB | 60'C, 99 % | 28872 / 32510 MB -[3] Tesla V100-SXM2-32GB | 69'C, 99 % | 28872 / 32510 MB -[4] Tesla V100-SXM2-32GB | 66'C, 99 % | 28888 / 32510 MB -[5] Tesla V100-SXM2-32GB | 60'C, 99 % | 28932 / 32510 MB -[6] Tesla V100-SXM2-32GB | 68'C, 100 % | 28916 / 32510 MB -[7] Tesla V100-SXM2-32GB | 65'C, 99 % | 28860 / 32510 MB - -# (Partial FC 0.1) gpustat -i -[0] Tesla V100-SXM2-32GB | 60'C, 95 % | 10488 / 32510 MB -[1] Tesla V100-SXM2-32GB | 60'C, 97 % | 10344 / 32510 MB -[2] Tesla V100-SXM2-32GB | 61'C, 95 % | 10340 / 32510 MB -[3] Tesla V100-SXM2-32GB | 66'C, 95 % | 10340 / 32510 MB -[4] Tesla V100-SXM2-32GB | 65'C, 94 % | 10356 / 32510 MB -[5] Tesla V100-SXM2-32GB | 61'C, 95 % | 10400 / 32510 MB -[6] Tesla V100-SXM2-32GB | 68'C, 96 % | 10384 / 32510 MB -[7] Tesla V100-SXM2-32GB | 64'C, 95 % | 10328 / 32510 MB -``` - -- Training Speed - -``` -# (Model Parallel) training.log -Training: Speed 2271.33 samples/sec Loss 1.1624 LearningRate 0.2000 Epoch: 0 Global Step: 100 -Training: Speed 2269.94 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 -Training: Speed 2272.67 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 -Training: Speed 2266.55 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 -Training: Speed 2272.54 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 - -# (Partial FC 0.1) training.log -Training: Speed 5299.56 samples/sec Loss 1.0965 LearningRate 0.2000 Epoch: 0 Global Step: 100 -Training: Speed 5296.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 -Training: Speed 5304.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 -Training: Speed 5274.43 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 -Training: Speed 5300.10 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 -``` - -In this test case, Partial FC 0.1 uses only about 1/3 of the GPU memory of model parallel, -and the training speed is 2.5 times faster than model parallel. - - -## Speed Benchmark - -1. Training speed of different parallel methods (samples/second), Tesla V100 32GB * 8. (Larger is better) - -| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | -| :--- | :--- | :--- | :--- | -|125000 | 4681 | 4824 | 5004 | -|250000 | 4047 | 4521 | 4976 | -|500000 | 3087 | 4013 | 4900 | -|1000000 | 2090 | 3449 | 4803 | -|1400000 | 1672 | 3043 | 4738 | -|2000000 | - | 2593 | 4626 | -|4000000 | - | 1748 | 4208 | -|5500000 | - | 1389 | 3975 | -|8000000 | - | - | 3565 | -|16000000 | - | - | 2679 | -|29000000 | - | - | 1855 | - -2. GPU memory cost of different parallel methods (MB per GPU), Tesla V100 32GB * 8. 
(Smaller is better) - -| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | -| :--- | :--- | :--- | :--- | -|125000 | 7358 | 5306 | 4868 | -|250000 | 9940 | 5826 | 5004 | -|500000 | 14220 | 7114 | 5202 | -|1000000 | 23708 | 9966 | 5620 | -|1400000 | 32252 | 11178 | 6056 | -|2000000 | - | 13978 | 6472 | -|4000000 | - | 23238 | 8284 | -|5500000 | - | 32188 | 9854 | -|8000000 | - | - | 12310 | -|16000000 | - | - | 19950 | -|29000000 | - | - | 32324 | diff --git a/src/src/face3d/models/arcface_torch/eval/__init__.py b/src/src/face3d/models/arcface_torch/eval/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/src/face3d/models/arcface_torch/eval/verification.py b/src/src/face3d/models/arcface_torch/eval/verification.py deleted file mode 100644 index 253343b83dbf9d1bd154d14ec068e098bf0968db..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/eval/verification.py +++ /dev/null @@ -1,407 +0,0 @@ -"""Helper for evaluation on the Labeled Faces in the Wild dataset -""" - -# MIT License -# -# Copyright (c) 2016 David Sandberg -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- - -import datetime -import os -import pickle - -import mxnet as mx -import numpy as np -import sklearn -import torch -from mxnet import ndarray as nd -from scipy import interpolate -from sklearn.decomposition import PCA -from sklearn.model_selection import KFold - - -class LFold: - def __init__(self, n_splits=2, shuffle=False): - self.n_splits = n_splits - if self.n_splits > 1: - self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle) - - def split(self, indices): - if self.n_splits > 1: - return self.k_fold.split(indices) - else: - return [(indices, indices)] - - -def calculate_roc(thresholds, - embeddings1, - embeddings2, - actual_issame, - nrof_folds=10, - pca=0): - assert (embeddings1.shape[0] == embeddings2.shape[0]) - assert (embeddings1.shape[1] == embeddings2.shape[1]) - nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) - nrof_thresholds = len(thresholds) - k_fold = LFold(n_splits=nrof_folds, shuffle=False) - - tprs = np.zeros((nrof_folds, nrof_thresholds)) - fprs = np.zeros((nrof_folds, nrof_thresholds)) - accuracy = np.zeros((nrof_folds)) - indices = np.arange(nrof_pairs) - - if pca == 0: - diff = np.subtract(embeddings1, embeddings2) - dist = np.sum(np.square(diff), 1) - - for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): - if pca > 0: - print('doing pca on', fold_idx) - embed1_train = embeddings1[train_set] - embed2_train = embeddings2[train_set] - _embed_train = np.concatenate((embed1_train, embed2_train), axis=0) - pca_model = PCA(n_components=pca) - pca_model.fit(_embed_train) - embed1 = pca_model.transform(embeddings1) - embed2 = pca_model.transform(embeddings2) - embed1 = sklearn.preprocessing.normalize(embed1) - embed2 = sklearn.preprocessing.normalize(embed2) - diff = np.subtract(embed1, embed2) - dist = np.sum(np.square(diff), 1) - - # Find the best threshold for the fold - acc_train = np.zeros((nrof_thresholds)) - for threshold_idx, threshold in enumerate(thresholds): - _, _, acc_train[threshold_idx] = calculate_accuracy( - threshold, dist[train_set], actual_issame[train_set]) - best_threshold_index = np.argmax(acc_train) - for threshold_idx, threshold in enumerate(thresholds): - tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy( - threshold, dist[test_set], - actual_issame[test_set]) - _, _, accuracy[fold_idx] = calculate_accuracy( - thresholds[best_threshold_index], dist[test_set], - actual_issame[test_set]) - - tpr = np.mean(tprs, 0) - fpr = np.mean(fprs, 0) - return tpr, fpr, accuracy - - -def calculate_accuracy(threshold, dist, actual_issame): - predict_issame = np.less(dist, threshold) - tp = np.sum(np.logical_and(predict_issame, actual_issame)) - fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) - tn = np.sum( - np.logical_and(np.logical_not(predict_issame), - np.logical_not(actual_issame))) - fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) - - tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn) - fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn) - acc = float(tp + tn) / dist.size - return tpr, fpr, acc - - -def calculate_val(thresholds, - embeddings1, - embeddings2, - actual_issame, - far_target, - nrof_folds=10): - assert (embeddings1.shape[0] == embeddings2.shape[0]) - assert (embeddings1.shape[1] == embeddings2.shape[1]) - nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) - nrof_thresholds = len(thresholds) - k_fold = LFold(n_splits=nrof_folds, shuffle=False) - - val = np.zeros(nrof_folds) - far = 
np.zeros(nrof_folds) - - diff = np.subtract(embeddings1, embeddings2) - dist = np.sum(np.square(diff), 1) - indices = np.arange(nrof_pairs) - - for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): - - # Find the threshold that gives FAR = far_target - far_train = np.zeros(nrof_thresholds) - for threshold_idx, threshold in enumerate(thresholds): - _, far_train[threshold_idx] = calculate_val_far( - threshold, dist[train_set], actual_issame[train_set]) - if np.max(far_train) >= far_target: - f = interpolate.interp1d(far_train, thresholds, kind='slinear') - threshold = f(far_target) - else: - threshold = 0.0 - - val[fold_idx], far[fold_idx] = calculate_val_far( - threshold, dist[test_set], actual_issame[test_set]) - - val_mean = np.mean(val) - far_mean = np.mean(far) - val_std = np.std(val) - return val_mean, val_std, far_mean - - -def calculate_val_far(threshold, dist, actual_issame): - predict_issame = np.less(dist, threshold) - true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) - false_accept = np.sum( - np.logical_and(predict_issame, np.logical_not(actual_issame))) - n_same = np.sum(actual_issame) - n_diff = np.sum(np.logical_not(actual_issame)) - # print(true_accept, false_accept) - # print(n_same, n_diff) - val = float(true_accept) / float(n_same) - far = float(false_accept) / float(n_diff) - return val, far - - -def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0): - # Calculate evaluation metrics - thresholds = np.arange(0, 4, 0.01) - embeddings1 = embeddings[0::2] - embeddings2 = embeddings[1::2] - tpr, fpr, accuracy = calculate_roc(thresholds, - embeddings1, - embeddings2, - np.asarray(actual_issame), - nrof_folds=nrof_folds, - pca=pca) - thresholds = np.arange(0, 4, 0.001) - val, val_std, far = calculate_val(thresholds, - embeddings1, - embeddings2, - np.asarray(actual_issame), - 1e-3, - nrof_folds=nrof_folds) - return tpr, fpr, accuracy, val, val_std, far - -@torch.no_grad() -def load_bin(path, image_size): - try: - with open(path, 'rb') as f: - bins, issame_list = pickle.load(f) # py2 - except UnicodeDecodeError as e: - with open(path, 'rb') as f: - bins, issame_list = pickle.load(f, encoding='bytes') # py3 - data_list = [] - for flip in [0, 1]: - data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1])) - data_list.append(data) - for idx in range(len(issame_list) * 2): - _bin = bins[idx] - img = mx.image.imdecode(_bin) - if img.shape[1] != image_size[0]: - img = mx.image.resize_short(img, image_size[0]) - img = nd.transpose(img, axes=(2, 0, 1)) - for flip in [0, 1]: - if flip == 1: - img = mx.ndarray.flip(data=img, axis=2) - data_list[flip][idx][:] = torch.from_numpy(img.asnumpy()) - if idx % 1000 == 0: - print('loading bin', idx) - print(data_list[0].shape) - return data_list, issame_list - -@torch.no_grad() -def test(data_set, backbone, batch_size, nfolds=10): - print('testing verification..') - data_list = data_set[0] - issame_list = data_set[1] - embeddings_list = [] - time_consumed = 0.0 - for i in range(len(data_list)): - data = data_list[i] - embeddings = None - ba = 0 - while ba < data.shape[0]: - bb = min(ba + batch_size, data.shape[0]) - count = bb - ba - _data = data[bb - batch_size: bb] - time0 = datetime.datetime.now() - img = ((_data / 255) - 0.5) / 0.5 - net_out: torch.Tensor = backbone(img) - _embeddings = net_out.detach().cpu().numpy() - time_now = datetime.datetime.now() - diff = time_now - time0 - time_consumed += diff.total_seconds() - if embeddings is None: - embeddings = np.zeros((data.shape[0], 
_embeddings.shape[1])) - embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :] - ba = bb - embeddings_list.append(embeddings) - - _xnorm = 0.0 - _xnorm_cnt = 0 - for embed in embeddings_list: - for i in range(embed.shape[0]): - _em = embed[i] - _norm = np.linalg.norm(_em) - _xnorm += _norm - _xnorm_cnt += 1 - _xnorm /= _xnorm_cnt - - acc1 = 0.0 - std1 = 0.0 - embeddings = embeddings_list[0] + embeddings_list[1] - embeddings = sklearn.preprocessing.normalize(embeddings) - print(embeddings.shape) - print('infer time', time_consumed) - _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds) - acc2, std2 = np.mean(accuracy), np.std(accuracy) - return acc1, std1, acc2, std2, _xnorm, embeddings_list - - -def dumpR(data_set, - backbone, - batch_size, - name='', - data_extra=None, - label_shape=None): - print('dump verification embedding..') - data_list = data_set[0] - issame_list = data_set[1] - embeddings_list = [] - time_consumed = 0.0 - for i in range(len(data_list)): - data = data_list[i] - embeddings = None - ba = 0 - while ba < data.shape[0]: - bb = min(ba + batch_size, data.shape[0]) - count = bb - ba - - _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb) - _label = nd.ones((_data.shape[0],)) # dummy labels; only the embeddings are consumed - time0 = datetime.datetime.now() - if data_extra is None: - db = mx.io.DataBatch(data=(_data,), label=(_label,)) - else: - db = mx.io.DataBatch(data=(_data, data_extra), - label=(_label,)) - # `backbone` is an MXNet Module in this legacy dump path - backbone.forward(db, is_train=False) - net_out = backbone.get_outputs() - _embeddings = net_out[0].asnumpy() - time_now = datetime.datetime.now() - diff = time_now - time0 - time_consumed += diff.total_seconds() - if embeddings is None: - embeddings = np.zeros((data.shape[0], _embeddings.shape[1])) - embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :] - ba = bb - embeddings_list.append(embeddings) - embeddings = embeddings_list[0] + embeddings_list[1] - embeddings = sklearn.preprocessing.normalize(embeddings) - actual_issame = np.asarray(issame_list) - outname = 'temp.bin' - with open(outname, 'wb') as f: - pickle.dump((embeddings, issame_list), - f, - protocol=pickle.HIGHEST_PROTOCOL)
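A quick usage sketch of the two PyTorch-path helpers above, `load_bin` and `test`. The checkpoint and `.bin` paths are placeholders, and `get_model` is the backbone factory from this same `arcface_torch` package (as used in `inference.py`):

```python
# Hedged example: evaluate a checkpoint on an LFW-style verification bin (CPU).
import torch
from backbones import get_model

backbone = get_model('r50', fp16=False)
backbone.load_state_dict(torch.load('ms1mv3_arcface_r50/backbone.pth', map_location='cpu'))
backbone.eval()

data_set = load_bin('faces_emore/lfw.bin', image_size=[112, 112])
acc1, std1, acc2, std2, xnorm, _ = test(data_set, backbone, batch_size=64, nfolds=10)
print('Accuracy-Flip: %1.5f+-%1.5f' % (acc2, std2))
```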
- - -# if __name__ == '__main__': -# -# parser = argparse.ArgumentParser(description='do verification') -# # general -# parser.add_argument('--data-dir', default='', help='') -# parser.add_argument('--model', -# default='../model/softmax,50', -# help='path to load model.') -# parser.add_argument('--target', -# default='lfw,cfp_ff,cfp_fp,agedb_30', -# help='test targets.') -# parser.add_argument('--gpu', default=0, type=int, help='gpu id') -# parser.add_argument('--batch-size', default=32, type=int, help='') -# parser.add_argument('--max', default='', type=str, help='') -# parser.add_argument('--mode', default=0, type=int, help='') -# parser.add_argument('--nfolds', default=10, type=int, help='') -# args = parser.parse_args() -# image_size = [112, 112] -# print('image_size', image_size) -# ctx = mx.gpu(args.gpu) -# nets = [] -# vec = args.model.split(',') -# prefix = args.model.split(',')[0] -# epochs = [] -# if len(vec) == 1: -# pdir = os.path.dirname(prefix) -# for fname in os.listdir(pdir): -# if not fname.endswith('.params'): -# continue -# _file = os.path.join(pdir, fname) -# if _file.startswith(prefix): -# epoch = int(fname.split('.')[0].split('-')[1]) -# epochs.append(epoch) -# epochs = sorted(epochs, reverse=True) -# if len(args.max) > 0: -# _max = [int(x) for x in args.max.split(',')] -# assert len(_max) == 2 -# if len(epochs) > _max[1]: -# epochs = epochs[_max[0]:_max[1]] -# -# else: -# epochs = [int(x) for x in vec[1].split('|')] -# print('model number', len(epochs)) -# time0 = datetime.datetime.now() -# for epoch in epochs: -# print('loading', prefix, epoch) -# sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) -# # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx) -# all_layers = sym.get_internals() -# sym = all_layers['fc1_output'] -# model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) -# # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))]) -# model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], -# image_size[1]))]) -# model.set_params(arg_params, aux_params) -# nets.append(model) -# time_now = datetime.datetime.now() -# diff = time_now - time0 -# print('model loading time', diff.total_seconds()) -# -# ver_list = [] -# ver_name_list = [] -# for name in args.target.split(','): -# path = os.path.join(args.data_dir, name + ".bin") -# if os.path.exists(path): -# print('loading.. ', name) -# data_set = load_bin(path, image_size) -# ver_list.append(data_set) -# ver_name_list.append(name) -# -# if args.mode == 0: -# for i in range(len(ver_list)): -# results = [] -# for model in nets: -# acc1, std1, acc2, std2, xnorm, embeddings_list = test( -# ver_list[i], model, args.batch_size, args.nfolds) -# print('[%s]XNorm: %f' % (ver_name_list[i], xnorm)) -# print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1)) -# print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2)) -# results.append(acc2) -# print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results))) -# elif args.mode == 1: -# raise ValueError -# else: -# model = nets[0] -# dumpR(ver_list[0], model, args.batch_size, args.target)
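Before the next file: the heart of the protocol above is `calculate_accuracy`, which thresholds squared L2 distances between embedding pairs. A toy walk-through with made-up numbers, assuming the function is imported from the module just removed:

```python
# Hedged example of the threshold arithmetic in calculate_accuracy.
import numpy as np

dist = np.array([0.3, 0.9, 1.5, 2.2])          # squared distances for 4 pairs
issame = np.array([True, True, False, False])  # ground-truth labels
tpr, fpr, acc = calculate_accuracy(1.2, dist, issame)
# dist < 1.2 predicts "same": tp=2, fp=0, tn=2, fn=0
# so tpr = 2/2 = 1.0, fpr = 0/2 = 0.0, acc = 4/4 = 1.0
```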
diff --git a/src/src/face3d/models/arcface_torch/eval_ijbc.py b/src/src/face3d/models/arcface_torch/eval_ijbc.py deleted file mode 100644 index 9c5a650d486d18eb02d6f60d448fc3b315261f5d..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/eval_ijbc.py +++ /dev/null @@ -1,483 +0,0 @@ -# coding: utf-8 - -import os -import pickle - -import matplotlib -import pandas as pd - -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import timeit -import sklearn -import argparse -import cv2 -import numpy as np -import torch -from skimage import transform as trans -from backbones import get_model -from sklearn.metrics import roc_curve, auc - -from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap -from prettytable import PrettyTable -from pathlib import Path - -import sys -import warnings - -sys.path.insert(0, "../") -warnings.filterwarnings("ignore") - -parser = argparse.ArgumentParser(description='do ijb test') -# general -parser.add_argument('--model-prefix', default='', help='path to load model.') -parser.add_argument('--image-path', default='', type=str, help='') -parser.add_argument('--result-dir', default='.', type=str, help='') -parser.add_argument('--batch-size', default=128, type=int, help='') -parser.add_argument('--network', default='iresnet50', type=str, help='') -parser.add_argument('--job', default='insightface', type=str, help='job name') -parser.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB') -args = parser.parse_args() - -target = args.target -model_path = args.model_prefix -image_path = args.image_path -result_dir = args.result_dir -gpu_id = None -use_norm_score = True # if True, TestMode(N1) -use_detector_score = True # if True, TestMode(D1) -use_flip_test = True # if True, TestMode(F1) -job = args.job -batch_size = args.batch_size - - -class Embedding(object): - def __init__(self, prefix, data_shape, batch_size=1): - image_size = (112, 112) - self.image_size = image_size - weight = torch.load(prefix) - resnet = get_model(args.network, dropout=0, fp16=False).cuda() - resnet.load_state_dict(weight) - model = torch.nn.DataParallel(resnet) - self.model = model - self.model.eval() - src = np.array([ - [30.2946, 51.6963], - [65.5318, 51.5014], - [48.0252, 71.7366], - [33.5493, 92.3655], - [62.7299, 92.2041]], dtype=np.float32) - src[:, 0] += 8.0 - self.src = src - self.batch_size = batch_size - self.data_shape = data_shape - - def get(self, rimg, landmark): - - assert landmark.shape[0] == 68 or landmark.shape[0] == 5 - assert landmark.shape[1] == 2 - if landmark.shape[0] == 68: - landmark5 = np.zeros((5, 2), dtype=np.float32) - landmark5[0] = (landmark[36] + landmark[39]) / 2 - landmark5[1] = (landmark[42] + landmark[45]) / 2 - landmark5[2] = landmark[30] - landmark5[3] = landmark[48] - landmark5[4] = landmark[54] - else: - landmark5 = landmark - tform = trans.SimilarityTransform() - tform.estimate(landmark5, self.src) - M = tform.params[0:2, :] - img = cv2.warpAffine(rimg, - M, (self.image_size[1], self.image_size[0]), - borderValue=0.0) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img_flip = np.fliplr(img) - img = np.transpose(img, (2, 0, 1)) # 3*112*112, RGB - img_flip = np.transpose(img_flip, (2, 0, 1)) - input_blob = np.zeros((2, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8) - input_blob[0] = img - input_blob[1] = img_flip - return input_blob - - @torch.no_grad() - def forward_db(self, batch_data): - imgs = torch.Tensor(batch_data).cuda() - imgs.div_(255).sub_(0.5).div_(0.5) - feat = self.model(imgs) - feat = feat.reshape([self.batch_size, 2 * feat.shape[1]]) - return feat.cpu().numpy() - - -# Split a list as evenly as possible into n parts; if n exceeds the number of -# elements, the surplus parts are empty lists. -def divideIntoNstrand(listTemp, n): - twoList = [[] for i in range(n)] - for i, e in enumerate(listTemp): - twoList[i % n].append(e) - return twoList - - -def read_template_media_list(path): - # ijb_meta = np.loadtxt(path, dtype=str) - ijb_meta = pd.read_csv(path, sep=' ', header=None).values - templates = ijb_meta[:, 1].astype(np.int) - medias = ijb_meta[:, 2].astype(np.int) - return templates, medias - - -# In[ ]: - - -def read_template_pair_list(path): - # pairs = np.loadtxt(path, dtype=str) - pairs = pd.read_csv(path, sep=' ', header=None).values - # print(pairs.shape) - # print(pairs[:, 0].astype(np.int)) - t1 = pairs[:, 0].astype(np.int) - t2 = pairs[:, 1].astype(np.int) - label = pairs[:, 2].astype(np.int) - return t1, t2, label - - -# In[ ]: - - -def read_image_feature(path): - with open(path, 'rb') as fid: - img_feats = pickle.load(fid) - return img_feats - - -# In[ ]: - - -def get_image_feature(img_path, files_list, model_path, epoch, gpu_id): - batch_size = args.batch_size - data_shape = (3, 112, 112) - - files = files_list - print('files:', len(files)) - rare_size = len(files) % batch_size - faceness_scores = [] - batch = 0 - img_feats = np.empty((len(files), 1024), dtype=np.float32) - - batch_data = np.empty((2 * batch_size, 3, 112, 112)) - embedding = Embedding(model_path, data_shape, batch_size) - for img_index, each_line in enumerate(files[:len(files) - rare_size]): - name_lmk_score = each_line.strip().split(' ') - img_name = os.path.join(img_path, name_lmk_score[0]) - img = cv2.imread(img_name) - lmk = np.array([float(x) for x in 
name_lmk_score[1:-1]], - dtype=np.float32) - lmk = lmk.reshape((5, 2)) - input_blob = embedding.get(img, lmk) - - batch_data[2 * (img_index - batch * batch_size)][:] = input_blob[0] - batch_data[2 * (img_index - batch * batch_size) + 1][:] = input_blob[1] - if (img_index + 1) % batch_size == 0: - print('batch', batch) - img_feats[batch * batch_size:batch * batch_size + - batch_size][:] = embedding.forward_db(batch_data) - batch += 1 - faceness_scores.append(name_lmk_score[-1]) - - batch_data = np.empty((2 * rare_size, 3, 112, 112)) - embedding = Embedding(model_path, data_shape, rare_size) - for img_index, each_line in enumerate(files[len(files) - rare_size:]): - name_lmk_score = each_line.strip().split(' ') - img_name = os.path.join(img_path, name_lmk_score[0]) - img = cv2.imread(img_name) - lmk = np.array([float(x) for x in name_lmk_score[1:-1]], - dtype=np.float32) - lmk = lmk.reshape((5, 2)) - input_blob = embedding.get(img, lmk) - batch_data[2 * img_index][:] = input_blob[0] - batch_data[2 * img_index + 1][:] = input_blob[1] - if (img_index + 1) % rare_size == 0: - print('batch', batch) - img_feats[len(files) - - rare_size:][:] = embedding.forward_db(batch_data) - batch += 1 - faceness_scores.append(name_lmk_score[-1]) - faceness_scores = np.array(faceness_scores).astype(np.float32) - # img_feats = np.ones( (len(files), 1024), dtype=np.float32) * 0.01 - # faceness_scores = np.ones( (len(files), ), dtype=np.float32 ) - return img_feats, faceness_scores - - -# In[ ]: - - -def image2template_feature(img_feats=None, templates=None, medias=None): - # ========================================================== - # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim] - # 2. compute media feature. - # 3. compute template feature. - # ========================================================== - unique_templates = np.unique(templates) - template_feats = np.zeros((len(unique_templates), img_feats.shape[1])) - - for count_template, uqt in enumerate(unique_templates): - - (ind_t,) = np.where(templates == uqt) - face_norm_feats = img_feats[ind_t] - face_medias = medias[ind_t] - unique_medias, unique_media_counts = np.unique(face_medias, - return_counts=True) - media_norm_feats = [] - for u, ct in zip(unique_medias, unique_media_counts): - (ind_m,) = np.where(face_medias == u) - if ct == 1: - media_norm_feats += [face_norm_feats[ind_m]] - else: # image features from the same video will be aggregated into one feature - media_norm_feats += [ - np.mean(face_norm_feats[ind_m], axis=0, keepdims=True) - ] - media_norm_feats = np.array(media_norm_feats) - # media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True)) - template_feats[count_template] = np.sum(media_norm_feats, axis=0) - if count_template % 2000 == 0: - print('Finish Calculating {} template features.'.format( - count_template)) - # template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True)) - template_norm_feats = sklearn.preprocessing.normalize(template_feats) - # print(template_norm_feats.shape) - return template_norm_feats, unique_templates - - -# In[ ]: - - -def verification(template_norm_feats=None, - unique_templates=None, - p1=None, - p2=None): - # ========================================================== - # Compute set-to-set Similarity Score. 
- # ========================================================== - template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) - for count_template, uqt in enumerate(unique_templates): - template2id[uqt] = count_template - - score = np.zeros((len(p1),)) # save cosine distance between pairs - - total_pairs = np.array(range(len(p1))) - batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limitation - sublists = [ - total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize) - ] - total_sublists = len(sublists) - for c, s in enumerate(sublists): - feat1 = template_norm_feats[template2id[p1[s]]] - feat2 = template_norm_feats[template2id[p2[s]]] - similarity_score = np.sum(feat1 * feat2, -1) - score[s] = similarity_score.flatten() - if c % 10 == 0: - print('Finish {}/{} pairs.'.format(c, total_sublists)) - return score - - -# In[ ]: -def verification2(template_norm_feats=None, - unique_templates=None, - p1=None, - p2=None): - template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) - for count_template, uqt in enumerate(unique_templates): - template2id[uqt] = count_template - score = np.zeros((len(p1),)) # save cosine distance between pairs - total_pairs = np.array(range(len(p1))) - batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limitation - sublists = [ - total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize) - ] - total_sublists = len(sublists) - for c, s in enumerate(sublists): - feat1 = template_norm_feats[template2id[p1[s]]] - feat2 = template_norm_feats[template2id[p2[s]]] - similarity_score = np.sum(feat1 * feat2, -1) - score[s] = similarity_score.flatten() - if c % 10 == 0: - print('Finish {}/{} pairs.'.format(c, total_sublists)) - return score - - -def read_score(path): - with open(path, 'rb') as fid: - img_feats = pickle.load(fid) - return img_feats
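Since both `verification` helpers above reduce to a chunked dot product over L2-normalized template features, here is a minimal numeric illustration (all arrays and sizes made up):

```python
# Toy check of the scoring math: for unit-norm rows, the elementwise
# product summed over the feature axis is exactly cosine similarity.
import numpy as np

feats = np.random.randn(4, 512)
feats /= np.linalg.norm(feats, axis=1, keepdims=True)  # L2-normalize templates
p1, p2 = np.array([0, 1]), np.array([2, 3])            # template pairs to score
scores = np.sum(feats[p1] * feats[p2], axis=-1)        # one cosine per pair
```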
 - - -# # Step1: Load Meta Data - -# In[ ]: - -assert target == 'IJBC' or target == 'IJBB' - -# ============================================================= -# load image and template relationships for template feature embedding -# tid --> template id, mid --> media id -# format: -# image_name tid mid -# ============================================================= -start = timeit.default_timer() -templates, medias = read_template_media_list( - os.path.join('%s/meta' % image_path, - '%s_face_tid_mid.txt' % target.lower())) -stop = timeit.default_timer() -print('Time: %.2f s. ' % (stop - start)) - -# In[ ]: - -# ============================================================= -# load template pairs for template-to-template verification -# tid : template id, label : 1/0 -# format: -# tid_1 tid_2 label -# ============================================================= -start = timeit.default_timer() -p1, p2, label = read_template_pair_list( - os.path.join('%s/meta' % image_path, - '%s_template_pair_label.txt' % target.lower())) -stop = timeit.default_timer() -print('Time: %.2f s. ' % (stop - start)) - -# # Step 2: Get Image Features - -# In[ ]: - -# ============================================================= -# load image features -# format: -# img_feats: [image_num x feats_dim] (227630, 512) -# ============================================================= -start = timeit.default_timer() -img_path = '%s/loose_crop' % image_path -img_list_path = '%s/meta/%s_name_5pts_score.txt' % (image_path, target.lower()) -img_list = open(img_list_path) -files = img_list.readlines() -# files_list = divideIntoNstrand(files, rank_size) -files_list = files - -# img_feats -# for i in range(rank_size): -img_feats, faceness_scores = get_image_feature(img_path, files_list, - model_path, 0, gpu_id) -stop = timeit.default_timer() -print('Time: %.2f s. ' % (stop - start)) -print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], - img_feats.shape[1])) - -# # Step3: Get Template Features - -# In[ ]: - -# ============================================================= -# compute template features from image features. -# ============================================================= -start = timeit.default_timer() -# ========================================================== -# Normalize features before aggregating them into a template feature? -# The feature norm from the embedding network and the faceness score can both down-weight noisy (non-face) samples. -# ========================================================== -# 1. FaceScore (Feature Norm) -# 2. FaceScore (Detector) - -if use_flip_test: - # concat --- F1 - # img_input_feats = img_feats - # add --- F2 - img_input_feats = img_feats[:, 0:img_feats.shape[1] // - 2] + img_feats[:, img_feats.shape[1] // 2:] -else: - img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2] - -if use_norm_score: - img_input_feats = img_input_feats -else: - # normalise features to remove norm information - img_input_feats = img_input_feats / np.sqrt( - np.sum(img_input_feats ** 2, -1, keepdims=True)) - -if use_detector_score: - print(img_input_feats.shape, faceness_scores.shape) - img_input_feats = img_input_feats * faceness_scores[:, np.newaxis] -else: - img_input_feats = img_input_feats - -template_norm_feats, unique_templates = image2template_feature( - img_input_feats, templates, medias) -stop = timeit.default_timer() -print('Time: %.2f s. ' % (stop - start)) - -# # Step 4: Get Template Similarity Scores - -# In[ ]: - -# ============================================================= -# compute verification scores between template pairs. -# ============================================================= -start = timeit.default_timer() -score = verification(template_norm_feats, unique_templates, p1, p2) -stop = timeit.default_timer() -print('Time: %.2f s. 
' % (stop - start)) - -# In[ ]: -save_path = os.path.join(result_dir, args.job) -# save_path = result_dir + '/%s_result' % target - -if not os.path.exists(save_path): - os.makedirs(save_path) - -score_save_file = os.path.join(save_path, "%s.npy" % target.lower()) -np.save(score_save_file, score) - -# # Step 5: Get ROC Curves and TPR@FPR Table - -# In[ ]: - -files = [score_save_file] -methods = [] -scores = [] -for file in files: - methods.append(Path(file).stem) - scores.append(np.load(file)) - -methods = np.array(methods) -scores = dict(zip(methods, scores)) -colours = dict( - zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) -x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] -tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) -fig = plt.figure() -for method in methods: - fpr, tpr, _ = roc_curve(label, scores[method]) - roc_auc = auc(fpr, tpr) - fpr = np.flipud(fpr) - tpr = np.flipud(tpr) # select largest tpr at same fpr - plt.plot(fpr, - tpr, - color=colours[method], - lw=1, - label=('[%s (AUC = %0.4f %%)]' % - (method.split('-')[-1], roc_auc * 100))) - tpr_fpr_row = [] - tpr_fpr_row.append("%s-%s" % (method, target)) - for fpr_iter in np.arange(len(x_labels)): - _, min_index = min( - list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) - tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) - tpr_fpr_table.add_row(tpr_fpr_row) -plt.xlim([10 ** -6, 0.1]) -plt.ylim([0.3, 1.0]) -plt.grid(linestyle='--', linewidth=1) -plt.xticks(x_labels) -plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) -plt.xscale('log') -plt.xlabel('False Positive Rate') -plt.ylabel('True Positive Rate') -plt.title('ROC on IJB') -plt.legend(loc="lower right") -fig.savefig(os.path.join(save_path, '%s.pdf' % target.lower())) -print(tpr_fpr_table) diff --git a/src/src/face3d/models/arcface_torch/inference.py b/src/src/face3d/models/arcface_torch/inference.py deleted file mode 100644 index 3e5156e8d649954837e397c2ff15ec29995e7502..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/inference.py +++ /dev/null @@ -1,35 +0,0 @@ -import argparse - -import cv2 -import numpy as np -import torch - -from backbones import get_model - - -@torch.no_grad() -def inference(weight, name, img): - if img is None: - img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.uint8) - else: - img = cv2.imread(img) - img = cv2.resize(img, (112, 112)) - - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = np.transpose(img, (2, 0, 1)) - img = torch.from_numpy(img).unsqueeze(0).float() - img.div_(255).sub_(0.5).div_(0.5) - net = get_model(name, fp16=False) - net.load_state_dict(torch.load(weight)) - net.eval() - feat = net(img).numpy() - print(feat) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') - parser.add_argument('--network', type=str, default='r50', help='backbone network') - parser.add_argument('--weight', type=str, default='') - parser.add_argument('--img', type=str, default=None) - args = parser.parse_args() - inference(args.weight, args.network, args.img) diff --git a/src/src/face3d/models/arcface_torch/losses.py b/src/src/face3d/models/arcface_torch/losses.py deleted file mode 100644 index 87aeaa107af4d53f5a6132b3739d5cafdcded7fc..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/losses.py +++ /dev/null @@ -1,42 +0,0 @@ -import torch -from torch import nn - - -def get_loss(name): - if name == "cosface": - return CosFace() - elif name == "arcface": 
- return ArcFace() - else: - raise ValueError() - - -class CosFace(nn.Module): - def __init__(self, s=64.0, m=0.40): - super(CosFace, self).__init__() - self.s = s - self.m = m - - def forward(self, cosine, label): - index = torch.where(label != -1)[0] - m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) - m_hot.scatter_(1, label[index, None], self.m) - # subtract the margin m from the target-class cosine, then scale by s - cosine[index] -= m_hot - ret = cosine * self.s - return ret - - -class ArcFace(nn.Module): - def __init__(self, s=64.0, m=0.5): - super(ArcFace, self).__init__() - self.s = s - self.m = m - - def forward(self, cosine: torch.Tensor, label): - index = torch.where(label != -1)[0] - m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) - m_hot.scatter_(1, label[index, None], self.m) - # add the margin m to the target-class angle, then re-take the cosine and scale by s - cosine.acos_() - cosine[index] += m_hot - cosine.cos_().mul_(self.s) - return cosine
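The two margins above in equation form, restated for clarity with the `s` and `m` from the constructors; here θ_{y_i} is the angle between the embedding and its class center, and the cross-entropy itself is applied outside these modules:

```latex
% CosFace (additive cosine margin) vs. ArcFace (additive angular margin)
L_{\text{CosFace}} = -\log
  \frac{e^{s(\cos\theta_{y_i} - m)}}
       {e^{s(\cos\theta_{y_i} - m)} + \sum_{j \neq y_i} e^{s\cos\theta_j}}
\qquad
L_{\text{ArcFace}} = -\log
  \frac{e^{s\cos(\theta_{y_i} + m)}}
       {e^{s\cos(\theta_{y_i} + m)} + \sum_{j \neq y_i} e^{s\cos\theta_j}}
```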
diff --git a/src/src/face3d/models/arcface_torch/onnx_helper.py b/src/src/face3d/models/arcface_torch/onnx_helper.py deleted file mode 100644 index ca922ca6d410655029e459cf8fd1c323d276c34c..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/onnx_helper.py +++ /dev/null @@ -1,250 +0,0 @@ -from __future__ import division -import datetime -import os -import os.path as osp -import glob -import numpy as np -import cv2 -import sys -import onnxruntime -import onnx -import argparse -from onnx import numpy_helper -from insightface.data import get_image - -class ArcFaceORT: - def __init__(self, model_path, cpu=False): - self.model_path = model_path - # providers = None will use any available provider; for onnxruntime-gpu that is "CUDAExecutionProvider" - self.providers = ['CPUExecutionProvider'] if cpu else None - - # input_size is (w, h); returns an error message, or None on success - def check(self, track='cfat', test_img = None): - #default is cfat - max_model_size_mb=1024 - max_feat_dim=512 - max_time_cost=15 - if track.startswith('ms1m'): - max_model_size_mb=1024 - max_feat_dim=512 - max_time_cost=10 - elif track.startswith('glint'): - max_model_size_mb=1024 - max_feat_dim=1024 - max_time_cost=20 - elif track.startswith('cfat'): - max_model_size_mb = 1024 - max_feat_dim = 512 - max_time_cost = 15 - elif track.startswith('unconstrained'): - max_model_size_mb=1024 - max_feat_dim=1024 - max_time_cost=30 - else: - return "track not found" - - if not os.path.exists(self.model_path): - return "model_path not exists" - if not os.path.isdir(self.model_path): - return "model_path should be directory" - onnx_files = [] - for _file in os.listdir(self.model_path): - if _file.endswith('.onnx'): - onnx_files.append(osp.join(self.model_path, _file)) - if len(onnx_files)==0: - return "do not have onnx files" - self.model_file = sorted(onnx_files)[-1] - print('use onnx-model:', self.model_file) - try: - session = onnxruntime.InferenceSession(self.model_file, providers=self.providers) - except Exception: - return "load onnx failed" - input_cfg = session.get_inputs()[0] - input_shape = input_cfg.shape - print('input-shape:', input_shape) - if len(input_shape)!=4: - return "length of input_shape should be 4" - if not isinstance(input_shape[0], str): - #return "input_shape[0] should be str to support batch-inference" - print('reset input-shape[0] to None') - model = onnx.load(self.model_file) - model.graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' - new_model_file = osp.join(self.model_path, 'zzzzrefined.onnx') - onnx.save(model, new_model_file) - self.model_file = new_model_file - print('use new onnx-model:', self.model_file) - try: - session = onnxruntime.InferenceSession(self.model_file, providers=self.providers) - except Exception: - return "load onnx failed" - input_cfg = session.get_inputs()[0] - input_shape = input_cfg.shape - print('new-input-shape:', input_shape) - - self.image_size = tuple(input_shape[2:4][::-1]) - #print('image_size:', self.image_size) - input_name = input_cfg.name - outputs = session.get_outputs() - output_names = [] - for o in outputs: - output_names.append(o.name) - #print(o.name, o.shape) - if len(output_names)!=1: - return "number of output nodes should be 1" - self.session = session - self.input_name = input_name - self.output_names = output_names - #print(self.output_names) - model = onnx.load(self.model_file) - graph = model.graph - if len(graph.node)<8: - return "too small onnx graph" - - input_size = (112,112) - self.crop = None - if track=='cfat': - crop_file = osp.join(self.model_path, 'crop.txt') - if osp.exists(crop_file): - lines = open(crop_file,'r').readlines() - if len(lines)!=6: - return "crop.txt should contain 6 lines" - lines = [int(x) for x in lines] - self.crop = lines[:4] - input_size = tuple(lines[4:6]) - if input_size!=self.image_size: - return "input-size is inconsistent with onnx model input, %s vs %s"%(input_size, self.image_size) - - self.model_size_mb = os.path.getsize(self.model_file) / float(1024*1024) - if self.model_size_mb > max_model_size_mb: - return "max model size exceed, given %.3f-MB"%self.model_size_mb - - input_mean = None - input_std = None - if track=='cfat': - pn_file = osp.join(self.model_path, 'pixel_norm.txt') - if osp.exists(pn_file): - lines = open(pn_file,'r').readlines() - if len(lines)!=2: - return "pixel_norm.txt should contain 2 lines" - input_mean = float(lines[0]) - input_std = float(lines[1]) - if input_mean is not None or input_std is not None: - if input_mean is None or input_std is None: - return "please set input_mean and input_std simultaneously" - else: - find_sub = False - find_mul = False - for nid, node in enumerate(graph.node[:8]): - print(nid, node.name) - if node.name.startswith('Sub') or node.name.startswith('_minus'): - find_sub = True - if node.name.startswith('Mul') or node.name.startswith('_mul') or node.name.startswith('Div'): - find_mul = True - if find_sub and find_mul: - print("find sub and mul") - #mxnet arcface model - input_mean = 0.0 - input_std = 1.0 - else: - input_mean = 127.5 - input_std = 127.5 - self.input_mean = input_mean - self.input_std = input_std - for initn in graph.initializer: - weight_array = numpy_helper.to_array(initn) - dt = weight_array.dtype - if dt.itemsize<4: - return 'invalid weight type - (%s:%s)' % (initn.name, dt.name) - if test_img is None: - test_img = get_image('Tom_Hanks_54745') - test_img = cv2.resize(test_img, self.image_size) - else: - test_img = cv2.resize(test_img, self.image_size) - feat, cost = self.benchmark(test_img) - batch_result = self.check_batch(test_img) - batch_result_sum = float(np.sum(batch_result)) - if batch_result_sum in [float('inf'), -float('inf')] or batch_result_sum != batch_result_sum: - print(batch_result) - print(batch_result_sum) - return "batch result output contains NaN!" 
- - if len(feat.shape) < 2: - return "the shape of the feature must be two, but get {}".format(str(feat.shape)) - - if feat.shape[1] > max_feat_dim: - return "max feat dim exceed, given %d"%feat.shape[1] - self.feat_dim = feat.shape[1] - cost_ms = cost*1000 - if cost_ms>max_time_cost: - return "max time cost exceed, given %.4f"%cost_ms - self.cost_ms = cost_ms - print('check stat:, model-size-mb: %.4f, feat-dim: %d, time-cost-ms: %.4f, input-mean: %.3f, input-std: %.3f'%(self.model_size_mb, self.feat_dim, self.cost_ms, self.input_mean, self.input_std)) - return None - - def check_batch(self, img): - if not isinstance(img, list): - imgs = [img, ] * 32 - if self.crop is not None: - nimgs = [] - for img in imgs: - nimg = img[self.crop[1]:self.crop[3], self.crop[0]:self.crop[2], :] - if nimg.shape[0] != self.image_size[1] or nimg.shape[1] != self.image_size[0]: - nimg = cv2.resize(nimg, self.image_size) - nimgs.append(nimg) - imgs = nimgs - blob = cv2.dnn.blobFromImages( - images=imgs, scalefactor=1.0 / self.input_std, size=self.image_size, - mean=(self.input_mean, self.input_mean, self.input_mean), swapRB=True) - net_out = self.session.run(self.output_names, {self.input_name: blob})[0] - return net_out - - - def meta_info(self): - return {'model-size-mb':self.model_size_mb, 'feature-dim':self.feat_dim, 'infer': self.cost_ms} - - - def forward(self, imgs): - if not isinstance(imgs, list): - imgs = [imgs] - input_size = self.image_size - if self.crop is not None: - nimgs = [] - for img in imgs: - nimg = img[self.crop[1]:self.crop[3],self.crop[0]:self.crop[2],:] - if nimg.shape[0]!=input_size[1] or nimg.shape[1]!=input_size[0]: - nimg = cv2.resize(nimg, input_size) - nimgs.append(nimg) - imgs = nimgs - blob = cv2.dnn.blobFromImages(imgs, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True) - net_out = self.session.run(self.output_names, {self.input_name : blob})[0] - return net_out - - def benchmark(self, img): - input_size = self.image_size - if self.crop is not None: - nimg = img[self.crop[1]:self.crop[3],self.crop[0]:self.crop[2],:] - if nimg.shape[0]!=input_size[1] or nimg.shape[1]!=input_size[0]: - nimg = cv2.resize(nimg, input_size) - img = nimg - blob = cv2.dnn.blobFromImage(img, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True) - costs = [] - for _ in range(50): - ta = datetime.datetime.now() - net_out = self.session.run(self.output_names, {self.input_name : blob})[0] - tb = datetime.datetime.now() - cost = (tb-ta).total_seconds() - costs.append(cost) - costs = sorted(costs) - cost = costs[5] - return net_out, cost - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='') - # general - parser.add_argument('workdir', help='submitted work dir', type=str) - parser.add_argument('--track', help='track name, for different challenge', type=str, default='cfat') - args = parser.parse_args() - handler = ArcFaceORT(args.workdir) - err = handler.check(args.track) - print('err:', err) diff --git a/src/src/face3d/models/arcface_torch/onnx_ijbc.py b/src/src/face3d/models/arcface_torch/onnx_ijbc.py deleted file mode 100644 index 05b50bfad4b4cf38903b89f596263a8e29a50d3e..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/onnx_ijbc.py +++ /dev/null @@ -1,267 +0,0 @@ -import argparse -import os -import pickle -import timeit - -import cv2 -import mxnet as mx -import numpy as np -import pandas as pd -import prettytable -import skimage.transform -from 
sklearn.metrics import roc_curve -from sklearn.preprocessing import normalize - -from onnx_helper import ArcFaceORT - -SRC = np.array( - [ - [30.2946, 51.6963], - [65.5318, 51.5014], - [48.0252, 71.7366], - [33.5493, 92.3655], - [62.7299, 92.2041]] - , dtype=np.float32) -SRC[:, 0] += 8.0 - - -class AlignedDataSet(mx.gluon.data.Dataset): - def __init__(self, root, lines, align=True): - self.lines = lines - self.root = root - self.align = align - - def __len__(self): - return len(self.lines) - - def __getitem__(self, idx): - each_line = self.lines[idx] - name_lmk_score = each_line.strip().split(' ') - name = os.path.join(self.root, name_lmk_score[0]) - img = cv2.cvtColor(cv2.imread(name), cv2.COLOR_BGR2RGB) - landmark5 = np.array([float(x) for x in name_lmk_score[1:-1]], dtype=np.float32).reshape((5, 2)) - st = skimage.transform.SimilarityTransform() - st.estimate(landmark5, SRC) - img = cv2.warpAffine(img, st.params[0:2, :], (112, 112), borderValue=0.0) - img_1 = np.expand_dims(img, 0) - img_2 = np.expand_dims(np.fliplr(img), 0) - output = np.concatenate((img_1, img_2), axis=0).astype(np.float32) - output = np.transpose(output, (0, 3, 1, 2)) - output = mx.nd.array(output) - return output - - -def extract(model_root, dataset): - model = ArcFaceORT(model_path=model_root) - model.check() - feat_mat = np.zeros(shape=(len(dataset), 2 * model.feat_dim)) - - def batchify_fn(data): - return mx.nd.concat(*data, dim=0) - - data_loader = mx.gluon.data.DataLoader( - dataset, 128, last_batch='keep', num_workers=4, - thread_pool=True, prefetch=16, batchify_fn=batchify_fn) - num_iter = 0 - for batch in data_loader: - batch = batch.asnumpy() - batch = (batch - model.input_mean) / model.input_std - feat = model.session.run(model.output_names, {model.input_name: batch})[0] - feat = np.reshape(feat, (-1, model.feat_dim * 2)) - feat_mat[128 * num_iter: 128 * num_iter + feat.shape[0], :] = feat - num_iter += 1 - if num_iter % 50 == 0: - print(num_iter) - return feat_mat - - -def read_template_media_list(path): - ijb_meta = pd.read_csv(path, sep=' ', header=None).values - templates = ijb_meta[:, 1].astype(np.int) - medias = ijb_meta[:, 2].astype(np.int) - return templates, medias - - -def read_template_pair_list(path): - pairs = pd.read_csv(path, sep=' ', header=None).values - t1 = pairs[:, 0].astype(np.int) - t2 = pairs[:, 1].astype(np.int) - label = pairs[:, 2].astype(np.int) - return t1, t2, label - - -def read_image_feature(path): - with open(path, 'rb') as fid: - img_feats = pickle.load(fid) - return img_feats - - -def image2template_feature(img_feats=None, - templates=None, - medias=None): - unique_templates = np.unique(templates) - template_feats = np.zeros((len(unique_templates), img_feats.shape[1])) - for count_template, uqt in enumerate(unique_templates): - (ind_t,) = np.where(templates == uqt) - face_norm_feats = img_feats[ind_t] - face_medias = medias[ind_t] - unique_medias, unique_media_counts = np.unique(face_medias, return_counts=True) - media_norm_feats = [] - for u, ct in zip(unique_medias, unique_media_counts): - (ind_m,) = np.where(face_medias == u) - if ct == 1: - media_norm_feats += [face_norm_feats[ind_m]] - else: # image features from the same video will be aggregated into one feature - media_norm_feats += [np.mean(face_norm_feats[ind_m], axis=0, keepdims=True), ] - media_norm_feats = np.array(media_norm_feats) - template_feats[count_template] = np.sum(media_norm_feats, axis=0) - if count_template % 2000 == 0: - print('Finish Calculating {} template features.'.format( - count_template)) - 
template_norm_feats = normalize(template_feats) - return template_norm_feats, unique_templates - - -def verification(template_norm_feats=None, - unique_templates=None, - p1=None, - p2=None): - template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) - for count_template, uqt in enumerate(unique_templates): - template2id[uqt] = count_template - score = np.zeros((len(p1),)) - total_pairs = np.array(range(len(p1))) - batchsize = 100000 - sublists = [total_pairs[i: i + batchsize] for i in range(0, len(p1), batchsize)] - total_sublists = len(sublists) - for c, s in enumerate(sublists): - feat1 = template_norm_feats[template2id[p1[s]]] - feat2 = template_norm_feats[template2id[p2[s]]] - similarity_score = np.sum(feat1 * feat2, -1) - score[s] = similarity_score.flatten() - if c % 10 == 0: - print('Finish {}/{} pairs.'.format(c, total_sublists)) - return score - - -def verification2(template_norm_feats=None, - unique_templates=None, - p1=None, - p2=None): - template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) - for count_template, uqt in enumerate(unique_templates): - template2id[uqt] = count_template - score = np.zeros((len(p1),)) # save cosine distance between pairs - total_pairs = np.array(range(len(p1))) - batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limitation - sublists = [total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)] - total_sublists = len(sublists) - for c, s in enumerate(sublists): - feat1 = template_norm_feats[template2id[p1[s]]] - feat2 = template_norm_feats[template2id[p2[s]]] - similarity_score = np.sum(feat1 * feat2, -1) - score[s] = similarity_score.flatten() - if c % 10 == 0: - print('Finish {}/{} pairs.'.format(c, total_sublists)) - return score - - -def main(args): - use_norm_score = True # if True, TestMode(N1) - use_detector_score = True # if True, TestMode(D1) - use_flip_test = True # if True, TestMode(F1) - assert args.target == 'IJBC' or args.target == 'IJBB' - - start = timeit.default_timer() - templates, medias = read_template_media_list( - os.path.join('%s/meta' % args.image_path, '%s_face_tid_mid.txt' % args.target.lower())) - stop = timeit.default_timer() - print('Time: %.2f s. ' % (stop - start)) - - start = timeit.default_timer() - p1, p2, label = read_template_pair_list( - os.path.join('%s/meta' % args.image_path, - '%s_template_pair_label.txt' % args.target.lower())) - stop = timeit.default_timer() - print('Time: %.2f s. ' % (stop - start)) - - start = timeit.default_timer() - img_path = '%s/loose_crop' % args.image_path - img_list_path = '%s/meta/%s_name_5pts_score.txt' % (args.image_path, args.target.lower()) - img_list = open(img_list_path) - files = img_list.readlines() - dataset = AlignedDataSet(root=img_path, lines=files, align=True) - img_feats = extract(args.model_root, dataset) - - faceness_scores = [] - for each_line in files: - name_lmk_score = each_line.split() - faceness_scores.append(name_lmk_score[-1]) - faceness_scores = np.array(faceness_scores).astype(np.float32) - stop = timeit.default_timer() - print('Time: %.2f s. 
' % (stop - start)) - print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], img_feats.shape[1])) - start = timeit.default_timer() - - if use_flip_test: - img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2] + img_feats[:, img_feats.shape[1] // 2:] - else: - img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2] - - if use_norm_score: - img_input_feats = img_input_feats - else: - img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True)) - - if use_detector_score: - print(img_input_feats.shape, faceness_scores.shape) - img_input_feats = img_input_feats * faceness_scores[:, np.newaxis] - else: - img_input_feats = img_input_feats - - template_norm_feats, unique_templates = image2template_feature( - img_input_feats, templates, medias) - stop = timeit.default_timer() - print('Time: %.2f s. ' % (stop - start)) - - start = timeit.default_timer() - score = verification(template_norm_feats, unique_templates, p1, p2) - stop = timeit.default_timer() - print('Time: %.2f s. ' % (stop - start)) - save_path = os.path.join(args.result_dir, "{}_result".format(args.target)) - if not os.path.exists(save_path): - os.makedirs(save_path) - score_save_file = os.path.join(save_path, "{}.npy".format(args.model_root)) - np.save(score_save_file, score) - files = [score_save_file] - methods = [] - scores = [] - for file in files: - methods.append(os.path.basename(file)) - scores.append(np.load(file)) - methods = np.array(methods) - scores = dict(zip(methods, scores)) - x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] - tpr_fpr_table = prettytable.PrettyTable(['Methods'] + [str(x) for x in x_labels]) - for method in methods: - fpr, tpr, _ = roc_curve(label, scores[method]) - fpr = np.flipud(fpr) - tpr = np.flipud(tpr) - tpr_fpr_row = [] - tpr_fpr_row.append("%s-%s" % (method, args.target)) - for fpr_iter in np.arange(len(x_labels)): - _, min_index = min( - list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) - tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) - tpr_fpr_table.add_row(tpr_fpr_row) - print(tpr_fpr_table) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='do ijb test') - # general - parser.add_argument('--model-root', default='', help='path to load model.') - parser.add_argument('--image-path', default='', type=str, help='') - parser.add_argument('--result-dir', default='.', type=str, help='') - parser.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB') - main(parser.parse_args()) diff --git a/src/src/face3d/models/arcface_torch/partial_fc.py b/src/src/face3d/models/arcface_torch/partial_fc.py deleted file mode 100644 index 17e2d25715d10ba446c957e1d2528b0687ed71d5..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/partial_fc.py +++ /dev/null @@ -1,222 +0,0 @@ -import logging -import os - -import torch -import torch.distributed as dist -from torch.nn import Module -from torch.nn.functional import normalize, linear -from torch.nn.parameter import Parameter - - -class PartialFC(Module): - """ - Author: {Xiang An, Yang Xiao, XuHan Zhu} in DeepGlint, - Partial FC: Training 10 Million Identities on a Single Machine - See the original paper: - https://arxiv.org/abs/2010.05222 - """ - - @torch.no_grad() - def __init__(self, rank, local_rank, world_size, batch_size, resume, - margin_softmax, num_classes, sample_rate=1.0, embedding_size=512, prefix="./"): - """ - rank: int - Unique process(GPU) ID from 0 to world_size - 1. 
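The PrettyTable block above reports, for each target FPR, the TPR at the nearest point of the ROC curve, selected with a min over zipped distances; np.argmin expresses the same pick. A self-contained sketch with toy labels and scores (not IJB data):

```python
import numpy as np
from sklearn.metrics import roc_curve

labels = np.array([1, 0, 1, 1, 0, 0, 1, 0])          # toy pair labels
scores = np.array([0.9, 0.4, 0.8, 0.7, 0.5, 0.2, 0.6, 0.3])

fpr, tpr, _ = roc_curve(labels, scores)
for target in (1e-2, 1e-1):
    idx = np.argmin(np.abs(fpr - target))            # same pick as min(zip(abs(...), range(...)))
    print("TPR@FPR=%g: %.2f%%" % (target, tpr[idx] * 100))
```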
- local_rank: int - Unique process(GPU) ID within the server from 0 to 7. - world_size: int - Number of GPU. - batch_size: int - Batch size on current rank(GPU). - resume: bool - Select whether to restore the weight of softmax. - margin_softmax: callable - A function of margin softmax, eg: cosface, arcface. - num_classes: int - The number of class center storage in current rank(CPU/GPU), usually is total_classes // world_size, - required. - sample_rate: float - The partial fc sampling rate, when the number of classes increases to more than 2 millions, Sampling - can greatly speed up training, and reduce a lot of GPU memory, default is 1.0. - embedding_size: int - The feature dimension, default is 512. - prefix: str - Path for save checkpoint, default is './'. - """ - super(PartialFC, self).__init__() - # - self.num_classes: int = num_classes - self.rank: int = rank - self.local_rank: int = local_rank - self.device: torch.device = torch.device("cuda:{}".format(self.local_rank)) - self.world_size: int = world_size - self.batch_size: int = batch_size - self.margin_softmax: callable = margin_softmax - self.sample_rate: float = sample_rate - self.embedding_size: int = embedding_size - self.prefix: str = prefix - self.num_local: int = num_classes // world_size + int(rank < num_classes % world_size) - self.class_start: int = num_classes // world_size * rank + min(rank, num_classes % world_size) - self.num_sample: int = int(self.sample_rate * self.num_local) - - self.weight_name = os.path.join(self.prefix, "rank_{}_softmax_weight.pt".format(self.rank)) - self.weight_mom_name = os.path.join(self.prefix, "rank_{}_softmax_weight_mom.pt".format(self.rank)) - - if resume: - try: - self.weight: torch.Tensor = torch.load(self.weight_name) - self.weight_mom: torch.Tensor = torch.load(self.weight_mom_name) - if self.weight.shape[0] != self.num_local or self.weight_mom.shape[0] != self.num_local: - raise IndexError - logging.info("softmax weight resume successfully!") - logging.info("softmax weight mom resume successfully!") - except (FileNotFoundError, KeyError, IndexError): - self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device) - self.weight_mom: torch.Tensor = torch.zeros_like(self.weight) - logging.info("softmax weight init!") - logging.info("softmax weight mom init!") - else: - self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device) - self.weight_mom: torch.Tensor = torch.zeros_like(self.weight) - logging.info("softmax weight init successfully!") - logging.info("softmax weight mom init successfully!") - self.stream: torch.cuda.Stream = torch.cuda.Stream(local_rank) - - self.index = None - if int(self.sample_rate) == 1: - self.update = lambda: 0 - self.sub_weight = Parameter(self.weight) - self.sub_weight_mom = self.weight_mom - else: - self.sub_weight = Parameter(torch.empty((0, 0)).cuda(local_rank)) - - def save_params(self): - """ Save softmax weight for each rank on prefix - """ - torch.save(self.weight.data, self.weight_name) - torch.save(self.weight_mom, self.weight_mom_name) - - @torch.no_grad() - def sample(self, total_label): - """ - Sample all positive class centers in each rank, and random select neg class centers to filling a fixed - `num_sample`. - - total_label: tensor - Label after all gather, which cross all GPUs. 
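PartialFC's __init__ above splits num_classes across ranks so that the first num_classes % world_size ranks carry one extra class center. A quick sketch checking that the num_local / class_start formulas tile the class range exactly (world size and class count are arbitrary here):

```python
# check that the num_local / class_start formulas tile [0, num_classes) exactly
num_classes, world_size = 10_000_003, 8   # deliberately not divisible

starts, counts = [], []
for rank in range(world_size):
    num_local = num_classes // world_size + int(rank < num_classes % world_size)
    class_start = num_classes // world_size * rank + min(rank, num_classes % world_size)
    starts.append(class_start)
    counts.append(num_local)

assert sum(counts) == num_classes
assert all(starts[i] + counts[i] == starts[i + 1] for i in range(world_size - 1))
print(counts[0], counts[-1])   # earlier ranks carry one extra class
```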
- """ - index_positive = (self.class_start <= total_label) & (total_label < self.class_start + self.num_local) - total_label[~index_positive] = -1 - total_label[index_positive] -= self.class_start - if int(self.sample_rate) != 1: - positive = torch.unique(total_label[index_positive], sorted=True) - if self.num_sample - positive.size(0) >= 0: - perm = torch.rand(size=[self.num_local], device=self.device) - perm[positive] = 2.0 - index = torch.topk(perm, k=self.num_sample)[1] - index = index.sort()[0] - else: - index = positive - self.index = index - total_label[index_positive] = torch.searchsorted(index, total_label[index_positive]) - self.sub_weight = Parameter(self.weight[index]) - self.sub_weight_mom = self.weight_mom[index] - - def forward(self, total_features, norm_weight): - """ Partial fc forward, `logits = X * sample(W)` - """ - torch.cuda.current_stream().wait_stream(self.stream) - logits = linear(total_features, norm_weight) - return logits - - @torch.no_grad() - def update(self): - """ Set updated weight and weight_mom to memory bank. - """ - self.weight_mom[self.index] = self.sub_weight_mom - self.weight[self.index] = self.sub_weight - - def prepare(self, label, optimizer): - """ - get sampled class centers for cal softmax. - - label: tensor - Label tensor on each rank. - optimizer: opt - Optimizer for partial fc, which need to get weight mom. - """ - with torch.cuda.stream(self.stream): - total_label = torch.zeros( - size=[self.batch_size * self.world_size], device=self.device, dtype=torch.long) - dist.all_gather(list(total_label.chunk(self.world_size, dim=0)), label) - self.sample(total_label) - optimizer.state.pop(optimizer.param_groups[-1]['params'][0], None) - optimizer.param_groups[-1]['params'][0] = self.sub_weight - optimizer.state[self.sub_weight]['momentum_buffer'] = self.sub_weight_mom - norm_weight = normalize(self.sub_weight) - return total_label, norm_weight - - def forward_backward(self, label, features, optimizer): - """ - Partial fc forward and backward with model parallel - - label: tensor - Label tensor on each rank(GPU) - features: tensor - Features tensor on each rank(GPU) - optimizer: optimizer - Optimizer for partial fc - - Returns: - -------- - x_grad: tensor - The gradient of features. - loss_v: tensor - Loss value for cross entropy. 
- """ - total_label, norm_weight = self.prepare(label, optimizer) - total_features = torch.zeros( - size=[self.batch_size * self.world_size, self.embedding_size], device=self.device) - dist.all_gather(list(total_features.chunk(self.world_size, dim=0)), features.data) - total_features.requires_grad = True - - logits = self.forward(total_features, norm_weight) - logits = self.margin_softmax(logits, total_label) - - with torch.no_grad(): - max_fc = torch.max(logits, dim=1, keepdim=True)[0] - dist.all_reduce(max_fc, dist.ReduceOp.MAX) - - # calculate exp(logits) and all-reduce - logits_exp = torch.exp(logits - max_fc) - logits_sum_exp = logits_exp.sum(dim=1, keepdims=True) - dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM) - - # calculate prob - logits_exp.div_(logits_sum_exp) - - # get one-hot - grad = logits_exp - index = torch.where(total_label != -1)[0] - one_hot = torch.zeros(size=[index.size()[0], grad.size()[1]], device=grad.device) - one_hot.scatter_(1, total_label[index, None], 1) - - # calculate loss - loss = torch.zeros(grad.size()[0], 1, device=grad.device) - loss[index] = grad[index].gather(1, total_label[index, None]) - dist.all_reduce(loss, dist.ReduceOp.SUM) - loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1) - - # calculate grad - grad[index] -= one_hot - grad.div_(self.batch_size * self.world_size) - - logits.backward(grad) - if total_features.grad is not None: - total_features.grad.detach_() - x_grad: torch.Tensor = torch.zeros_like(features, requires_grad=True) - # feature gradient all-reduce - dist.reduce_scatter(x_grad, list(total_features.grad.chunk(self.world_size, dim=0))) - x_grad = x_grad * self.world_size - # backward backbone - return x_grad, loss_v diff --git a/src/src/face3d/models/arcface_torch/requirement.txt b/src/src/face3d/models/arcface_torch/requirement.txt deleted file mode 100644 index f72c1b3ba814ae1e0bc1c1f56402026978b9e870..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/requirement.txt +++ /dev/null @@ -1,5 +0,0 @@ -tensorboard -easydict -mxnet -onnx -sklearn diff --git a/src/src/face3d/models/arcface_torch/run.sh b/src/src/face3d/models/arcface_torch/run.sh deleted file mode 100644 index 61af4b4950eb11334e55362e3e3c5e2796979a01..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r50 -ps -ef | grep "train" | grep -v grep | awk '{print "kill -9 "$2}' | sh diff --git a/src/src/face3d/models/arcface_torch/torch2onnx.py b/src/src/face3d/models/arcface_torch/torch2onnx.py deleted file mode 100644 index fc26ab82e552331bc8d75b34e81000418f4d38ec..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/torch2onnx.py +++ /dev/null @@ -1,59 +0,0 @@ -import numpy as np -import onnx -import torch - - -def convert_onnx(net, path_module, output, opset=11, simplify=False): - assert isinstance(net, torch.nn.Module) - img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) - img = img.astype(np.float) - img = (img / 255. 
- 0.5) / 0.5 # torch style norm - img = img.transpose((2, 0, 1)) - img = torch.from_numpy(img).unsqueeze(0).float() - - weight = torch.load(path_module) - net.load_state_dict(weight) - net.eval() - torch.onnx.export(net, img, output, keep_initializers_as_inputs=False, verbose=False, opset_version=opset) - model = onnx.load(output) - graph = model.graph - graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' - if simplify: - from onnxsim import simplify - model, check = simplify(model) - assert check, "Simplified ONNX model could not be validated" - onnx.save(model, output) - - -if __name__ == '__main__': - import os - import argparse - from backbones import get_model - - parser = argparse.ArgumentParser(description='ArcFace PyTorch to onnx') - parser.add_argument('input', type=str, help='input backbone.pth file or path') - parser.add_argument('--output', type=str, default=None, help='output onnx path') - parser.add_argument('--network', type=str, default=None, help='backbone network') - parser.add_argument('--simplify', type=bool, default=False, help='onnx simplify') - args = parser.parse_args() - input_file = args.input - if os.path.isdir(input_file): - input_file = os.path.join(input_file, "backbone.pth") - assert os.path.exists(input_file) - model_name = os.path.basename(os.path.dirname(input_file)).lower() - params = model_name.split("_") - if len(params) >= 3 and params[1] in ('arcface', 'cosface'): - if args.network is None: - args.network = params[2] - assert args.network is not None - print(args) - backbone_onnx = get_model(args.network, dropout=0) - - output_path = args.output - if output_path is None: - output_path = os.path.join(os.path.dirname(__file__), 'onnx') - if not os.path.exists(output_path): - os.makedirs(output_path) - assert os.path.isdir(output_path) - output_file = os.path.join(output_path, "%s.onnx" % model_name) - convert_onnx(backbone_onnx, input_file, output_file, simplify=args.simplify) diff --git a/src/src/face3d/models/arcface_torch/train.py b/src/src/face3d/models/arcface_torch/train.py deleted file mode 100644 index 55eca2d0ad9463415970e09bccab8b722e496704..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/train.py +++ /dev/null @@ -1,141 +0,0 @@ -import argparse -import logging -import os - -import torch -import torch.distributed as dist -import torch.nn.functional as F -import torch.utils.data.distributed -from torch.nn.utils import clip_grad_norm_ - -import losses -from backbones import get_model -from dataset import MXFaceDataset, SyntheticDataset, DataLoaderX -from partial_fc import PartialFC -from utils.utils_amp import MaxClipGradScaler -from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint -from utils.utils_config import get_config -from utils.utils_logging import AverageMeter, init_logging - - -def main(args): - cfg = get_config(args.config) - try: - world_size = int(os.environ['WORLD_SIZE']) - rank = int(os.environ['RANK']) - dist.init_process_group('nccl') - except KeyError: - world_size = 1 - rank = 0 - dist.init_process_group(backend='nccl', init_method="tcp://127.0.0.1:12584", rank=rank, world_size=world_size) - - local_rank = args.local_rank - torch.cuda.set_device(local_rank) - os.makedirs(cfg.output, exist_ok=True) - init_logging(rank, cfg.output) - - if cfg.rec == "synthetic": - train_set = SyntheticDataset(local_rank=local_rank) - else: - train_set = MXFaceDataset(root_dir=cfg.rec, local_rank=local_rank) - - train_sampler = 
torch.utils.data.distributed.DistributedSampler(train_set, shuffle=True) - train_loader = DataLoaderX( - local_rank=local_rank, dataset=train_set, batch_size=cfg.batch_size, - sampler=train_sampler, num_workers=2, pin_memory=True, drop_last=True) - backbone = get_model(cfg.network, dropout=0.0, fp16=cfg.fp16, num_features=cfg.embedding_size).to(local_rank) - - if cfg.resume: - try: - backbone_pth = os.path.join(cfg.output, "backbone.pth") - backbone.load_state_dict(torch.load(backbone_pth, map_location=torch.device(local_rank))) - if rank == 0: - logging.info("backbone resume successfully!") - except (FileNotFoundError, KeyError, IndexError, RuntimeError): - if rank == 0: - logging.info("resume fail, backbone init successfully!") - - backbone = torch.nn.parallel.DistributedDataParallel( - module=backbone, broadcast_buffers=False, device_ids=[local_rank]) - backbone.train() - margin_softmax = losses.get_loss(cfg.loss) - module_partial_fc = PartialFC( - rank=rank, local_rank=local_rank, world_size=world_size, resume=cfg.resume, - batch_size=cfg.batch_size, margin_softmax=margin_softmax, num_classes=cfg.num_classes, - sample_rate=cfg.sample_rate, embedding_size=cfg.embedding_size, prefix=cfg.output) - - opt_backbone = torch.optim.SGD( - params=[{'params': backbone.parameters()}], - lr=cfg.lr / 512 * cfg.batch_size * world_size, - momentum=0.9, weight_decay=cfg.weight_decay) - opt_pfc = torch.optim.SGD( - params=[{'params': module_partial_fc.parameters()}], - lr=cfg.lr / 512 * cfg.batch_size * world_size, - momentum=0.9, weight_decay=cfg.weight_decay) - - num_image = len(train_set) - total_batch_size = cfg.batch_size * world_size - cfg.warmup_step = num_image // total_batch_size * cfg.warmup_epoch - cfg.total_step = num_image // total_batch_size * cfg.num_epoch - - def lr_step_func(current_step): - cfg.decay_step = [x * num_image // total_batch_size for x in cfg.decay_epoch] - if current_step < cfg.warmup_step: - return current_step / cfg.warmup_step - else: - return 0.1 ** len([m for m in cfg.decay_step if m <= current_step]) - - scheduler_backbone = torch.optim.lr_scheduler.LambdaLR( - optimizer=opt_backbone, lr_lambda=lr_step_func) - scheduler_pfc = torch.optim.lr_scheduler.LambdaLR( - optimizer=opt_pfc, lr_lambda=lr_step_func) - - for key, value in cfg.items(): - num_space = 25 - len(key) - logging.info(": " + key + " " * num_space + str(value)) - - val_target = cfg.val_targets - callback_verification = CallBackVerification(2000, rank, val_target, cfg.rec) - callback_logging = CallBackLogging(50, rank, cfg.total_step, cfg.batch_size, world_size, None) - callback_checkpoint = CallBackModelCheckpoint(rank, cfg.output) - - loss = AverageMeter() - start_epoch = 0 - global_step = 0 - grad_amp = MaxClipGradScaler(cfg.batch_size, 128 * cfg.batch_size, growth_interval=100) if cfg.fp16 else None - for epoch in range(start_epoch, cfg.num_epoch): - train_sampler.set_epoch(epoch) - for step, (img, label) in enumerate(train_loader): - global_step += 1 - features = F.normalize(backbone(img)) - x_grad, loss_v = module_partial_fc.forward_backward(label, features, opt_pfc) - if cfg.fp16: - features.backward(grad_amp.scale(x_grad)) - grad_amp.unscale_(opt_backbone) - clip_grad_norm_(backbone.parameters(), max_norm=5, norm_type=2) - grad_amp.step(opt_backbone) - grad_amp.update() - else: - features.backward(x_grad) - clip_grad_norm_(backbone.parameters(), max_norm=5, norm_type=2) - opt_backbone.step() - - opt_pfc.step() - module_partial_fc.update() - opt_backbone.zero_grad() - opt_pfc.zero_grad() - 
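lr_step_func above is a linear warm-up followed by a 10x cut at each decay boundary; LambdaLR applies it as a multiplier on the linearly scaled base rate (cfg.lr / 512 * total batch size). A standalone sketch with hypothetical step counts:

```python
# standalone sketch of the schedule: linear warm-up, then 0.1x at each decay boundary
warmup_step, decay_steps = 1000, [8000, 14000]   # hypothetical counts

def lr_step_func(current_step):
    if current_step < warmup_step:
        return current_step / warmup_step
    return 0.1 ** len([m for m in decay_steps if m <= current_step])

for s in (0, 500, 1000, 8000, 14000):
    print(s, lr_step_func(s))   # 0.0, 0.5, 1.0, 0.1, 0.01
```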
loss.update(loss_v, 1) - callback_logging(global_step, loss, epoch, cfg.fp16, scheduler_backbone.get_last_lr()[0], grad_amp) - callback_verification(global_step, backbone) - scheduler_backbone.step() - scheduler_pfc.step() - callback_checkpoint(global_step, backbone, module_partial_fc) - dist.destroy_process_group() - - -if __name__ == "__main__": - torch.backends.cudnn.benchmark = True - parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') - parser.add_argument('config', type=str, help='py config file') - parser.add_argument('--local_rank', type=int, default=0, help='local_rank') - main(parser.parse_args()) diff --git a/src/src/face3d/models/arcface_torch/utils/__init__.py b/src/src/face3d/models/arcface_torch/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/src/face3d/models/arcface_torch/utils/plot.py b/src/src/face3d/models/arcface_torch/utils/plot.py deleted file mode 100644 index ccc588e5c01ca550b69c385aeb3fd139c59fb88a..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/utils/plot.py +++ /dev/null @@ -1,72 +0,0 @@ -# coding: utf-8 - -import os -from pathlib import Path - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap -from prettytable import PrettyTable -from sklearn.metrics import roc_curve, auc - -image_path = "/data/anxiang/IJB_release/IJBC" -files = [ - "./ms1mv3_arcface_r100/ms1mv3_arcface_r100/ijbc.npy" -] - - -def read_template_pair_list(path): - pairs = pd.read_csv(path, sep=' ', header=None).values - t1 = pairs[:, 0].astype(np.int) - t2 = pairs[:, 1].astype(np.int) - label = pairs[:, 2].astype(np.int) - return t1, t2, label - - -p1, p2, label = read_template_pair_list( - os.path.join('%s/meta' % image_path, - '%s_template_pair_label.txt' % 'ijbc')) - -methods = [] -scores = [] -for file in files: - methods.append(file.split('/')[-2]) - scores.append(np.load(file)) - -methods = np.array(methods) -scores = dict(zip(methods, scores)) -colours = dict( - zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) -x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] -tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) -fig = plt.figure() -for method in methods: - fpr, tpr, _ = roc_curve(label, scores[method]) - roc_auc = auc(fpr, tpr) - fpr = np.flipud(fpr) - tpr = np.flipud(tpr) # select largest tpr at same fpr - plt.plot(fpr, - tpr, - color=colours[method], - lw=1, - label=('[%s (AUC = %0.4f %%)]' % - (method.split('-')[-1], roc_auc * 100))) - tpr_fpr_row = [] - tpr_fpr_row.append("%s-%s" % (method, "IJBC")) - for fpr_iter in np.arange(len(x_labels)): - _, min_index = min( - list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) - tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) - tpr_fpr_table.add_row(tpr_fpr_row) -plt.xlim([10 ** -6, 0.1]) -plt.ylim([0.3, 1.0]) -plt.grid(linestyle='--', linewidth=1) -plt.xticks(x_labels) -plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) -plt.xscale('log') -plt.xlabel('False Positive Rate') -plt.ylabel('True Positive Rate') -plt.title('ROC on IJB') -plt.legend(loc="lower right") -print(tpr_fpr_table) diff --git a/src/src/face3d/models/arcface_torch/utils/utils_amp.py b/src/src/face3d/models/arcface_torch/utils/utils_amp.py deleted file mode 100644 index 9ac2a03f4212faa129faed447a8f4519c0a00a8b..0000000000000000000000000000000000000000 
--- a/src/src/face3d/models/arcface_torch/utils/utils_amp.py +++ /dev/null @@ -1,88 +0,0 @@ -from typing import Dict, List - -import torch - -if torch.__version__ < '1.9': - Iterable = torch._six.container_abcs.Iterable -else: - import collections - - Iterable = collections.abc.Iterable -from torch.cuda.amp import GradScaler - - -class _MultiDeviceReplicator(object): - """ - Lazily serves copies of a tensor to requested devices. Copies are cached per-device. - """ - - def __init__(self, master_tensor: torch.Tensor) -> None: - assert master_tensor.is_cuda - self.master = master_tensor - self._per_device_tensors: Dict[torch.device, torch.Tensor] = {} - - def get(self, device) -> torch.Tensor: - retval = self._per_device_tensors.get(device, None) - if retval is None: - retval = self.master.to(device=device, non_blocking=True, copy=True) - self._per_device_tensors[device] = retval - return retval - - -class MaxClipGradScaler(GradScaler): - def __init__(self, init_scale, max_scale: float, growth_interval=100): - GradScaler.__init__(self, init_scale=init_scale, growth_interval=growth_interval) - self.max_scale = max_scale - - def scale_clip(self): - if self.get_scale() == self.max_scale: - self.set_growth_factor(1) - elif self.get_scale() < self.max_scale: - self.set_growth_factor(2) - elif self.get_scale() > self.max_scale: - self._scale.fill_(self.max_scale) - self.set_growth_factor(1) - - def scale(self, outputs): - """ - Multiplies ('scales') a tensor or list of tensors by the scale factor. - - Returns scaled outputs. If this instance of :class:`GradScaler` is not enabled, outputs are returned - unmodified. - - Arguments: - outputs (Tensor or iterable of Tensors): Outputs to scale. - """ - if not self._enabled: - return outputs - self.scale_clip() - # Short-circuit for the common case. - if isinstance(outputs, torch.Tensor): - assert outputs.is_cuda - if self._scale is None: - self._lazy_init_scale_growth_tracker(outputs.device) - assert self._scale is not None - return outputs * self._scale.to(device=outputs.device, non_blocking=True) - - # Invoke the more complex machinery only if we're treating multiple outputs. 
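A minimal AMP training step using the MaxClipGradScaler defined above; this is an illustrative sketch, assuming a CUDA device (the scale() path asserts outputs live on the GPU), and the layer sizes and scale values are arbitrary:

```python
# illustrative AMP step with MaxClipGradScaler (assumes a CUDA device; sizes arbitrary)
import torch

model = torch.nn.Linear(8, 8).cuda()
opt = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = MaxClipGradScaler(init_scale=128, max_scale=1024, growth_interval=100)

loss = model(torch.randn(4, 8, device="cuda")).sum()
scaler.scale(loss).backward()   # scale_clip() stops the scale from growing past max_scale
scaler.step(opt)
scaler.update()
```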
- stash: List[_MultiDeviceReplicator] = [] # holds a reference that can be overwritten by apply_scale - - def apply_scale(val): - if isinstance(val, torch.Tensor): - assert val.is_cuda - if len(stash) == 0: - if self._scale is None: - self._lazy_init_scale_growth_tracker(val.device) - assert self._scale is not None - stash.append(_MultiDeviceReplicator(self._scale)) - return val * stash[0].get(val.device) - elif isinstance(val, Iterable): - iterable = map(apply_scale, val) - if isinstance(val, list) or isinstance(val, tuple): - return type(val)(iterable) - else: - return iterable - else: - raise ValueError("outputs must be a Tensor or an iterable of Tensors") - - return apply_scale(outputs) diff --git a/src/src/face3d/models/arcface_torch/utils/utils_callbacks.py b/src/src/face3d/models/arcface_torch/utils/utils_callbacks.py deleted file mode 100644 index bd2f56cba47c57de102710ff56eaac591e59f4da..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/utils/utils_callbacks.py +++ /dev/null @@ -1,117 +0,0 @@ -import logging -import os -import time -from typing import List - -import torch - -from eval import verification -from utils.utils_logging import AverageMeter - - -class CallBackVerification(object): - def __init__(self, frequent, rank, val_targets, rec_prefix, image_size=(112, 112)): - self.frequent: int = frequent - self.rank: int = rank - self.highest_acc: float = 0.0 - self.highest_acc_list: List[float] = [0.0] * len(val_targets) - self.ver_list: List[object] = [] - self.ver_name_list: List[str] = [] - if self.rank == 0: - self.init_dataset(val_targets=val_targets, data_dir=rec_prefix, image_size=image_size) - - def ver_test(self, backbone: torch.nn.Module, global_step: int): - results = [] - for i in range(len(self.ver_list)): - acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( - self.ver_list[i], backbone, 10, 10) - logging.info('[%s][%d]XNorm: %f' % (self.ver_name_list[i], global_step, xnorm)) - logging.info('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (self.ver_name_list[i], global_step, acc2, std2)) - if acc2 > self.highest_acc_list[i]: - self.highest_acc_list[i] = acc2 - logging.info( - '[%s][%d]Accuracy-Highest: %1.5f' % (self.ver_name_list[i], global_step, self.highest_acc_list[i])) - results.append(acc2) - - def init_dataset(self, val_targets, data_dir, image_size): - for name in val_targets: - path = os.path.join(data_dir, name + ".bin") - if os.path.exists(path): - data_set = verification.load_bin(path, image_size) - self.ver_list.append(data_set) - self.ver_name_list.append(name) - - def __call__(self, num_update, backbone: torch.nn.Module): - if self.rank == 0 and num_update > 0 and num_update % self.frequent == 0: - backbone.eval() - self.ver_test(backbone, num_update) - backbone.train() - - -class CallBackLogging(object): - def __init__(self, frequent, rank, total_step, batch_size, world_size, writer=None): - self.frequent: int = frequent - self.rank: int = rank - self.time_start = time.time() - self.total_step: int = total_step - self.batch_size: int = batch_size - self.world_size: int = world_size - self.writer = writer - - self.init = False - self.tic = 0 - - def __call__(self, - global_step: int, - loss: AverageMeter, - epoch: int, - fp16: bool, - learning_rate: float, - grad_scaler: torch.cuda.amp.GradScaler): - if self.rank == 0 and global_step > 0 and global_step % self.frequent == 0: - if self.init: - try: - speed: float = self.frequent * self.batch_size / (time.time() - self.tic) - speed_total = speed * self.world_size 
- except ZeroDivisionError: - speed_total = float('inf') - - time_now = (time.time() - self.time_start) / 3600 - time_total = time_now / ((global_step + 1) / self.total_step) - time_for_end = time_total - time_now - if self.writer is not None: - self.writer.add_scalar('time_for_end', time_for_end, global_step) - self.writer.add_scalar('learning_rate', learning_rate, global_step) - self.writer.add_scalar('loss', loss.avg, global_step) - if fp16: - msg = "Speed %.2f samples/sec Loss %.4f LearningRate %.4f Epoch: %d Global Step: %d " \ - "Fp16 Grad Scale: %2.f Required: %1.f hours" % ( - speed_total, loss.avg, learning_rate, epoch, global_step, - grad_scaler.get_scale(), time_for_end - ) - else: - msg = "Speed %.2f samples/sec Loss %.4f LearningRate %.4f Epoch: %d Global Step: %d " \ - "Required: %1.f hours" % ( - speed_total, loss.avg, learning_rate, epoch, global_step, time_for_end - ) - logging.info(msg) - loss.reset() - self.tic = time.time() - else: - self.init = True - self.tic = time.time() - - -class CallBackModelCheckpoint(object): - def __init__(self, rank, output="./"): - self.rank: int = rank - self.output: str = output - - def __call__(self, global_step, backbone, partial_fc, ): - if global_step > 100 and self.rank == 0: - path_module = os.path.join(self.output, "backbone.pth") - torch.save(backbone.module.state_dict(), path_module) - logging.info("Pytorch Model Saved in '{}'".format(path_module)) - - if global_step > 100 and partial_fc is not None: - partial_fc.save_params() diff --git a/src/src/face3d/models/arcface_torch/utils/utils_config.py b/src/src/face3d/models/arcface_torch/utils/utils_config.py deleted file mode 100644 index 0c02eaf70fc0140aca7925f621c29a496f491cae..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/utils/utils_config.py +++ /dev/null @@ -1,16 +0,0 @@ -import importlib -import os.path as osp - - -def get_config(config_file): - assert config_file.startswith('configs/'), 'config file setting must start with configs/' - temp_config_name = osp.basename(config_file) - temp_module_name = osp.splitext(temp_config_name)[0] - config = importlib.import_module("configs.base") - cfg = config.config - config = importlib.import_module("configs.%s" % temp_module_name) - job_cfg = config.config - cfg.update(job_cfg) - if cfg.output is None: - cfg.output = osp.join('work_dirs', temp_module_name) - return cfg \ No newline at end of file diff --git a/src/src/face3d/models/arcface_torch/utils/utils_logging.py b/src/src/face3d/models/arcface_torch/utils/utils_logging.py deleted file mode 100644 index c787b6aae7cd037a4718df44d672b8ffa9e5c249..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/arcface_torch/utils/utils_logging.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging -import os -import sys - - -class AverageMeter(object): - """Computes and stores the average and current value - """ - - def __init__(self): - self.val = None - self.avg = None - self.sum = None - self.count = None - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - -def init_logging(rank, models_root): - if rank == 0: - log_root = logging.getLogger() - log_root.setLevel(logging.INFO) - formatter = logging.Formatter("Training: %(asctime)s-%(message)s") - handler_file = logging.FileHandler(os.path.join(models_root, "training.log")) - handler_stream = logging.StreamHandler(sys.stdout) - 
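AverageMeter above keeps an n-weighted running mean, which CallBackLogging resets after every report. A tiny usage sketch with invented batch losses:

```python
meter = AverageMeter()
for batch_loss, batch_size in [(0.9, 128), (0.7, 128), (0.5, 64)]:
    meter.update(batch_loss, n=batch_size)
print(round(meter.avg, 4))   # n-weighted mean: (0.9*128 + 0.7*128 + 0.5*64) / 320 = 0.74
```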
handler_file.setFormatter(formatter) - handler_stream.setFormatter(formatter) - log_root.addHandler(handler_file) - log_root.addHandler(handler_stream) - log_root.info('rank_id: %d' % rank) diff --git a/src/src/face3d/models/arcface_torch/utils/utils_os.py b/src/src/face3d/models/arcface_torch/utils/utils_os.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/src/face3d/models/base_model.py b/src/src/face3d/models/base_model.py deleted file mode 100644 index cfe64a7f739ad8f8cfbf3073a2bf49e1468127fd..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/base_model.py +++ /dev/null @@ -1,316 +0,0 @@ -"""This script defines the base network model for Deep3DFaceRecon_pytorch -""" - -import os -import numpy as np -import torch -from collections import OrderedDict -from abc import ABC, abstractmethod -from . import networks - - -class BaseModel(ABC): - """This class is an abstract base class (ABC) for models. - To create a subclass, you need to implement the following five functions: - -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). - -- <set_input>: unpack data from dataset and apply preprocessing. - -- <forward>: produce intermediate results. - -- <optimize_parameters>: calculate losses, gradients, and update network weights. - -- <modify_commandline_options>: (optionally) add model-specific options and set default options. - """ - - def __init__(self, opt): - """Initialize the BaseModel class. - - Parameters: - opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions - - When creating your custom class, you need to implement your own initialization. - In this function, you should first call <BaseModel.__init__(self, opt)>. - Then, you need to define four lists: - -- self.loss_names (str list): specify the training losses that you want to plot and save. - -- self.model_names (str list): define networks used in our training. - -- self.visual_names (str list): specify the images that you want to display and save. - -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. - """ - self.opt = opt - self.isTrain = False - self.device = torch.device('cpu') - self.save_dir = " " # os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir - self.loss_names = [] - self.model_names = [] - self.visual_names = [] - self.parallel_names = [] - self.optimizers = [] - self.image_paths = [] - self.metric = 0 # used for learning rate policy 'plateau' - - @staticmethod - def dict_grad_hook_factory(add_func=lambda x: x): - saved_dict = dict() - - def hook_gen(name): - def grad_hook(grad): - saved_vals = add_func(grad) - saved_dict[name] = saved_vals - return grad_hook - return hook_gen, saved_dict - - @staticmethod - def modify_commandline_options(parser, is_train): - """Add new model-specific options, and rewrite default values for existing options. - - Parameters: - parser -- original option parser - is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. - - Returns: - the modified parser. - """ - return parser - - @abstractmethod - def set_input(self, input): - """Unpack input data from the dataloader and perform necessary pre-processing steps. - - Parameters: - input (dict): includes the data itself and its metadata information. 
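A short usage sketch for init_logging defined above; the temporary directory stands in for the models_root output directory, and only rank 0 attaches handlers:

```python
import logging
import tempfile

log_dir = tempfile.mkdtemp()              # stands in for the models_root output dir
init_logging(rank=0, models_root=log_dir)
logging.info("hello")                     # reaches stdout and <log_dir>/training.log
```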
- """ - pass - - @abstractmethod - def forward(self): - """Run forward pass; called by both functions and .""" - pass - - @abstractmethod - def optimize_parameters(self): - """Calculate losses, gradients, and update network weights; called in every training iteration""" - pass - - def setup(self, opt): - """Load and print networks; create schedulers - - Parameters: - opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions - """ - if self.isTrain: - self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers] - - if not self.isTrain or opt.continue_train: - load_suffix = opt.epoch - self.load_networks(load_suffix) - - - # self.print_networks(opt.verbose) - - def parallelize(self, convert_sync_batchnorm=True): - if not self.opt.use_ddp: - for name in self.parallel_names: - if isinstance(name, str): - module = getattr(self, name) - setattr(self, name, module.to(self.device)) - else: - for name in self.model_names: - if isinstance(name, str): - module = getattr(self, name) - if convert_sync_batchnorm: - module = torch.nn.SyncBatchNorm.convert_sync_batchnorm(module) - setattr(self, name, torch.nn.parallel.DistributedDataParallel(module.to(self.device), - device_ids=[self.device.index], - find_unused_parameters=True, broadcast_buffers=True)) - - # DistributedDataParallel is not needed when a module doesn't have any parameter that requires a gradient. - for name in self.parallel_names: - if isinstance(name, str) and name not in self.model_names: - module = getattr(self, name) - setattr(self, name, module.to(self.device)) - - # put state_dict of optimizer to gpu device - if self.opt.phase != 'test': - if self.opt.continue_train: - for optim in self.optimizers: - for state in optim.state.values(): - for k, v in state.items(): - if isinstance(v, torch.Tensor): - state[k] = v.to(self.device) - - def data_dependent_initialize(self, data): - pass - - def train(self): - """Make models train mode""" - for name in self.model_names: - if isinstance(name, str): - net = getattr(self, name) - net.train() - - def eval(self): - """Make models eval mode""" - for name in self.model_names: - if isinstance(name, str): - net = getattr(self, name) - net.eval() - - def test(self): - """Forward function used in test time. - - This function wraps function in no_grad() so we don't save intermediate steps for backprop - It also calls to produce additional visualization results - """ - with torch.no_grad(): - self.forward() - self.compute_visuals() - - def compute_visuals(self): - """Calculate additional output images for visdom and HTML visualization""" - pass - - def get_image_paths(self, name='A'): - """ Return image paths that are used to load current data""" - return self.image_paths if name =='A' else self.image_paths_B - - def update_learning_rate(self): - """Update learning rates for all the networks; called at the end of every epoch""" - for scheduler in self.schedulers: - if self.opt.lr_policy == 'plateau': - scheduler.step(self.metric) - else: - scheduler.step() - - lr = self.optimizers[0].param_groups[0]['lr'] - print('learning rate = %.7f' % lr) - - def get_current_visuals(self): - """Return visualization images. train.py will display these images with visdom, and save the images to a HTML""" - visual_ret = OrderedDict() - for name in self.visual_names: - if isinstance(name, str): - visual_ret[name] = getattr(self, name)[:, :3, ...] - return visual_ret - - def get_current_losses(self): - """Return traning losses / errors. 
train.py will print out these errors on console, and save them to a file""" - errors_ret = OrderedDict() - for name in self.loss_names: - if isinstance(name, str): - errors_ret[name] = float(getattr(self, 'loss_' + name)) # float(...) works for both scalar tensor and float number - return errors_ret - - def save_networks(self, epoch): - """Save all the networks to the disk. - - Parameters: - epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) - """ - if not os.path.isdir(self.save_dir): - os.makedirs(self.save_dir) - - save_filename = 'epoch_%s.pth' % (epoch) - save_path = os.path.join(self.save_dir, save_filename) - - save_dict = {} - for name in self.model_names: - if isinstance(name, str): - net = getattr(self, name) - if isinstance(net, torch.nn.DataParallel) or isinstance(net, - torch.nn.parallel.DistributedDataParallel): - net = net.module - save_dict[name] = net.state_dict() - - - for i, optim in enumerate(self.optimizers): - save_dict['opt_%02d'%i] = optim.state_dict() - - for i, sched in enumerate(self.schedulers): - save_dict['sched_%02d'%i] = sched.state_dict() - - torch.save(save_dict, save_path) - - def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): - """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" - key = keys[i] - if i + 1 == len(keys): # at the end, pointing to a parameter/buffer - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'running_mean' or key == 'running_var'): - if getattr(module, key) is None: - state_dict.pop('.'.join(keys)) - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'num_batches_tracked'): - state_dict.pop('.'.join(keys)) - else: - self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) - - def load_networks(self, epoch): - """Load all the networks from the disk. 
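save_networks above packs all per-name state_dicts plus optimizer and scheduler states into a single epoch_<n>.pth file. A sketch of the resulting dictionary layout with toy contents (the key names follow the code; the values and path here are placeholders):

```python
import torch

# rough shape of an 'epoch_20.pth' written by save_networks (placeholder values)
save_dict = {
    "net_recon": {"weight": torch.zeros(2, 2)},   # one state_dict per name in model_names
    "opt_00": {"state": {}, "param_groups": []},  # optimizers saved as opt_%02d
    "sched_00": {"last_epoch": 19},               # schedulers saved as sched_%02d
}
torch.save(save_dict, "epoch_20.pth")             # hypothetical path
print(sorted(torch.load("epoch_20.pth").keys()))
```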
- - Parameters: - epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) - """ - if self.opt.isTrain and self.opt.pretrained_name is not None: - load_dir = os.path.join(self.opt.checkpoints_dir, self.opt.pretrained_name) - else: - load_dir = self.save_dir - load_filename = 'epoch_%s.pth' % (epoch) - load_path = os.path.join(load_dir, load_filename) - state_dict = torch.load(load_path, map_location=self.device) - print('loading the model from %s' % load_path) - - for name in self.model_names: - if isinstance(name, str): - net = getattr(self, name) - if isinstance(net, torch.nn.DataParallel): - net = net.module - net.load_state_dict(state_dict[name]) - - if self.opt.phase != 'test': - if self.opt.continue_train: - print('loading the optim from %s' % load_path) - for i, optim in enumerate(self.optimizers): - optim.load_state_dict(state_dict['opt_%02d'%i]) - - try: - print('loading the sched from %s' % load_path) - for i, sched in enumerate(self.schedulers): - sched.load_state_dict(state_dict['sched_%02d'%i]) - except Exception: - print('Failed to load schedulers, set schedulers according to epoch count manually') - for i, sched in enumerate(self.schedulers): - sched.last_epoch = self.opt.epoch_count - 1 - - - - - def print_networks(self, verbose): - """Print the total number of parameters in the network and (if verbose) network architecture - - Parameters: - verbose (bool) -- if verbose: print the network architecture - """ - print('---------- Networks initialized -------------') - for name in self.model_names: - if isinstance(name, str): - net = getattr(self, name) - num_params = 0 - for param in net.parameters(): - num_params += param.numel() - if verbose: - print(net) - print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6)) - print('-----------------------------------------------') - - def set_requires_grad(self, nets, requires_grad=False): - """Set requires_grad=False for all the networks to avoid unnecessary computations - Parameters: - nets (network list) -- a list of networks - requires_grad (bool) -- whether the networks require gradients or not - """ - if not isinstance(nets, list): - nets = [nets] - for net in nets: - if net is not None: - for param in net.parameters(): - param.requires_grad = requires_grad - - def generate_visuals_for_evaluation(self, data, mode): - return {} diff --git a/src/src/face3d/models/bfm.py b/src/src/face3d/models/bfm.py deleted file mode 100644 index a75db682f02dd1979d4a7de1d11dd3aa5cdf5279..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/bfm.py +++ /dev/null @@ -1,331 +0,0 @@ -"""This script defines the parametric 3d face model for Deep3DFaceRecon_pytorch -""" - -import numpy as np -import torch -import torch.nn.functional as F -from scipy.io import loadmat -from src.face3d.util.load_mats import transferBFM09 -import os - -def perspective_projection(focal, center): - # return p.T (N, 3) @ (3, 3) - return np.array([ - focal, 0, center, - 0, focal, center, - 0, 0, 1 - ]).reshape([3, 3]).astype(np.float32).transpose() - -class SH: - def __init__(self): - self.a = [np.pi, 2 * np.pi / np.sqrt(3.), 2 * np.pi / np.sqrt(8.)] - self.c = [1/np.sqrt(4 * np.pi), np.sqrt(3.) / np.sqrt(4 * np.pi), 3 * np.sqrt(5.) 
/ np.sqrt(12 * np.pi)] - - - -class ParametricFaceModel: - def __init__(self, - bfm_folder='./BFM', - recenter=True, - camera_distance=10., - init_lit=np.array([ - 0.8, 0, 0, 0, 0, 0, 0, 0, 0 - ]), - focal=1015., - center=112., - is_train=True, - default_name='BFM_model_front.mat'): - - if not os.path.isfile(os.path.join(bfm_folder, default_name)): - transferBFM09(bfm_folder) - - model = loadmat(os.path.join(bfm_folder, default_name)) - # mean face shape. [3*N,1] - self.mean_shape = model['meanshape'].astype(np.float32) - # identity basis. [3*N,80] - self.id_base = model['idBase'].astype(np.float32) - # expression basis. [3*N,64] - self.exp_base = model['exBase'].astype(np.float32) - # mean face texture. [3*N,1] (0-255) - self.mean_tex = model['meantex'].astype(np.float32) - # texture basis. [3*N,80] - self.tex_base = model['texBase'].astype(np.float32) - # face indices for each vertex that lies in. starts from 0. [N,8] - self.point_buf = model['point_buf'].astype(np.int64) - 1 - # vertex indices for each face. starts from 0. [F,3] - self.face_buf = model['tri'].astype(np.int64) - 1 - # vertex indices for 68 landmarks. starts from 0. [68,1] - self.keypoints = np.squeeze(model['keypoints']).astype(np.int64) - 1 - - if is_train: - # vertex indices for small face region to compute photometric error. starts from 0. - self.front_mask = np.squeeze(model['frontmask2_idx']).astype(np.int64) - 1 - # vertex indices for each face from small face region. starts from 0. [f,3] - self.front_face_buf = model['tri_mask2'].astype(np.int64) - 1 - # vertex indices for pre-defined skin region to compute reflectance loss - self.skin_mask = np.squeeze(model['skinmask']) - - if recenter: - mean_shape = self.mean_shape.reshape([-1, 3]) - mean_shape = mean_shape - np.mean(mean_shape, axis=0, keepdims=True) - self.mean_shape = mean_shape.reshape([-1, 1]) - - self.persc_proj = perspective_projection(focal, center) - self.device = 'cpu' - self.camera_distance = camera_distance - self.SH = SH() - self.init_lit = init_lit.reshape([1, 1, -1]).astype(np.float32) - - - def to(self, device): - self.device = device - for key, value in self.__dict__.items(): - if type(value).__module__ == np.__name__: - setattr(self, key, torch.tensor(value).to(device)) - - - def compute_shape(self, id_coeff, exp_coeff): - """ - Return: - face_shape -- torch.tensor, size (B, N, 3) - - Parameters: - id_coeff -- torch.tensor, size (B, 80), identity coeffs - exp_coeff -- torch.tensor, size (B, 64), expression coeffs - """ - batch_size = id_coeff.shape[0] - id_part = torch.einsum('ij,aj->ai', self.id_base, id_coeff) - exp_part = torch.einsum('ij,aj->ai', self.exp_base, exp_coeff) - face_shape = id_part + exp_part + self.mean_shape.reshape([1, -1]) - return face_shape.reshape([batch_size, -1, 3]) - - - def compute_texture(self, tex_coeff, normalize=True): - """ - Return: - face_texture -- torch.tensor, size (B, N, 3), in RGB order, range (0, 1.) - - Parameters: - tex_coeff -- torch.tensor, size (B, 80) - """ - batch_size = tex_coeff.shape[0] - face_texture = torch.einsum('ij,aj->ai', self.tex_base, tex_coeff) + self.mean_tex - if normalize: - face_texture = face_texture / 255. 
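compute_shape above is a plain linear morphable model: mean shape plus identity and expression basis offsets. An unbatched NumPy sketch with random stand-in bases (the real BFM bases come from BFM_model_front.mat):

```python
import numpy as np

N = 5                                              # toy vertex count; the real model has ~35k
rng = np.random.default_rng(0)
mean_shape = rng.normal(size=3 * N).astype(np.float32)
id_base = rng.normal(size=(3 * N, 80)).astype(np.float32)
exp_base = rng.normal(size=(3 * N, 64)).astype(np.float32)

id_coeff = rng.normal(size=80).astype(np.float32)
exp_coeff = rng.normal(size=64).astype(np.float32)

# same linear model as compute_shape: mean + identity offset + expression offset
face_shape = (mean_shape + id_base @ id_coeff + exp_base @ exp_coeff).reshape(N, 3)
print(face_shape.shape)   # (5, 3)
```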
- return face_texture.reshape([batch_size, -1, 3]) - - - def compute_norm(self, face_shape): - """ - Return: - vertex_norm -- torch.tensor, size (B, N, 3) - - Parameters: - face_shape -- torch.tensor, size (B, N, 3) - """ - - v1 = face_shape[:, self.face_buf[:, 0]] - v2 = face_shape[:, self.face_buf[:, 1]] - v3 = face_shape[:, self.face_buf[:, 2]] - e1 = v1 - v2 - e2 = v2 - v3 - face_norm = torch.cross(e1, e2, dim=-1) - face_norm = F.normalize(face_norm, dim=-1, p=2) - face_norm = torch.cat([face_norm, torch.zeros(face_norm.shape[0], 1, 3).to(self.device)], dim=1) - - vertex_norm = torch.sum(face_norm[:, self.point_buf], dim=2) - vertex_norm = F.normalize(vertex_norm, dim=-1, p=2) - return vertex_norm - - - def compute_color(self, face_texture, face_norm, gamma): - """ - Return: - face_color -- torch.tensor, size (B, N, 3), range (0, 1.) - - Parameters: - face_texture -- torch.tensor, size (B, N, 3), from texture model, range (0, 1.) - face_norm -- torch.tensor, size (B, N, 3), rotated face normal - gamma -- torch.tensor, size (B, 27), SH coeffs - """ - batch_size = gamma.shape[0] - v_num = face_texture.shape[1] - a, c = self.SH.a, self.SH.c - gamma = gamma.reshape([batch_size, 3, 9]) - gamma = gamma + self.init_lit - gamma = gamma.permute(0, 2, 1) - Y = torch.cat([ - a[0] * c[0] * torch.ones_like(face_norm[..., :1]).to(self.device), - -a[1] * c[1] * face_norm[..., 1:2], - a[1] * c[1] * face_norm[..., 2:], - -a[1] * c[1] * face_norm[..., :1], - a[2] * c[2] * face_norm[..., :1] * face_norm[..., 1:2], - -a[2] * c[2] * face_norm[..., 1:2] * face_norm[..., 2:], - 0.5 * a[2] * c[2] / np.sqrt(3.) * (3 * face_norm[..., 2:] ** 2 - 1), - -a[2] * c[2] * face_norm[..., :1] * face_norm[..., 2:], - 0.5 * a[2] * c[2] * (face_norm[..., :1] ** 2 - face_norm[..., 1:2] ** 2) - ], dim=-1) - r = Y @ gamma[..., :1] - g = Y @ gamma[..., 1:2] - b = Y @ gamma[..., 2:] - face_color = torch.cat([r, g, b], dim=-1) * face_texture - return face_color - - - def compute_rotation(self, angles): - """ - Return: - rot -- torch.tensor, size (B, 3, 3) pts @ trans_mat - - Parameters: - angles -- torch.tensor, size (B, 3), radian - """ - - batch_size = angles.shape[0] - ones = torch.ones([batch_size, 1]).to(self.device) - zeros = torch.zeros([batch_size, 1]).to(self.device) - x, y, z = angles[:, :1], angles[:, 1:2], angles[:, 2:], - - rot_x = torch.cat([ - ones, zeros, zeros, - zeros, torch.cos(x), -torch.sin(x), - zeros, torch.sin(x), torch.cos(x) - ], dim=1).reshape([batch_size, 3, 3]) - - rot_y = torch.cat([ - torch.cos(y), zeros, torch.sin(y), - zeros, ones, zeros, - -torch.sin(y), zeros, torch.cos(y) - ], dim=1).reshape([batch_size, 3, 3]) - - rot_z = torch.cat([ - torch.cos(z), -torch.sin(z), zeros, - torch.sin(z), torch.cos(z), zeros, - zeros, zeros, ones - ], dim=1).reshape([batch_size, 3, 3]) - - rot = rot_z @ rot_y @ rot_x - return rot.permute(0, 2, 1) - - - def to_camera(self, face_shape): - face_shape[..., -1] = self.camera_distance - face_shape[..., -1] - return face_shape - - def to_image(self, face_shape): - """ - Return: - face_proj -- torch.tensor, size (B, N, 2), y direction is opposite to v direction - - Parameters: - face_shape -- torch.tensor, size (B, N, 3) - """ - # to image_plane - face_proj = face_shape @ self.persc_proj - face_proj = face_proj[..., :2] / face_proj[..., 2:] - - return face_proj - - - def transform(self, face_shape, rot, trans): - """ - Return: - face_shape -- torch.tensor, size (B, N, 3) pts @ rot + trans - - Parameters: - face_shape -- torch.tensor, size (B, N, 3) - rot -- 
torch.tensor, size (B, 3, 3) - trans -- torch.tensor, size (B, 3) - """ - return face_shape @ rot + trans.unsqueeze(1) - - - def get_landmarks(self, face_proj): - """ - Return: - face_lms -- torch.tensor, size (B, 68, 2) - - Parameters: - face_proj -- torch.tensor, size (B, N, 2) - """ - return face_proj[:, self.keypoints] - - def split_coeff(self, coeffs): - """ - Return: - coeffs_dict -- a dict of torch.tensors - - Parameters: - coeffs -- torch.tensor, size (B, 256) - """ - id_coeffs = coeffs[:, :80] - exp_coeffs = coeffs[:, 80: 144] - tex_coeffs = coeffs[:, 144: 224] - angles = coeffs[:, 224: 227] - gammas = coeffs[:, 227: 254] - translations = coeffs[:, 254:] - return { - 'id': id_coeffs, - 'exp': exp_coeffs, - 'tex': tex_coeffs, - 'angle': angles, - 'gamma': gammas, - 'trans': translations - } - def compute_for_render(self, coeffs): - """ - Return: - face_vertex -- torch.tensor, size (B, N, 3), in camera coordinate - face_color -- torch.tensor, size (B, N, 3), in RGB order - landmark -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction - Parameters: - coeffs -- torch.tensor, size (B, 257) - """ - coef_dict = self.split_coeff(coeffs) - face_shape = self.compute_shape(coef_dict['id'], coef_dict['exp']) - rotation = self.compute_rotation(coef_dict['angle']) - - - face_shape_transformed = self.transform(face_shape, rotation, coef_dict['trans']) - face_vertex = self.to_camera(face_shape_transformed) - - face_proj = self.to_image(face_vertex) - landmark = self.get_landmarks(face_proj) - - face_texture = self.compute_texture(coef_dict['tex']) - face_norm = self.compute_norm(face_shape) - face_norm_roted = face_norm @ rotation - face_color = self.compute_color(face_texture, face_norm_roted, coef_dict['gamma']) - - return face_vertex, face_texture, face_color, landmark - - def compute_for_render_woRotation(self, coeffs): - """ - Return: - face_vertex -- torch.tensor, size (B, N, 3), in camera coordinate - face_color -- torch.tensor, size (B, N, 3), in RGB order - landmark -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction - Parameters: - coeffs -- torch.tensor, size (B, 257) - """ - coef_dict = self.split_coeff(coeffs) - face_shape = self.compute_shape(coef_dict['id'], coef_dict['exp']) - #rotation = self.compute_rotation(coef_dict['angle']) - - - #face_shape_transformed = self.transform(face_shape, rotation, coef_dict['trans']) - face_vertex = self.to_camera(face_shape) - - face_proj = self.to_image(face_vertex) - landmark = self.get_landmarks(face_proj) - - face_texture = self.compute_texture(coef_dict['tex']) - face_norm = self.compute_norm(face_shape) - face_norm_roted = face_norm # @ rotation - face_color = self.compute_color(face_texture, face_norm_roted, coef_dict['gamma']) - - return face_vertex, face_texture, face_color, landmark - - -if __name__ == '__main__': - transferBFM09() \ No newline at end of file diff --git a/src/src/face3d/models/facerecon_model.py b/src/src/face3d/models/facerecon_model.py deleted file mode 100644 index 7de8ca6eebc50ff1ed52c5ba37d31b43f977b5e1..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/facerecon_model.py +++ /dev/null @@ -1,220 +0,0 @@ -"""This script defines the face reconstruction model for Deep3DFaceRecon_pytorch -""" - -import numpy as np -import torch -from src.face3d.models.base_model import BaseModel -from src.face3d.models import networks -from src.face3d.models.bfm import ParametricFaceModel -from src.face3d.models.losses import perceptual_loss, photo_loss, reg_loss, 
reflectance_loss, landmark_loss -from src.face3d.util import util -from src.face3d.util.nvdiffrast import MeshRenderer -# from src.face3d.util.preprocess import estimate_norm_torch - -import trimesh -from scipy.io import savemat - -class FaceReconModel(BaseModel): - - @staticmethod - def modify_commandline_options(parser, is_train=False): - """ Configures options specific for CUT model - """ - # net structure and parameters - parser.add_argument('--net_recon', type=str, default='resnet50', choices=['resnet18', 'resnet34', 'resnet50'], help='network structure') - parser.add_argument('--init_path', type=str, default='./checkpoints/init_model/resnet50-0676ba61.pth') - parser.add_argument('--use_last_fc', type=util.str2bool, nargs='?', const=True, default=False, help='zero initialize the last fc') - parser.add_argument('--bfm_folder', type=str, default='./checkpoints/BFM_Fitting/') - parser.add_argument('--bfm_model', type=str, default='BFM_model_front.mat', help='bfm model') - - # renderer parameters - parser.add_argument('--focal', type=float, default=1015.) - parser.add_argument('--center', type=float, default=112.) - parser.add_argument('--camera_d', type=float, default=10.) - parser.add_argument('--z_near', type=float, default=5.) - parser.add_argument('--z_far', type=float, default=15.) - - if is_train: - # training parameters - parser.add_argument('--net_recog', type=str, default='r50', choices=['r18', 'r43', 'r50'], help='face recog network structure') - parser.add_argument('--net_recog_path', type=str, default='checkpoints/recog_model/ms1mv3_arcface_r50_fp16/backbone.pth') - parser.add_argument('--use_crop_face', type=util.str2bool, nargs='?', const=True, default=False, help='use crop mask for photo loss') - parser.add_argument('--use_predef_M', type=util.str2bool, nargs='?', const=True, default=False, help='use predefined M for predicted face') - - - # augmentation parameters - parser.add_argument('--shift_pixs', type=float, default=10., help='shift pixels') - parser.add_argument('--scale_delta', type=float, default=0.1, help='delta scale factor') - parser.add_argument('--rot_angle', type=float, default=10., help='rot angles, degree') - - # loss weights - parser.add_argument('--w_feat', type=float, default=0.2, help='weight for feat loss') - parser.add_argument('--w_color', type=float, default=1.92, help='weight for loss loss') - parser.add_argument('--w_reg', type=float, default=3.0e-4, help='weight for reg loss') - parser.add_argument('--w_id', type=float, default=1.0, help='weight for id_reg loss') - parser.add_argument('--w_exp', type=float, default=0.8, help='weight for exp_reg loss') - parser.add_argument('--w_tex', type=float, default=1.7e-2, help='weight for tex_reg loss') - parser.add_argument('--w_gamma', type=float, default=10.0, help='weight for gamma loss') - parser.add_argument('--w_lm', type=float, default=1.6e-3, help='weight for lm loss') - parser.add_argument('--w_reflc', type=float, default=5.0, help='weight for reflc loss') - - opt, _ = parser.parse_known_args() - parser.set_defaults( - focal=1015., center=112., camera_d=10., use_last_fc=False, z_near=5., z_far=15. - ) - if is_train: - parser.set_defaults( - use_crop_face=True, use_predef_M=False - ) - return parser - - def __init__(self, opt): - """Initialize this model class. - - Parameters: - opt -- training/test options - - A few things can be done here. 
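For reference, the 257-dimensional coefficient vector that split_coeff in bfm.py above carves up decomposes into six contiguous groups; a quick consistency check of that layout:

```python
# layout carved out by split_coeff: 80+64+80+3+27+3 == 257
sizes = {"id": 80, "exp": 64, "tex": 80, "angle": 3, "gamma": 27, "trans": 3}
assert sum(sizes.values()) == 257

offset = 0
for name, size in sizes.items():
    print(f"{name}: coeffs[:, {offset}:{offset + size}]")
    offset += size
```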
- - (required) call the initialization function of BaseModel - - define loss function, visualization images, model names, and optimizers - """ - BaseModel.__init__(self, opt) # call the initialization method of BaseModel - - self.visual_names = ['output_vis'] - self.model_names = ['net_recon'] - self.parallel_names = self.model_names + ['renderer'] - - self.facemodel = ParametricFaceModel( - bfm_folder=opt.bfm_folder, camera_distance=opt.camera_d, focal=opt.focal, center=opt.center, - is_train=self.isTrain, default_name=opt.bfm_model - ) - - fov = 2 * np.arctan(opt.center / opt.focal) * 180 / np.pi - self.renderer = MeshRenderer( - rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center) - ) - - if self.isTrain: - self.loss_names = ['all', 'feat', 'color', 'lm', 'reg', 'gamma', 'reflc'] - - self.net_recog = networks.define_net_recog( - net_recog=opt.net_recog, pretrained_path=opt.net_recog_path - ) - # loss func name: (compute_%s_loss) % loss_name - self.compute_feat_loss = perceptual_loss - self.comupte_color_loss = photo_loss - self.compute_lm_loss = landmark_loss - self.compute_reg_loss = reg_loss - self.compute_reflc_loss = reflectance_loss - - self.optimizer = torch.optim.Adam(self.net_recon.parameters(), lr=opt.lr) - self.optimizers = [self.optimizer] - self.parallel_names += ['net_recog'] - # Our program will automatically call to define schedulers, load networks, and print networks - - def set_input(self, input): - """Unpack input data from the dataloader and perform necessary pre-processing steps. - - Parameters: - input: a dictionary that contains the data itself and its metadata information. - """ - self.input_img = input['imgs'].to(self.device) - self.atten_mask = input['msks'].to(self.device) if 'msks' in input else None - self.gt_lm = input['lms'].to(self.device) if 'lms' in input else None - self.trans_m = input['M'].to(self.device) if 'M' in input else None - self.image_paths = input['im_paths'] if 'im_paths' in input else None - - def forward(self, output_coeff, device): - self.facemodel.to(device) - self.pred_vertex, self.pred_tex, self.pred_color, self.pred_lm = \ - self.facemodel.compute_for_render(output_coeff) - self.pred_mask, _, self.pred_face = self.renderer( - self.pred_vertex, self.facemodel.face_buf, feat=self.pred_color) - - self.pred_coeffs_dict = self.facemodel.split_coeff(output_coeff) - - - def compute_losses(self): - """Calculate losses, gradients, and update network weights; called in every training iteration""" - - assert self.net_recog.training == False - trans_m = self.trans_m - if not self.opt.use_predef_M: - trans_m = estimate_norm_torch(self.pred_lm, self.input_img.shape[-2]) - - pred_feat = self.net_recog(self.pred_face, trans_m) - gt_feat = self.net_recog(self.input_img, self.trans_m) - self.loss_feat = self.opt.w_feat * self.compute_feat_loss(pred_feat, gt_feat) - - face_mask = self.pred_mask - if self.opt.use_crop_face: - face_mask, _, _ = self.renderer(self.pred_vertex, self.facemodel.front_face_buf) - - face_mask = face_mask.detach() - self.loss_color = self.opt.w_color * self.comupte_color_loss( - self.pred_face, self.input_img, self.atten_mask * face_mask) - - loss_reg, loss_gamma = self.compute_reg_loss(self.pred_coeffs_dict, self.opt) - self.loss_reg = self.opt.w_reg * loss_reg - self.loss_gamma = self.opt.w_gamma * loss_gamma - - self.loss_lm = self.opt.w_lm * self.compute_lm_loss(self.pred_lm, self.gt_lm) - - self.loss_reflc = self.opt.w_reflc * self.compute_reflc_loss(self.pred_tex, 
self.facemodel.skin_mask) - - self.loss_all = self.loss_feat + self.loss_color + self.loss_reg + self.loss_gamma \ - + self.loss_lm + self.loss_reflc - - - def optimize_parameters(self, isTrain=True): - """Update network weights; it will be called in every training iteration.""" - self.forward() - self.compute_losses() - if isTrain: - self.optimizer.zero_grad() - self.loss_all.backward() - self.optimizer.step() - - def compute_visuals(self): - with torch.no_grad(): - input_img_numpy = 255. * self.input_img.detach().cpu().permute(0, 2, 3, 1).numpy() - output_vis = self.pred_face * self.pred_mask + (1 - self.pred_mask) * self.input_img - output_vis_numpy_raw = 255. * output_vis.detach().cpu().permute(0, 2, 3, 1).numpy() - - if self.gt_lm is not None: - gt_lm_numpy = self.gt_lm.cpu().numpy() - pred_lm_numpy = self.pred_lm.detach().cpu().numpy() - output_vis_numpy = util.draw_landmarks(output_vis_numpy_raw, gt_lm_numpy, 'b') - output_vis_numpy = util.draw_landmarks(output_vis_numpy, pred_lm_numpy, 'r') - - output_vis_numpy = np.concatenate((input_img_numpy, - output_vis_numpy_raw, output_vis_numpy), axis=-2) - else: - output_vis_numpy = np.concatenate((input_img_numpy, - output_vis_numpy_raw), axis=-2) - - self.output_vis = torch.tensor( - output_vis_numpy / 255., dtype=torch.float32 - ).permute(0, 3, 1, 2).to(self.device) - - def save_mesh(self, name): - - recon_shape = self.pred_vertex # get reconstructed shape - recon_shape[..., -1] = 10 - recon_shape[..., -1] # from camera space to world space - recon_shape = recon_shape.cpu().numpy()[0] - recon_color = self.pred_color - recon_color = recon_color.cpu().numpy()[0] - tri = self.facemodel.face_buf.cpu().numpy() - mesh = trimesh.Trimesh(vertices=recon_shape, faces=tri, vertex_colors=np.clip(255. * recon_color, 0, 255).astype(np.uint8)) - mesh.export(name) - - def save_coeff(self, name): - - pred_coeffs = {key: self.pred_coeffs_dict[key].cpu().numpy() for key in self.pred_coeffs_dict} - pred_lm = self.pred_lm.cpu().numpy() - pred_lm = np.stack([pred_lm[:,:,0],self.input_img.shape[2]-1-pred_lm[:,:,1]],axis=2) # transfer to image coordinate - pred_coeffs['lm68'] = pred_lm - savemat(name, pred_coeffs) - - - diff --git a/src/src/face3d/models/losses.py b/src/src/face3d/models/losses.py deleted file mode 100644 index 09d6a85870af1ef2b857e4a3fdd4b2f7fc991317..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/losses.py +++ /dev/null @@ -1,113 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from kornia.geometry import warp_affine -import torch.nn.functional as F - -def resize_n_crop(image, M, dsize=112): - # image: (b, c, h, w) - # M : (b, 2, 3) - return warp_affine(image, M, dsize=(dsize, dsize), align_corners=True) - -### perceptual level loss -class PerceptualLoss(nn.Module): - def __init__(self, recog_net, input_size=112): - super(PerceptualLoss, self).__init__() - self.recog_net = recog_net - self.preprocess = lambda x: 2 * x - 1 - self.input_size = input_size - def forward(self, imageA, imageB, M): - """ - 1 - cosine distance - Parameters: - imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order - imageB --same as imageA - """ - - imageA = self.preprocess(resize_n_crop(imageA, M, self.input_size)) - imageB = self.preprocess(resize_n_crop(imageB, M, self.input_size)) - - # freeze bn - self.recog_net.eval() - - id_featureA = F.normalize(self.recog_net(imageA), dim=-1, p=2) - id_featureB = F.normalize(self.recog_net(imageB), dim=-1, p=2) - cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) - # assert torch.sum((cosine_d > 1).float()) == 0 - return torch.sum(1 - cosine_d) / cosine_d.shape[0] - -def perceptual_loss(id_featureA, id_featureB): - cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) - # assert torch.sum((cosine_d > 1).float()) == 0 - return torch.sum(1 - cosine_d) / cosine_d.shape[0] - -### image level loss -def photo_loss(imageA, imageB, mask, eps=1e-6): - """ - l2 norm (with sqrt, to ensure backward stability; use eps, otherwise NaN may occur) - Parameters: - imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order - imageB --same as imageA - """ - loss = torch.sqrt(eps + torch.sum((imageA - imageB) ** 2, dim=1, keepdim=True)) * mask - loss = torch.sum(loss) / torch.max(torch.sum(mask), torch.tensor(1.0).to(mask.device)) - return loss - -def landmark_loss(predict_lm, gt_lm, weight=None): - """ - weighted mse loss - Parameters: - predict_lm --torch.tensor (B, 68, 2) - gt_lm --torch.tensor (B, 68, 2) - weight --numpy.array (1, 68) - """ - if weight is None: - weight = np.ones([68]) - weight[28:31] = 20 - weight[-8:] = 20 - weight = np.expand_dims(weight, 0) - weight = torch.tensor(weight).to(predict_lm.device) - loss = torch.sum((predict_lm - gt_lm)**2, dim=-1) * weight - loss = torch.sum(loss) / (predict_lm.shape[0] * predict_lm.shape[1]) - return loss - - -### regularization -def reg_loss(coeffs_dict, opt=None): - """ - l2 norm without the sqrt, from yu's implementation (mse) - tf.nn.l2_loss https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss - Parameters: - coeffs_dict -- a dict of torch.tensors, keys: id, exp, tex, angle, gamma, trans - - """ - # coefficient regularization to ensure plausible 3d faces - if opt: - w_id, w_exp, w_tex = opt.w_id, opt.w_exp, opt.w_tex - else: - w_id, w_exp, w_tex = 1, 1, 1 - creg_loss = w_id * torch.sum(coeffs_dict['id'] ** 2) + \ - w_exp * torch.sum(coeffs_dict['exp'] ** 2) + \ - w_tex * torch.sum(coeffs_dict['tex'] ** 2) - creg_loss = creg_loss / coeffs_dict['id'].shape[0] - - # gamma regularization to ensure a nearly-monochromatic light - gamma = coeffs_dict['gamma'].reshape([-1, 3, 9]) - gamma_mean = torch.mean(gamma, dim=1, keepdim=True) - gamma_loss = torch.mean((gamma - gamma_mean) ** 2) - - return creg_loss, gamma_loss - -def reflectance_loss(texture, mask): - """ - minimize texture variance (mse), albedo regularization to ensure a uniform skin albedo - Parameters: - texture --torch.tensor, (B, N, 3) - mask --torch.tensor, (N), 1 or 0 - - """ - mask = mask.reshape([1, mask.shape[0], 1]) - texture_mean = torch.sum(mask * texture, dim=1, keepdim=True) / torch.sum(mask) - loss = torch.sum(((texture - texture_mean) * mask)**2) / (texture.shape[0] * torch.sum(mask)) - return loss - diff --git a/src/src/face3d/models/networks.py b/src/src/face3d/models/networks.py deleted file mode 100644 index ead9cdcb8720b845c233de79dc8a8d1668492108..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/networks.py +++ /dev/null @@ -1,521 +0,0 @@ -"""This script defines deep neural networks for Deep3DFaceRecon_pytorch -""" - -import os -import numpy as np -import torch.nn.functional as F -from torch.nn import init -import functools -from torch.optim import lr_scheduler -import torch -from torch import Tensor -import torch.nn as nn -try: - from torch.hub import load_state_dict_from_url -except ImportError: - from torch.utils.model_zoo import load_url as load_state_dict_from_url -from typing import Type, Any, Callable, Union, List, Optional -from .arcface_torch.backbones import get_model -from kornia.geometry import warp_affine
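# --- illustration, not part of the patch above: a minimal, self-contained sketch
# (dummy tensors, identity affine; only the public torch/kornia APIs) of what the
# resize_n_crop helper defined in losses.py and networks.py computes.
import torch
from kornia.geometry import warp_affine

B = 2
image = torch.rand(B, 3, 224, 224)   # (b, c, h, w), range (0, 1)
M = torch.eye(2, 3).repeat(B, 1, 1)  # identity 2x3 affine per sample
out = warp_affine(image, M, dsize=(112, 112), align_corners=True)
print(out.shape)                     # torch.Size([2, 3, 112, 112]); identity M yields a 112x112 top-left crop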
- -def resize_n_crop(image, M, dsize=112): - # image: (b, c, h, w) - # M : (b, 2, 3) - return warp_affine(image, M, dsize=(dsize, dsize), align_corners=True) - -def filter_state_dict(state_dict, remove_name='fc'): - new_state_dict = {} - for key in state_dict: - if remove_name in key: - continue - new_state_dict[key] = state_dict[key] - return new_state_dict - -def get_scheduler(optimizer, opt): - """Return a learning rate scheduler - - Parameters: - optimizer -- the optimizer of the network - opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions. - opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine - - For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers. - See https://pytorch.org/docs/stable/optim.html for more details. - """ - if opt.lr_policy == 'linear': - def lambda_rule(epoch): - lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.n_epochs) / float(opt.n_epochs + 1) - return lr_l - scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) - elif opt.lr_policy == 'step': - scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_epochs, gamma=0.2) - elif opt.lr_policy == 'plateau': - scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5) - elif opt.lr_policy == 'cosine': - scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.n_epochs, eta_min=0) - else: - raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy) - return scheduler - - -def define_net_recon(net_recon, use_last_fc=False, init_path=None): - return ReconNetWrapper(net_recon, use_last_fc=use_last_fc, init_path=init_path) - -def define_net_recog(net_recog, pretrained_path=None): - net = RecogNetWrapper(net_recog=net_recog, pretrained_path=pretrained_path) - net.eval() - return net - -class ReconNetWrapper(nn.Module): - fc_dim = 257 - def __init__(self, net_recon, use_last_fc=False, init_path=None): - super(ReconNetWrapper, self).__init__() - self.use_last_fc = use_last_fc - if net_recon not in func_dict: - raise NotImplementedError('network [%s] is not implemented' % net_recon) - func, last_dim = func_dict[net_recon] - backbone = func(use_last_fc=use_last_fc, num_classes=self.fc_dim) - if init_path and os.path.isfile(init_path): - state_dict = filter_state_dict(torch.load(init_path, map_location='cpu')) - backbone.load_state_dict(state_dict) - print("loading init net_recon %s from %s" % (net_recon, init_path)) - self.backbone = backbone - if not use_last_fc: - self.final_layers = nn.ModuleList([ - conv1x1(last_dim, 80, bias=True), # id layer - conv1x1(last_dim, 64, bias=True), # exp layer - conv1x1(last_dim, 80, bias=True), # tex layer - conv1x1(last_dim, 3, bias=True), # angle layer - conv1x1(last_dim, 27, bias=True), # gamma layer - conv1x1(last_dim, 2, bias=True), # tx, ty - conv1x1(last_dim, 1, bias=True) # tz - ]) - for m in self.final_layers: - nn.init.constant_(m.weight, 0.) - nn.init.constant_(m.bias, 0.)
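# --- illustration (assumed shapes, not from the patch): the seven zero-initialized
# 1x1-conv heads above concatenate to the 257-dim coefficient vector that
# ParametricFaceModel.split_coeff slices up earlier in this patch:
# 80 (id) + 64 (exp) + 80 (tex) + 3 (angle) + 27 (gamma) + 2 (tx, ty) + 1 (tz) = 257.
import torch
import torch.nn as nn

last_dim = 2048                       # resnet50 feature channels (see func_dict below)
heads = nn.ModuleList(
    nn.Conv2d(last_dim, d, kernel_size=1, bias=True) for d in (80, 64, 80, 3, 27, 2, 1)
)
feat = torch.rand(4, last_dim, 1, 1)  # pooled backbone output for a batch of 4
coeffs = torch.flatten(torch.cat([h(feat) for h in heads], dim=1), 1)
print(coeffs.shape)                   # torch.Size([4, 257]) == ReconNetWrapper.fc_dim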
- - def forward(self, x): - x = self.backbone(x) - if not self.use_last_fc: - output = [] - for layer in self.final_layers: - output.append(layer(x)) - x = torch.flatten(torch.cat(output, dim=1), 1) - return x - - -class RecogNetWrapper(nn.Module): - def __init__(self, net_recog, pretrained_path=None, input_size=112): - super(RecogNetWrapper, self).__init__() - net = get_model(name=net_recog, fp16=False) - if pretrained_path: - state_dict = torch.load(pretrained_path, map_location='cpu') - net.load_state_dict(state_dict) - print("loading pretrained net_recog %s from %s" %(net_recog, pretrained_path)) - for param in net.parameters(): - param.requires_grad = False - self.net = net - self.preprocess = lambda x: 2 * x - 1 - self.input_size=input_size - - def forward(self, image, M): - image = self.preprocess(resize_n_crop(image, M, self.input_size)) - id_feature = F.normalize(self.net(image), dim=-1, p=2) - return id_feature - - -# adapted from https://github.com/pytorch/vision/edit/master/torchvision/models/resnet.py -__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', - 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', - 'wide_resnet50_2', 'wide_resnet101_2'] - - -model_urls = { - 'resnet18': 'https://download.pytorch.org/models/resnet18-f37072fd.pth', - 'resnet34': 'https://download.pytorch.org/models/resnet34-b627a593.pth', - 'resnet50': 'https://download.pytorch.org/models/resnet50-0676ba61.pth', - 'resnet101': 'https://download.pytorch.org/models/resnet101-63fe2227.pth', - 'resnet152': 'https://download.pytorch.org/models/resnet152-394f9c45.pth', - 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', - 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', - 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', - 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', -} - - -def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d: - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=dilation, groups=groups, bias=False, dilation=dilation) - - -def conv1x1(in_planes: int, out_planes: int, stride: int = 1, bias: bool = False) -> nn.Conv2d: - """1x1 convolution""" - return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=bias) - - -class BasicBlock(nn.Module): - expansion: int = 1 - - def __init__( - self, - inplanes: int, - planes: int, - stride: int = 1, - downsample: Optional[nn.Module] = None, - groups: int = 1, - base_width: int = 64, - dilation: int = 1, - norm_layer: Optional[Callable[..., nn.Module]] = None - ) -> None: - super(BasicBlock, self).__init__() - if norm_layer is None: - norm_layer = nn.BatchNorm2d - if groups != 1 or base_width != 64: - raise ValueError('BasicBlock only supports groups=1 and base_width=64') - if dilation > 1: - raise NotImplementedError("Dilation > 1 not supported in BasicBlock") - # Both self.conv1 and self.downsample layers downsample the input when stride != 1 - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = norm_layer(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = norm_layer(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x: Tensor) -> Tensor: - identity = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = 
self.bn2(out) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) - # while original implementation places the stride at the first 1x1 convolution(self.conv1) - # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. - # This variant is also known as ResNet V1.5 and improves accuracy according to - # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. - - expansion: int = 4 - - def __init__( - self, - inplanes: int, - planes: int, - stride: int = 1, - downsample: Optional[nn.Module] = None, - groups: int = 1, - base_width: int = 64, - dilation: int = 1, - norm_layer: Optional[Callable[..., nn.Module]] = None - ) -> None: - super(Bottleneck, self).__init__() - if norm_layer is None: - norm_layer = nn.BatchNorm2d - width = int(planes * (base_width / 64.)) * groups - # Both self.conv2 and self.downsample layers downsample the input when stride != 1 - self.conv1 = conv1x1(inplanes, width) - self.bn1 = norm_layer(width) - self.conv2 = conv3x3(width, width, stride, groups, dilation) - self.bn2 = norm_layer(width) - self.conv3 = conv1x1(width, planes * self.expansion) - self.bn3 = norm_layer(planes * self.expansion) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x: Tensor) -> Tensor: - identity = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - out = self.relu(out) - - return out - - -class ResNet(nn.Module): - - def __init__( - self, - block: Type[Union[BasicBlock, Bottleneck]], - layers: List[int], - num_classes: int = 1000, - zero_init_residual: bool = False, - use_last_fc: bool = False, - groups: int = 1, - width_per_group: int = 64, - replace_stride_with_dilation: Optional[List[bool]] = None, - norm_layer: Optional[Callable[..., nn.Module]] = None - ) -> None: - super(ResNet, self).__init__() - if norm_layer is None: - norm_layer = nn.BatchNorm2d - self._norm_layer = norm_layer - - self.inplanes = 64 - self.dilation = 1 - if replace_stride_with_dilation is None: - # each element in the tuple indicates if we should replace - # the 2x2 stride with a dilated convolution instead - replace_stride_with_dilation = [False, False, False] - if len(replace_stride_with_dilation) != 3: - raise ValueError("replace_stride_with_dilation should be None " - "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) - self.use_last_fc = use_last_fc - self.groups = groups - self.base_width = width_per_group - self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = norm_layer(self.inplanes) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2, - dilate=replace_stride_with_dilation[0]) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2, - dilate=replace_stride_with_dilation[1]) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2, - dilate=replace_stride_with_dilation[2]) - self.avgpool = 
nn.AdaptiveAvgPool2d((1, 1)) - - if self.use_last_fc: - self.fc = nn.Linear(512 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - - - # Zero-initialize the last BN in each residual branch, - # so that the residual branch starts with zeros, and each residual block behaves like an identity. - # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 - if zero_init_residual: - for m in self.modules(): - if isinstance(m, Bottleneck): - nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type] - elif isinstance(m, BasicBlock): - nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type] - - def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int, - stride: int = 1, dilate: bool = False) -> nn.Sequential: - norm_layer = self._norm_layer - downsample = None - previous_dilation = self.dilation - if dilate: - self.dilation *= stride - stride = 1 - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - conv1x1(self.inplanes, planes * block.expansion, stride), - norm_layer(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample, self.groups, - self.base_width, previous_dilation, norm_layer)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(self.inplanes, planes, groups=self.groups, - base_width=self.base_width, dilation=self.dilation, - norm_layer=norm_layer)) - - return nn.Sequential(*layers) - - def _forward_impl(self, x: Tensor) -> Tensor: - # See note [TorchScript super()] - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - if self.use_last_fc: - x = torch.flatten(x, 1) - x = self.fc(x) - return x - - def forward(self, x: Tensor) -> Tensor: - return self._forward_impl(x) - - -def _resnet( - arch: str, - block: Type[Union[BasicBlock, Bottleneck]], - layers: List[int], - pretrained: bool, - progress: bool, - **kwargs: Any -) -> ResNet: - model = ResNet(block, layers, **kwargs) - if pretrained: - state_dict = load_state_dict_from_url(model_urls[arch], - progress=progress) - model.load_state_dict(state_dict) - return model - - -def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""ResNet-18 model from - `"Deep Residual Learning for Image Recognition" `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, - **kwargs) - - -def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""ResNet-34 model from - `"Deep Residual Learning for Image Recognition" `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, - **kwargs) - - -def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""ResNet-50 model from - `"Deep Residual Learning for Image Recognition" `_. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, - **kwargs) - - -def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""ResNet-101 model from - `"Deep Residual Learning for Image Recognition" `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, - **kwargs) - - -def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""ResNet-152 model from - `"Deep Residual Learning for Image Recognition" `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, - **kwargs) - - -def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""ResNeXt-50 32x4d model from - `"Aggregated Residual Transformation for Deep Neural Networks" `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - kwargs['groups'] = 32 - kwargs['width_per_group'] = 4 - return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], - pretrained, progress, **kwargs) - - -def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""ResNeXt-101 32x8d model from - `"Aggregated Residual Transformation for Deep Neural Networks" `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - kwargs['groups'] = 32 - kwargs['width_per_group'] = 8 - return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], - pretrained, progress, **kwargs) - - -def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""Wide ResNet-50-2 model from - `"Wide Residual Networks" `_. - - The model is the same as ResNet except for the bottleneck number of channels - which is twice larger in every block. The number of channels in outer 1x1 - convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 - channels, and in Wide ResNet-50-2 has 2048-1024-2048. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - kwargs['width_per_group'] = 64 * 2 - return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], - pretrained, progress, **kwargs) - - -def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: - r"""Wide ResNet-101-2 model from - `"Wide Residual Networks" `_. - - The model is the same as ResNet except for the bottleneck number of channels - which is twice larger in every block. The number of channels in outer 1x1 - convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 - channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - kwargs['width_per_group'] = 64 * 2 - return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], - pretrained, progress, **kwargs) - - -func_dict = { - 'resnet18': (resnet18, 512), - 'resnet50': (resnet50, 2048) -} diff --git a/src/src/face3d/models/template_model.py b/src/src/face3d/models/template_model.py deleted file mode 100644 index dac7b33d5889777eb63c9882a3b9fa094dcab293..0000000000000000000000000000000000000000 --- a/src/src/face3d/models/template_model.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Model class template - -This module provides a template for users to implement custom models. -You can specify '--model template' to use this model. -The class name should be consistent with both the filename and its model option. -The filename should be <model>_model.py -The class name should be <Model>Model -It implements a simple image-to-image translation baseline based on regression loss. -Given input-output pairs (data_A, data_B), it learns a network netG that can minimize the following L1 loss: - min_<netG> ||netG(data_A) - data_B||_1 -You need to implement the following functions: - <modify_commandline_options>: Add model-specific options and rewrite default values for existing options. - <__init__>: Initialize this model class. - <set_input>: Unpack input data and perform data pre-processing. - <forward>: Run forward pass. This will be called by both <optimize_parameters> and <test>. - <optimize_parameters>: Update network weights; it will be called in every training iteration. -""" -import numpy as np -import torch -from .base_model import BaseModel -from . import networks - - -class TemplateModel(BaseModel): - @staticmethod - def modify_commandline_options(parser, is_train=True): - """Add new model-specific options and rewrite default values for existing options. - - Parameters: - parser -- the option parser - is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options. - - Returns: - the modified parser. - """ - parser.set_defaults(dataset_mode='aligned') # You can rewrite default values for this model. For example, this model usually uses aligned dataset as its dataset. - if is_train: - parser.add_argument('--lambda_regression', type=float, default=1.0, help='weight for the regression loss') # You can define new arguments for this model. - - return parser - - def __init__(self, opt): - """Initialize this model class. - - Parameters: - opt -- training/test options - - A few things can be done here. - - (required) call the initialization function of BaseModel - - define loss function, visualization images, model names, and optimizers - """ - BaseModel.__init__(self, opt) # call the initialization method of BaseModel - # specify the training losses you want to print out. The program will call base_model.get_current_losses to plot the losses to the console and save them to the disk. - self.loss_names = ['loss_G'] - # specify the images you want to save and display. The program will call base_model.get_current_visuals to save and display these images. - self.visual_names = ['data_A', 'data_B', 'output'] - # specify the models you want to save to the disk. The program will call base_model.save_networks and base_model.load_networks to save and load networks. - # you can use opt.isTrain to specify different behaviors for training and test. For example, some networks will not be used during test, and you don't need to load them. - self.model_names = ['G'] - # define networks; you can use opt.isTrain to specify different behaviors for training and test. - self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, gpu_ids=self.gpu_ids) - if self.isTrain: # only defined during training time - # define your loss functions. You can use losses provided by torch.nn such as torch.nn.L1Loss. - # We also provide a GANLoss class "networks.GANLoss". self.criterionGAN = networks.GANLoss().to(self.device) - self.criterionLoss = torch.nn.L1Loss() - # define and initialize optimizers. You can define one optimizer for each network. - # If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. - self.optimizer = torch.optim.Adam(self.netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) - self.optimizers = [self.optimizer] - - # Our program will automatically call <model.setup> to define schedulers, load networks, and print networks - - def set_input(self, input): - """Unpack input data from the dataloader and perform necessary pre-processing steps. - - Parameters: - input: a dictionary that contains the data itself and its metadata information. - """ - AtoB = self.opt.direction == 'AtoB' # use <direction> to swap data_A and data_B - self.data_A = input['A' if AtoB else 'B'].to(self.device) # get image data A - self.data_B = input['B' if AtoB else 'A'].to(self.device) # get image data B - self.image_paths = input['A_paths' if AtoB else 'B_paths'] # get image paths - - def forward(self): - """Run forward pass. This will be called by both functions <optimize_parameters> and <test>.""" - self.output = self.netG(self.data_A) # generate output image given the input data_A - - def backward(self): - """Calculate losses, gradients, and update network weights; called in every training iteration""" - # calculate the intermediate results if necessary; here self.output has been computed during function <forward> - # calculate loss given the input and intermediate results - self.loss_G = self.criterionLoss(self.output, self.data_B) * self.opt.lambda_regression - self.loss_G.backward() # calculate gradients of network G w.r.t. loss_G - - def optimize_parameters(self): - """Update network weights; it will be called in every training iteration.""" - self.forward() # first call forward to calculate intermediate results - self.optimizer.zero_grad() # clear network G's existing gradients - self.backward() # calculate gradients for network G - self.optimizer.step() # update gradients for network G diff --git a/src/src/face3d/options/__init__.py b/src/src/face3d/options/__init__.py deleted file mode 100644 index e7eedebe54aa70169fd25951b3034d819e396c90..0000000000000000000000000000000000000000 --- a/src/src/face3d/options/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""This package options includes option modules: training options, test options, and basic options (used in both training and test).""" diff --git a/src/src/face3d/options/base_options.py b/src/src/face3d/options/base_options.py deleted file mode 100644 index d8f921d5a43434ae802a55a0fa3889c4b7ab9f6d..0000000000000000000000000000000000000000 --- a/src/src/face3d/options/base_options.py +++ /dev/null @@ -1,169 +0,0 @@ -"""This script contains base options for Deep3DFaceRecon_pytorch -""" - -import argparse -import os -from util import util -import numpy as np -import torch -import face3d.models as models -import face3d.data as data - - -class BaseOptions(): - """This class defines options used during both training and test time.
- - It also implements several helper functions such as parsing, printing, and saving the options. - It also gathers additional options defined in functions in both dataset class and model class. - """ - - def __init__(self, cmd_line=None): - """Reset the class; indicates the class hasn't been initailized""" - self.initialized = False - self.cmd_line = None - if cmd_line is not None: - self.cmd_line = cmd_line.split() - - def initialize(self, parser): - """Define the common options that are used in both training and test.""" - # basic parameters - parser.add_argument('--name', type=str, default='face_recon', help='name of the experiment. It decides where to store samples and models') - parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') - parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') - parser.add_argument('--vis_batch_nums', type=float, default=1, help='batch nums of images for visulization') - parser.add_argument('--eval_batch_nums', type=float, default=float('inf'), help='batch nums of images for evaluation') - parser.add_argument('--use_ddp', type=util.str2bool, nargs='?', const=True, default=True, help='whether use distributed data parallel') - parser.add_argument('--ddp_port', type=str, default='12355', help='ddp port') - parser.add_argument('--display_per_batch', type=util.str2bool, nargs='?', const=True, default=True, help='whether use batch to show losses') - parser.add_argument('--add_image', type=util.str2bool, nargs='?', const=True, default=True, help='whether add image to tensorboard') - parser.add_argument('--world_size', type=int, default=1, help='batch nums of images for evaluation') - - # model parameters - parser.add_argument('--model', type=str, default='facerecon', help='chooses which model to use.') - - # additional parameters - parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') - parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information') - parser.add_argument('--suffix', default='', type=str, help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}') - - self.initialized = True - return parser - - def gather_options(self): - """Initialize our parser with basic options(only once). - Add additional model-specific and dataset-specific options. - These options are defined in the function - in model and dataset classes. 
- """ - if not self.initialized: # check if it has been initialized - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser = self.initialize(parser) - - # get the basic options - if self.cmd_line is None: - opt, _ = parser.parse_known_args() - else: - opt, _ = parser.parse_known_args(self.cmd_line) - - # set cuda visible devices - os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids - - # modify model-related parser options - model_name = opt.model - model_option_setter = models.get_option_setter(model_name) - parser = model_option_setter(parser, self.isTrain) - if self.cmd_line is None: - opt, _ = parser.parse_known_args() # parse again with new defaults - else: - opt, _ = parser.parse_known_args(self.cmd_line) # parse again with new defaults - - # modify dataset-related parser options - if opt.dataset_mode: - dataset_name = opt.dataset_mode - dataset_option_setter = data.get_option_setter(dataset_name) - parser = dataset_option_setter(parser, self.isTrain) - - # save and return the parser - self.parser = parser - if self.cmd_line is None: - return parser.parse_args() - else: - return parser.parse_args(self.cmd_line) - - def print_options(self, opt): - """Print and save options - - It will print both current options and default values(if different). - It will save options into a text file / [checkpoints_dir] / opt.txt - """ - message = '' - message += '----------------- Options ---------------\n' - for k, v in sorted(vars(opt).items()): - comment = '' - default = self.parser.get_default(k) - if v != default: - comment = '\t[default: %s]' % str(default) - message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) - message += '----------------- End -------------------' - print(message) - - # save to the disk - expr_dir = os.path.join(opt.checkpoints_dir, opt.name) - util.mkdirs(expr_dir) - file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase)) - try: - with open(file_name, 'wt') as opt_file: - opt_file.write(message) - opt_file.write('\n') - except PermissionError as error: - print("permission error {}".format(error)) - pass - - def parse(self): - """Parse our options, create checkpoints directory suffix, and set up gpu device.""" - opt = self.gather_options() - opt.isTrain = self.isTrain # train or test - - # process opt.suffix - if opt.suffix: - suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else '' - opt.name = opt.name + suffix - - - # set gpu ids - str_ids = opt.gpu_ids.split(',') - gpu_ids = [] - for str_id in str_ids: - id = int(str_id) - if id >= 0: - gpu_ids.append(id) - opt.world_size = len(gpu_ids) - # if len(opt.gpu_ids) > 0: - # torch.cuda.set_device(gpu_ids[0]) - if opt.world_size == 1: - opt.use_ddp = False - - if opt.phase != 'test': - # set continue_train automatically - if opt.pretrained_name is None: - model_dir = os.path.join(opt.checkpoints_dir, opt.name) - else: - model_dir = os.path.join(opt.checkpoints_dir, opt.pretrained_name) - if os.path.isdir(model_dir): - model_pths = [i for i in os.listdir(model_dir) if i.endswith('pth')] - if os.path.isdir(model_dir) and len(model_pths) != 0: - opt.continue_train= True - - # update the latest epoch count - if opt.continue_train: - if opt.epoch == 'latest': - epoch_counts = [int(i.split('.')[0].split('_')[-1]) for i in model_pths if 'latest' not in i] - if len(epoch_counts) != 0: - opt.epoch_count = max(epoch_counts) + 1 - else: - opt.epoch_count = int(opt.epoch) + 1 - - - self.print_options(opt) - self.opt = opt - return self.opt diff --git 
a/src/src/face3d/options/inference_options.py b/src/src/face3d/options/inference_options.py deleted file mode 100644 index c453965959ab4cfb31acbc424f994db68c3d4df5..0000000000000000000000000000000000000000 --- a/src/src/face3d/options/inference_options.py +++ /dev/null @@ -1,23 +0,0 @@ -from face3d.options.base_options import BaseOptions - - -class InferenceOptions(BaseOptions): - """This class includes test options. - - It also includes shared options defined in BaseOptions. - """ - - def initialize(self, parser): - parser = BaseOptions.initialize(self, parser) # define shared options - parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') - parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') - - parser.add_argument('--input_dir', type=str, help='the folder of the input files') - parser.add_argument('--keypoint_dir', type=str, help='the folder of the keypoint files') - parser.add_argument('--output_dir', type=str, default='mp4', help='the output dir to save the extracted coefficients') - parser.add_argument('--save_split_files', action='store_true', help='save split files or not') - parser.add_argument('--inference_batch_size', type=int, default=8) - - # Dropout and Batchnorm has different behavior during training and test. - self.isTrain = False - return parser diff --git a/src/src/face3d/options/test_options.py b/src/src/face3d/options/test_options.py deleted file mode 100644 index 4ff3ad142779850d1d5a1640bc00f70d34d4a862..0000000000000000000000000000000000000000 --- a/src/src/face3d/options/test_options.py +++ /dev/null @@ -1,21 +0,0 @@ -"""This script contains the test options for Deep3DFaceRecon_pytorch -""" - -from .base_options import BaseOptions - - -class TestOptions(BaseOptions): - """This class includes test options. - - It also includes shared options defined in BaseOptions. - """ - - def initialize(self, parser): - parser = BaseOptions.initialize(self, parser) # define shared options - parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') - parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') - parser.add_argument('--img_folder', type=str, default='examples', help='folder for test images.') - - # Dropout and Batchnorm has different behavior during training and test. - self.isTrain = False - return parser diff --git a/src/src/face3d/options/train_options.py b/src/src/face3d/options/train_options.py deleted file mode 100644 index 1337bfdd5f372b5c686a91b394a2aadbe5741f44..0000000000000000000000000000000000000000 --- a/src/src/face3d/options/train_options.py +++ /dev/null @@ -1,53 +0,0 @@ -"""This script contains the training options for Deep3DFaceRecon_pytorch -""" - -from .base_options import BaseOptions -from util import util - -class TrainOptions(BaseOptions): - """This class includes training options. - - It also includes shared options defined in BaseOptions. - """ - - def initialize(self, parser): - parser = BaseOptions.initialize(self, parser) - # dataset parameters - # for train - parser.add_argument('--data_root', type=str, default='./', help='dataset root') - parser.add_argument('--flist', type=str, default='datalist/train/masks.txt', help='list of mask names of training set') - parser.add_argument('--batch_size', type=int, default=32) - parser.add_argument('--dataset_mode', type=str, default='flist', help='chooses how datasets are loaded. 
[None | flist]') - parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') - parser.add_argument('--num_threads', default=4, type=int, help='# threads for loading data') - parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.') - parser.add_argument('--preprocess', type=str, default='shift_scale_rot_flip', help='scaling and cropping of images at load time [shift_scale_rot_flip | shift_scale | shift | shift_rot_flip ]') - parser.add_argument('--use_aug', type=util.str2bool, nargs='?', const=True, default=True, help='whether use data augmentation') - - # for val - parser.add_argument('--flist_val', type=str, default='datalist/val/masks.txt', help='list of mask names of val set') - parser.add_argument('--batch_size_val', type=int, default=32) - - - # visualization parameters - parser.add_argument('--display_freq', type=int, default=1000, help='frequency of showing training results on screen') - parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') - - # network saving and loading parameters - parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results') - parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs') - parser.add_argument('--evaluation_freq', type=int, default=5000, help='evaluation freq') - parser.add_argument('--save_by_iter', action='store_true', help='whether saves model by iteration') - parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') - parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') - parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') - parser.add_argument('--pretrained_name', type=str, default=None, help='resume training from another checkpoint') - - # training parameters - parser.add_argument('--n_epochs', type=int, default=20, help='number of epochs with the initial learning rate') - parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate for adam') - parser.add_argument('--lr_policy', type=str, default='step', help='learning rate policy. 
[linear | step | plateau | cosine]') - parser.add_argument('--lr_decay_epochs', type=int, default=10, help='multiply by a gamma every lr_decay_epochs epoches') - - self.isTrain = True - return parser diff --git a/src/src/face3d/util/BBRegressorParam_r.mat b/src/src/face3d/util/BBRegressorParam_r.mat deleted file mode 100644 index 1430a94ed2ab570a09f9d980d3585e8aaa933084..0000000000000000000000000000000000000000 Binary files a/src/src/face3d/util/BBRegressorParam_r.mat and /dev/null differ diff --git a/src/src/face3d/util/__init__.py b/src/src/face3d/util/__init__.py deleted file mode 100644 index 04eecb58b62f8c9d11d17606c6241d278a48b9b9..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""This package includes a miscellaneous collection of useful helper functions.""" -from src.face3d.util import * - diff --git a/src/src/face3d/util/detect_lm68.py b/src/src/face3d/util/detect_lm68.py deleted file mode 100644 index b7e40997289e17405e1fb6c408d21adce7b626ce..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/detect_lm68.py +++ /dev/null @@ -1,106 +0,0 @@ -import os -import cv2 -import numpy as np -from scipy.io import loadmat -import tensorflow as tf -from util.preprocess import align_for_lm -from shutil import move - -mean_face = np.loadtxt('util/test_mean_face.txt') -mean_face = mean_face.reshape([68, 2]) - -def save_label(labels, save_path): - np.savetxt(save_path, labels) - -def draw_landmarks(img, landmark, save_name): - landmark = landmark - lm_img = np.zeros([img.shape[0], img.shape[1], 3]) - lm_img[:] = img.astype(np.float32) - landmark = np.round(landmark).astype(np.int32) - - for i in range(len(landmark)): - for j in range(-1, 1): - for k in range(-1, 1): - if img.shape[0] - 1 - landmark[i, 1]+j > 0 and \ - img.shape[0] - 1 - landmark[i, 1]+j < img.shape[0] and \ - landmark[i, 0]+k > 0 and \ - landmark[i, 0]+k < img.shape[1]: - lm_img[img.shape[0] - 1 - landmark[i, 1]+j, landmark[i, 0]+k, - :] = np.array([0, 0, 255]) - lm_img = lm_img.astype(np.uint8) - - cv2.imwrite(save_name, lm_img) - - -def load_data(img_name, txt_name): - return cv2.imread(img_name), np.loadtxt(txt_name) - -# create tensorflow graph for landmark detector -def load_lm_graph(graph_filename): - with tf.gfile.GFile(graph_filename, 'rb') as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - - with tf.Graph().as_default() as graph: - tf.import_graph_def(graph_def, name='net') - img_224 = graph.get_tensor_by_name('net/input_imgs:0') - output_lm = graph.get_tensor_by_name('net/lm:0') - lm_sess = tf.Session(graph=graph) - - return lm_sess,img_224,output_lm - -# landmark detection -def detect_68p(img_path,sess,input_op,output_op): - print('detecting landmarks......') - names = [i for i in sorted(os.listdir( - img_path)) if 'jpg' in i or 'png' in i or 'jpeg' in i or 'PNG' in i] - vis_path = os.path.join(img_path, 'vis') - remove_path = os.path.join(img_path, 'remove') - save_path = os.path.join(img_path, 'landmarks') - if not os.path.isdir(vis_path): - os.makedirs(vis_path) - if not os.path.isdir(remove_path): - os.makedirs(remove_path) - if not os.path.isdir(save_path): - os.makedirs(save_path) - - for i in range(0, len(names)): - name = names[i] - print('%05d' % (i), ' ', name) - full_image_name = os.path.join(img_path, name) - txt_name = '.'.join(name.split('.')[:-1]) + '.txt' - full_txt_name = os.path.join(img_path, 'detections', txt_name) # 5 facial landmark path for each image - - # if an image does not have detected 5 
facial landmarks, remove it from the training list - if not os.path.isfile(full_txt_name): - move(full_image_name, os.path.join(remove_path, name)) - continue - - # load data - img, five_points = load_data(full_image_name, full_txt_name) - input_img, scale, bbox = align_for_lm(img, five_points) # align for 68 landmark detection - - # if the alignment fails, remove corresponding image from the training list - if scale == 0: - move(full_txt_name, os.path.join( - remove_path, txt_name)) - move(full_image_name, os.path.join(remove_path, name)) - continue - - # detect landmarks - input_img = np.reshape( - input_img, [1, 224, 224, 3]).astype(np.float32) - landmark = sess.run( - output_op, feed_dict={input_op: input_img}) - - # transform back to original image coordinate - landmark = landmark.reshape([68, 2]) + mean_face - landmark[:, 1] = 223 - landmark[:, 1] - landmark = landmark / scale - landmark[:, 0] = landmark[:, 0] + bbox[0] - landmark[:, 1] = landmark[:, 1] + bbox[1] - landmark[:, 1] = img.shape[0] - 1 - landmark[:, 1] - - if i % 100 == 0: - draw_landmarks(img, landmark, os.path.join(vis_path, name)) - save_label(landmark, os.path.join(save_path, txt_name)) diff --git a/src/src/face3d/util/generate_list.py b/src/src/face3d/util/generate_list.py deleted file mode 100644 index 943d906781063c3584a7e5b5c784f8aac0694985..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/generate_list.py +++ /dev/null @@ -1,34 +0,0 @@ -"""This script is to generate training list files for Deep3DFaceRecon_pytorch -""" - -import os - -# save path to training data -def write_list(lms_list, imgs_list, msks_list, mode='train',save_folder='datalist', save_name=''): - save_path = os.path.join(save_folder, mode) - if not os.path.isdir(save_path): - os.makedirs(save_path) - with open(os.path.join(save_path, save_name + 'landmarks.txt'), 'w') as fd: - fd.writelines([i + '\n' for i in lms_list]) - - with open(os.path.join(save_path, save_name + 'images.txt'), 'w') as fd: - fd.writelines([i + '\n' for i in imgs_list]) - - with open(os.path.join(save_path, save_name + 'masks.txt'), 'w') as fd: - fd.writelines([i + '\n' for i in msks_list]) - -# check if the path is valid -def check_list(rlms_list, rimgs_list, rmsks_list): - lms_list, imgs_list, msks_list = [], [], [] - for i in range(len(rlms_list)): - flag = 'false' - lm_path = rlms_list[i] - im_path = rimgs_list[i] - msk_path = rmsks_list[i] - if os.path.isfile(lm_path) and os.path.isfile(im_path) and os.path.isfile(msk_path): - flag = 'true' - lms_list.append(rlms_list[i]) - imgs_list.append(rimgs_list[i]) - msks_list.append(rmsks_list[i]) - print(i, rlms_list[i], flag) - return lms_list, imgs_list, msks_list diff --git a/src/src/face3d/util/html.py b/src/src/face3d/util/html.py deleted file mode 100644 index cc3262a1eafda34842e4dbad47bb6ba72f0c5a68..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/html.py +++ /dev/null @@ -1,86 +0,0 @@ -import dominate -from dominate.tags import meta, h3, table, tr, td, p, a, img, br -import os - - -class HTML: - """This HTML class allows us to save images and write texts into a single HTML file. - - It consists of functions such as (add a text header to the HTML file), - (add a row of images to the HTML file), and (save the HTML to the disk). - It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using a DOM API. 
- """ - - def __init__(self, web_dir, title, refresh=0): - """Initialize the HTML classes - - Parameters: - web_dir (str) -- a directory that stores the webpage. HTML file will be created at /index.html; images will be saved at 0: - with self.doc.head: - meta(http_equiv="refresh", content=str(refresh)) - - def get_image_dir(self): - """Return the directory that stores images""" - return self.img_dir - - def add_header(self, text): - """Insert a header to the HTML file - - Parameters: - text (str) -- the header text - """ - with self.doc: - h3(text) - - def add_images(self, ims, txts, links, width=400): - """add images to the HTML file - - Parameters: - ims (str list) -- a list of image paths - txts (str list) -- a list of image names shown on the website - links (str list) -- a list of hyperref links; when you click an image, it will redirect you to a new page - """ - self.t = table(border=1, style="table-layout: fixed;") # Insert a table - self.doc.add(self.t) - with self.t: - with tr(): - for im, txt, link in zip(ims, txts, links): - with td(style="word-wrap: break-word;", halign="center", valign="top"): - with p(): - with a(href=os.path.join('images', link)): - img(style="width:%dpx" % width, src=os.path.join('images', im)) - br() - p(txt) - - def save(self): - """save the current content to the HMTL file""" - html_file = '%s/index.html' % self.web_dir - f = open(html_file, 'wt') - f.write(self.doc.render()) - f.close() - - -if __name__ == '__main__': # we show an example usage here. - html = HTML('web/', 'test_html') - html.add_header('hello world') - - ims, txts, links = [], [], [] - for n in range(4): - ims.append('image_%d.png' % n) - txts.append('text_%d' % n) - links.append('image_%d.png' % n) - html.add_images(ims, txts, links) - html.save() diff --git a/src/src/face3d/util/load_mats.py b/src/src/face3d/util/load_mats.py deleted file mode 100644 index f9a6fcc71de1d7dad8b0f81c67dc1c213764ff0b..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/load_mats.py +++ /dev/null @@ -1,120 +0,0 @@ -"""This script is to load 3D face model for Deep3DFaceRecon_pytorch -""" - -import numpy as np -from PIL import Image -from scipy.io import loadmat, savemat -from array import array -import os.path as osp - -# load expression basis -def LoadExpBasis(bfm_folder='BFM'): - n_vertex = 53215 - Expbin = open(osp.join(bfm_folder, 'Exp_Pca.bin'), 'rb') - exp_dim = array('i') - exp_dim.fromfile(Expbin, 1) - expMU = array('f') - expPC = array('f') - expMU.fromfile(Expbin, 3*n_vertex) - expPC.fromfile(Expbin, 3*exp_dim[0]*n_vertex) - Expbin.close() - - expPC = np.array(expPC) - expPC = np.reshape(expPC, [exp_dim[0], -1]) - expPC = np.transpose(expPC) - - expEV = np.loadtxt(osp.join(bfm_folder, 'std_exp.txt')) - - return expPC, expEV - - -# transfer original BFM09 to our face model -def transferBFM09(bfm_folder='BFM'): - print('Transfer BFM09 to BFM_model_front......') - original_BFM = loadmat(osp.join(bfm_folder, '01_MorphableModel.mat')) - shapePC = original_BFM['shapePC'] # shape basis - shapeEV = original_BFM['shapeEV'] # corresponding eigen value - shapeMU = original_BFM['shapeMU'] # mean face - texPC = original_BFM['texPC'] # texture basis - texEV = original_BFM['texEV'] # eigen value - texMU = original_BFM['texMU'] # mean texture - - expPC, expEV = LoadExpBasis(bfm_folder) - - # transfer BFM09 to our face model - - idBase = shapePC*np.reshape(shapeEV, [-1, 199]) - idBase = idBase/1e5 # unify the scale to decimeter - idBase = idBase[:, :80] # use only first 80 basis - - exBase = 
expPC*np.reshape(expEV, [-1, 79]) - exBase = exBase/1e5 # unify the scale to decimeter - exBase = exBase[:, :64] # use only first 64 basis - - texBase = texPC*np.reshape(texEV, [-1, 199]) - texBase = texBase[:, :80] # use only first 80 basis - - # our face model is cropped along face landmarks and contains only 35709 vertex. - # original BFM09 contains 53490 vertex, and expression basis provided by Guo et al. contains 53215 vertex. - # thus we select corresponding vertex to get our face model. - - index_exp = loadmat(osp.join(bfm_folder, 'BFM_front_idx.mat')) - index_exp = index_exp['idx'].astype(np.int32) - 1 # starts from 0 (to 53215) - - index_shape = loadmat(osp.join(bfm_folder, 'BFM_exp_idx.mat')) - index_shape = index_shape['trimIndex'].astype( - np.int32) - 1 # starts from 0 (to 53490) - index_shape = index_shape[index_exp] - - idBase = np.reshape(idBase, [-1, 3, 80]) - idBase = idBase[index_shape, :, :] - idBase = np.reshape(idBase, [-1, 80]) - - texBase = np.reshape(texBase, [-1, 3, 80]) - texBase = texBase[index_shape, :, :] - texBase = np.reshape(texBase, [-1, 80]) - - exBase = np.reshape(exBase, [-1, 3, 64]) - exBase = exBase[index_exp, :, :] - exBase = np.reshape(exBase, [-1, 64]) - - meanshape = np.reshape(shapeMU, [-1, 3])/1e5 - meanshape = meanshape[index_shape, :] - meanshape = np.reshape(meanshape, [1, -1]) - - meantex = np.reshape(texMU, [-1, 3]) - meantex = meantex[index_shape, :] - meantex = np.reshape(meantex, [1, -1]) - - # other info contains triangles, region used for computing photometric loss, - # region used for skin texture regularization, and 68 landmarks index etc. - other_info = loadmat(osp.join(bfm_folder, 'facemodel_info.mat')) - frontmask2_idx = other_info['frontmask2_idx'] - skinmask = other_info['skinmask'] - keypoints = other_info['keypoints'] - point_buf = other_info['point_buf'] - tri = other_info['tri'] - tri_mask2 = other_info['tri_mask2'] - - # save our face model - savemat(osp.join(bfm_folder, 'BFM_model_front.mat'), {'meanshape': meanshape, 'meantex': meantex, 'idBase': idBase, 'exBase': exBase, 'texBase': texBase, - 'tri': tri, 'point_buf': point_buf, 'tri_mask2': tri_mask2, 'keypoints': keypoints, 'frontmask2_idx': frontmask2_idx, 'skinmask': skinmask}) - - -# load landmarks for standard face, which is used for image preprocessing -def load_lm3d(bfm_folder): - - Lm3D = loadmat(osp.join(bfm_folder, 'similarity_Lm3D_all.mat')) - Lm3D = Lm3D['lm'] - - # calculate 5 facial landmarks using 68 landmarks - lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 - Lm3D = np.stack([Lm3D[lm_idx[0], :], np.mean(Lm3D[lm_idx[[1, 2]], :], 0), np.mean( - Lm3D[lm_idx[[3, 4]], :], 0), Lm3D[lm_idx[5], :], Lm3D[lm_idx[6], :]], axis=0) - Lm3D = Lm3D[[1, 2, 0, 3, 4], :] - - return Lm3D - - -if __name__ == '__main__': - transferBFM09() \ No newline at end of file diff --git a/src/src/face3d/util/nvdiffrast.py b/src/src/face3d/util/nvdiffrast.py deleted file mode 100644 index f3245859c650afbfe841a66b74cddefaf28820d9..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/nvdiffrast.py +++ /dev/null @@ -1,126 +0,0 @@ -"""This script is the differentiable renderer for Deep3DFaceRecon_pytorch - Attention, antialiasing step is missing in current version. 
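For reference, the load_lm3d routine above reduces the standard 68-point annotation to five canonical points (eye centres, nose tip, mouth corners). A minimal standalone sketch of that reduction, assuming 1-based iBUG-68 indices and a (68, 2) landmark array; the helper name is illustrative, not from the original file:

    import numpy as np

    def to_five_points(lm68):
        # nose tip, left/right eye corners, mouth corners (1-based iBUG-68 indices)
        idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1
        lm5 = np.stack([lm68[idx[0]],                 # nose tip
                        lm68[idx[[1, 2]]].mean(0),    # left eye centre (mean of its corners)
                        lm68[idx[[3, 4]]].mean(0),    # right eye centre
                        lm68[idx[5]],                 # left mouth corner
                        lm68[idx[6]]], axis=0)
        return lm5[[1, 2, 0, 3, 4]]                   # reorder: eyes, nose, mouth corners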
-""" -import pytorch3d.ops -import torch -import torch.nn.functional as F -import kornia -from kornia.geometry.camera import pixel2cam -import numpy as np -from typing import List -from scipy.io import loadmat -from torch import nn - -from pytorch3d.structures import Meshes -from pytorch3d.renderer import ( - look_at_view_transform, - FoVPerspectiveCameras, - DirectionalLights, - RasterizationSettings, - MeshRenderer, - MeshRasterizer, - SoftPhongShader, - TexturesUV, -) - -# def ndc_projection(x=0.1, n=1.0, f=50.0): -# return np.array([[n/x, 0, 0, 0], -# [ 0, n/-x, 0, 0], -# [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], -# [ 0, 0, -1, 0]]).astype(np.float32) - -class MeshRenderer(nn.Module): - def __init__(self, - rasterize_fov, - znear=0.1, - zfar=10, - rasterize_size=224): - super(MeshRenderer, self).__init__() - - # x = np.tan(np.deg2rad(rasterize_fov * 0.5)) * znear - # self.ndc_proj = torch.tensor(ndc_projection(x=x, n=znear, f=zfar)).matmul( - # torch.diag(torch.tensor([1., -1, -1, 1]))) - self.rasterize_size = rasterize_size - self.fov = rasterize_fov - self.znear = znear - self.zfar = zfar - - self.rasterizer = None - - def forward(self, vertex, tri, feat=None): - """ - Return: - mask -- torch.tensor, size (B, 1, H, W) - depth -- torch.tensor, size (B, 1, H, W) - features(optional) -- torch.tensor, size (B, C, H, W) if feat is not None - - Parameters: - vertex -- torch.tensor, size (B, N, 3) - tri -- torch.tensor, size (B, M, 3) or (M, 3), triangles - feat(optional) -- torch.tensor, size (B, N ,C), features - """ - device = vertex.device - rsize = int(self.rasterize_size) - # ndc_proj = self.ndc_proj.to(device) - # trans to homogeneous coordinates of 3d vertices, the direction of y is the same as v - if vertex.shape[-1] == 3: - vertex = torch.cat([vertex, torch.ones([*vertex.shape[:2], 1]).to(device)], dim=-1) - vertex[..., 0] = -vertex[..., 0] - - - # vertex_ndc = vertex @ ndc_proj.t() - if self.rasterizer is None: - self.rasterizer = MeshRasterizer() - print("create rasterizer on device cuda:%d"%device.index) - - # ranges = None - # if isinstance(tri, List) or len(tri.shape) == 3: - # vum = vertex_ndc.shape[1] - # fnum = torch.tensor([f.shape[0] for f in tri]).unsqueeze(1).to(device) - # fstartidx = torch.cumsum(fnum, dim=0) - fnum - # ranges = torch.cat([fstartidx, fnum], axis=1).type(torch.int32).cpu() - # for i in range(tri.shape[0]): - # tri[i] = tri[i] + i*vum - # vertex_ndc = torch.cat(vertex_ndc, dim=0) - # tri = torch.cat(tri, dim=0) - - # for range_mode vetex: [B*N, 4], tri: [B*M, 3], for instance_mode vetex: [B, N, 4], tri: [M, 3] - tri = tri.type(torch.int32).contiguous() - - # rasterize - cameras = FoVPerspectiveCameras( - device=device, - fov=self.fov, - znear=self.znear, - zfar=self.zfar, - ) - - raster_settings = RasterizationSettings( - image_size=rsize - ) - - # print(vertex.shape, tri.shape) - mesh = Meshes(vertex.contiguous()[...,:3], tri.unsqueeze(0).repeat((vertex.shape[0],1,1))) - - fragments = self.rasterizer(mesh, cameras = cameras, raster_settings = raster_settings) - rast_out = fragments.pix_to_face.squeeze(-1) - depth = fragments.zbuf - - # render depth - depth = depth.permute(0, 3, 1, 2) - mask = (rast_out > 0).float().unsqueeze(1) - depth = mask * depth - - - image = None - if feat is not None: - attributes = feat.reshape(-1,3)[mesh.faces_packed()] - image = pytorch3d.ops.interpolate_face_attributes(fragments.pix_to_face, - fragments.bary_coords, - attributes) - # print(image.shape) - image = image.squeeze(-2).permute(0, 3, 1, 2) - image = mask * image - - 
return mask, depth, image - diff --git a/src/src/face3d/util/preprocess.py b/src/src/face3d/util/preprocess.py deleted file mode 100644 index b77a3a4058c208e5ba8cb1cfbb563954a5f7a3e2..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/preprocess.py +++ /dev/null @@ -1,103 +0,0 @@ -"""This script contains the image preprocessing code for Deep3DFaceRecon_pytorch -""" - -import numpy as np -from scipy.io import loadmat -from PIL import Image -import cv2 -import os -from skimage import transform as trans -import torch -import warnings -warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) -warnings.filterwarnings("ignore", category=FutureWarning) - - -# calculating least square problem for image alignment -def POS(xp, x): - npts = xp.shape[1] - - A = np.zeros([2*npts, 8]) - - A[0:2*npts-1:2, 0:3] = x.transpose() - A[0:2*npts-1:2, 3] = 1 - - A[1:2*npts:2, 4:7] = x.transpose() - A[1:2*npts:2, 7] = 1 - - b = np.reshape(xp.transpose(), [2*npts, 1]) - - k, _, _, _ = np.linalg.lstsq(A, b) - - R1 = k[0:3] - R2 = k[4:7] - sTx = k[3] - sTy = k[7] - s = (np.linalg.norm(R1) + np.linalg.norm(R2))/2 - t = np.stack([sTx, sTy], axis=0) - - return t, s - -# resize and crop images for face reconstruction -def resize_n_crop_img(img, lm, t, s, target_size=224., mask=None): - w0, h0 = img.size - w = (w0*s).astype(np.int32) - h = (h0*s).astype(np.int32) - left = (w/2 - target_size/2 + float((t[0] - w0/2)*s)).astype(np.int32) - right = left + target_size - up = (h/2 - target_size/2 + float((h0/2 - t[1])*s)).astype(np.int32) - below = up + target_size - - img = img.resize((w, h), resample=Image.BICUBIC) - img = img.crop((left, up, right, below)) - - if mask is not None: - mask = mask.resize((w, h), resample=Image.BICUBIC) - mask = mask.crop((left, up, right, below)) - - lm = np.stack([lm[:, 0] - t[0] + w0/2, lm[:, 1] - - t[1] + h0/2], axis=1)*s - lm = lm - np.reshape( - np.array([(w/2 - target_size/2), (h/2-target_size/2)]), [1, 2]) - - return img, lm, mask - -# utils for face reconstruction -def extract_5p(lm): - lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 - lm5p = np.stack([lm[lm_idx[0], :], np.mean(lm[lm_idx[[1, 2]], :], 0), np.mean( - lm[lm_idx[[3, 4]], :], 0), lm[lm_idx[5], :], lm[lm_idx[6], :]], axis=0) - lm5p = lm5p[[1, 2, 0, 3, 4], :] - return lm5p - -# utils for face reconstruction -def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.): - """ - Return: - transparams --numpy.array (raw_W, raw_H, scale, tx, ty) - img_new --PIL.Image (target_size, target_size, 3) - lm_new --numpy.array (68, 2), y direction is opposite to v direction - mask_new --PIL.Image (target_size, target_size) - - Parameters: - img --PIL.Image (raw_H, raw_W, 3) - lm --numpy.array (68, 2), y direction is opposite to v direction - lm3D --numpy.array (5, 3) - mask --PIL.Image (raw_H, raw_W, 3) - """ - - w0, h0 = img.size - if lm.shape[0] != 5: - lm5p = extract_5p(lm) - else: - lm5p = lm - - # calculate translation and scale factors using 5 facial landmarks and standard landmarks of a 3D face - t, s = POS(lm5p.transpose(), lm3D.transpose()) - s = rescale_factor/s - - # processing the image - img_new, lm_new, mask_new = resize_n_crop_img(img, lm, t, s, target_size=target_size, mask=mask) - trans_params = np.array([w0, h0, s, t[0], t[1]]) - - return trans_params, img_new, lm_new, mask_new diff --git a/src/src/face3d/util/skin_mask.py b/src/src/face3d/util/skin_mask.py deleted file mode 100644 index 
a8a74e4c3b40d13b0258b83a12f56321a85bb179..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/skin_mask.py +++ /dev/null @@ -1,125 +0,0 @@ -"""This script is to generate skin attention mask for Deep3DFaceRecon_pytorch -""" - -import math -import numpy as np -import os -import cv2 - -class GMM: - def __init__(self, dim, num, w, mu, cov, cov_det, cov_inv): - self.dim = dim # feature dimension - self.num = num # number of Gaussian components - self.w = w # weights of Gaussian components (a list of scalars) - self.mu= mu # mean of Gaussian components (a list of 1xdim vectors) - self.cov = cov # covariance matrix of Gaussian components (a list of dimxdim matrices) - self.cov_det = cov_det # pre-computed determinet of covariance matrices (a list of scalars) - self.cov_inv = cov_inv # pre-computed inverse covariance matrices (a list of dimxdim matrices) - - self.factor = [0]*num - for i in range(self.num): - self.factor[i] = (2*math.pi)**(self.dim/2) * self.cov_det[i]**0.5 - - def likelihood(self, data): - assert(data.shape[1] == self.dim) - N = data.shape[0] - lh = np.zeros(N) - - for i in range(self.num): - data_ = data - self.mu[i] - - tmp = np.matmul(data_,self.cov_inv[i]) * data_ - tmp = np.sum(tmp,axis=1) - power = -0.5 * tmp - - p = np.array([math.exp(power[j]) for j in range(N)]) - p = p/self.factor[i] - lh += p*self.w[i] - - return lh - - -def _rgb2ycbcr(rgb): - m = np.array([[65.481, 128.553, 24.966], - [-37.797, -74.203, 112], - [112, -93.786, -18.214]]) - shape = rgb.shape - rgb = rgb.reshape((shape[0] * shape[1], 3)) - ycbcr = np.dot(rgb, m.transpose() / 255.) - ycbcr[:, 0] += 16. - ycbcr[:, 1:] += 128. - return ycbcr.reshape(shape) - - -def _bgr2ycbcr(bgr): - rgb = bgr[..., ::-1] - return _rgb2ycbcr(rgb) - - -gmm_skin_w = [0.24063933, 0.16365987, 0.26034665, 0.33535415] -gmm_skin_mu = [np.array([113.71862, 103.39613, 164.08226]), - np.array([150.19858, 105.18467, 155.51428]), - np.array([183.92976, 107.62468, 152.71820]), - np.array([114.90524, 113.59782, 151.38217])] -gmm_skin_cov_det = [5692842.5, 5851930.5, 2329131., 1585971.] 
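# A vectorised sketch of GMM.likelihood above, for orientation (a hypothetical
# helper, not part of the original file): p(x) = sum_i w_i * N(x; mu_i, Sigma_i),
# with each Gaussian normaliser (2*pi)**(dim/2) * sqrt(det(Sigma_i)) precomputed
# per component, exactly as self.factor[i] is in GMM.__init__.
def gmm_likelihood(data, w, mu, cov_inv, cov_det):
    lh = np.zeros(data.shape[0])
    for wi, mui, ci, di in zip(w, mu, cov_inv, cov_det):
        diff = data - mui
        maha = np.sum((diff @ ci) * diff, axis=1)     # squared Mahalanobis distance
        norm = (2 * math.pi) ** (data.shape[1] / 2) * di ** 0.5
        lh += wi * np.exp(-0.5 * maha) / norm
    return lh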
-gmm_skin_cov_inv = [np.array([[0.0019472069, 0.0020450759, -0.00060243998],[0.0020450759, 0.017700525, 0.0051420014],[-0.00060243998, 0.0051420014, 0.0081308950]]), - np.array([[0.0027110141, 0.0011036990, 0.0023122299],[0.0011036990, 0.010707724, 0.010742856],[0.0023122299, 0.010742856, 0.017481629]]), - np.array([[0.0048026871, 0.00022935172, 0.0077668377],[0.00022935172, 0.011729696, 0.0081661865],[0.0077668377, 0.0081661865, 0.025374353]]), - np.array([[0.0011989699, 0.0022453172, -0.0010748957],[0.0022453172, 0.047758564, 0.020332102],[-0.0010748957, 0.020332102, 0.024502251]])] - -gmm_skin = GMM(3, 4, gmm_skin_w, gmm_skin_mu, [], gmm_skin_cov_det, gmm_skin_cov_inv) - -gmm_nonskin_w = [0.12791070, 0.31130761, 0.34245777, 0.21832393] -gmm_nonskin_mu = [np.array([99.200851, 112.07533, 140.20602]), - np.array([110.91392, 125.52969, 130.19237]), - np.array([129.75864, 129.96107, 126.96808]), - np.array([112.29587, 128.85121, 129.05431])] -gmm_nonskin_cov_det = [458703648., 6466488., 90611376., 133097.63] -gmm_nonskin_cov_inv = [np.array([[0.00085371657, 0.00071197288, 0.00023958916],[0.00071197288, 0.0025935620, 0.00076557708],[0.00023958916, 0.00076557708, 0.0015042332]]), - np.array([[0.00024650150, 0.00045542428, 0.00015019422],[0.00045542428, 0.026412144, 0.018419769],[0.00015019422, 0.018419769, 0.037497383]]), - np.array([[0.00037054974, 0.00038146760, 0.00040408765],[0.00038146760, 0.0085505722, 0.0079136286],[0.00040408765, 0.0079136286, 0.010982352]]), - np.array([[0.00013709733, 0.00051228428, 0.00012777430],[0.00051228428, 0.28237113, 0.10528370],[0.00012777430, 0.10528370, 0.23468947]])] - -gmm_nonskin = GMM(3, 4, gmm_nonskin_w, gmm_nonskin_mu, [], gmm_nonskin_cov_det, gmm_nonskin_cov_inv) - -prior_skin = 0.8 -prior_nonskin = 1 - prior_skin - - -# calculate skin attention mask -def skinmask(imbgr): - im = _bgr2ycbcr(imbgr) - - data = im.reshape((-1,3)) - - lh_skin = gmm_skin.likelihood(data) - lh_nonskin = gmm_nonskin.likelihood(data) - - tmp1 = prior_skin * lh_skin - tmp2 = prior_nonskin * lh_nonskin - post_skin = tmp1 / (tmp1+tmp2) # posterior probability - - post_skin = post_skin.reshape((im.shape[0],im.shape[1])) - - post_skin = np.round(post_skin*255) - post_skin = post_skin.astype(np.uint8) - post_skin = np.tile(np.expand_dims(post_skin,2),[1,1,3]) # reshape to H*W*3 - - return post_skin - - -def get_skin_mask(img_path): - print('generating skin masks......') - names = [i for i in sorted(os.listdir( - img_path)) if 'jpg' in i or 'png' in i or 'jpeg' in i or 'PNG' in i] - save_path = os.path.join(img_path, 'mask') - if not os.path.isdir(save_path): - os.makedirs(save_path) - - for i in range(0, len(names)): - name = names[i] - print('%05d' % (i), ' ', name) - full_image_name = os.path.join(img_path, name) - img = cv2.imread(full_image_name).astype(np.float32) - skin_img = skinmask(img) - cv2.imwrite(os.path.join(save_path, name), skin_img.astype(np.uint8)) diff --git a/src/src/face3d/util/test_mean_face.txt b/src/src/face3d/util/test_mean_face.txt deleted file mode 100644 index 3a46d4db7699ffed8f898fcee64099631509946d..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/test_mean_face.txt +++ /dev/null @@ -1,136 +0,0 @@ --5.228591537475585938e+01 -2.078247070312500000e-01 --5.064269638061523438e+01 --1.315765380859375000e+01 --4.952939224243164062e+01 --2.592591094970703125e+01 --4.793047332763671875e+01 --3.832135772705078125e+01 --4.512159729003906250e+01 --5.059623336791992188e+01 --3.917720794677734375e+01 --6.043736648559570312e+01 
--2.929953765869140625e+01 --6.861183166503906250e+01 --1.719801330566406250e+01 --7.572736358642578125e+01 --1.961936950683593750e+00 --7.862001037597656250e+01 -1.467941284179687500e+01 --7.607844543457031250e+01 -2.744073486328125000e+01 --6.915261840820312500e+01 -3.855677795410156250e+01 --5.950350570678710938e+01 -4.478240966796875000e+01 --4.867547225952148438e+01 -4.714337158203125000e+01 --3.800830078125000000e+01 -4.940315246582031250e+01 --2.496297454833984375e+01 -5.117234802246093750e+01 --1.241538238525390625e+01 -5.190507507324218750e+01 -8.244247436523437500e-01 --4.150688934326171875e+01 -2.386329650878906250e+01 --3.570307159423828125e+01 -3.017010498046875000e+01 --2.790358734130859375e+01 -3.212951660156250000e+01 --1.941773223876953125e+01 -3.156523132324218750e+01 --1.138106536865234375e+01 -2.841992187500000000e+01 -5.993263244628906250e+00 -2.895182800292968750e+01 -1.343590545654296875e+01 -3.189880371093750000e+01 -2.203153991699218750e+01 -3.302221679687500000e+01 -2.992478942871093750e+01 -3.099150085449218750e+01 -3.628388977050781250e+01 -2.765748596191406250e+01 --1.933914184570312500e+00 -1.405374145507812500e+01 --2.153038024902343750e+00 -5.772636413574218750e+00 --2.270050048828125000e+00 --2.121643066406250000e+00 --2.218330383300781250e+00 --1.068978118896484375e+01 --1.187252044677734375e+01 --1.997912597656250000e+01 --6.879402160644531250e+00 --2.143579864501953125e+01 --1.227821350097656250e+00 --2.193494415283203125e+01 -4.623237609863281250e+00 --2.152721405029296875e+01 -9.721397399902343750e+00 --1.953671264648437500e+01 --3.648714447021484375e+01 -9.811126708984375000e+00 --3.130242919921875000e+01 -1.422447967529296875e+01 --2.212834930419921875e+01 -1.493019866943359375e+01 --1.500880432128906250e+01 -1.073588562011718750e+01 --2.095037078857421875e+01 -9.054298400878906250e+00 --3.050099182128906250e+01 -8.704177856445312500e+00 -1.173237609863281250e+01 -1.054329681396484375e+01 -1.856353759765625000e+01 -1.535009765625000000e+01 -2.893331909179687500e+01 -1.451992797851562500e+01 -3.452944946289062500e+01 -1.065280151367187500e+01 -2.875990295410156250e+01 -8.654792785644531250e+00 -1.942100524902343750e+01 -9.422447204589843750e+00 --2.204488372802734375e+01 --3.983994293212890625e+01 --1.324458312988281250e+01 --3.467377471923828125e+01 --6.749649047851562500e+00 --3.092894744873046875e+01 --9.183349609375000000e-01 --3.196458435058593750e+01 -4.220649719238281250e+00 --3.090406036376953125e+01 -1.089889526367187500e+01 --3.497008514404296875e+01 -1.874589538574218750e+01 --4.065438079833984375e+01 -1.124106597900390625e+01 --4.438417816162109375e+01 -5.181709289550781250e+00 --4.649170684814453125e+01 --1.158607482910156250e+00 --4.680406951904296875e+01 --7.918922424316406250e+00 --4.671575164794921875e+01 --1.452505493164062500e+01 --4.416526031494140625e+01 --2.005007171630859375e+01 --3.997841644287109375e+01 --1.054919433593750000e+01 --3.849683380126953125e+01 --1.051826477050781250e+00 --3.794863128662109375e+01 -6.412681579589843750e+00 --3.804645538330078125e+01 -1.627674865722656250e+01 --4.039697265625000000e+01 -6.373878479003906250e+00 --4.087213897705078125e+01 --8.551712036132812500e-01 --4.157129669189453125e+01 --1.014953613281250000e+01 --4.128469085693359375e+01 diff --git a/src/src/face3d/util/util.py b/src/src/face3d/util/util.py deleted file mode 100644 index 0d689ca138fc0fbf5bec794511ea0f9e638f9ea9..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/util.py +++ /dev/null @@ -1,208 +0,0 @@ -"""This 
script contains basic utilities for Deep3DFaceRecon_pytorch -""" -from __future__ import print_function -import numpy as np -import torch -from PIL import Image -import os -import importlib -import argparse -from argparse import Namespace -import torchvision - - -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - -def copyconf(default_opt, **kwargs): - conf = Namespace(**vars(default_opt)) - for key in kwargs: - setattr(conf, key, kwargs[key]) - return conf - -def genvalconf(train_opt, **kwargs): - conf = Namespace(**vars(train_opt)) - attr_dict = train_opt.__dict__ - for key, value in attr_dict.items(): - if 'val' in key and key.split('_')[0] in attr_dict: - setattr(conf, key.split('_')[0], value) - - for key in kwargs: - setattr(conf, key, kwargs[key]) - - return conf - -def find_class_in_module(target_cls_name, module): - target_cls_name = target_cls_name.replace('_', '').lower() - clslib = importlib.import_module(module) - cls = None - for name, clsobj in clslib.__dict__.items(): - if name.lower() == target_cls_name: - cls = clsobj - - assert cls is not None, "In %s, there should be a class whose name matches %s in lowercase without underscore(_)" % (module, target_cls_name) - - return cls - - -def tensor2im(input_image, imtype=np.uint8): - """"Converts a Tensor array into a numpy image array. - - Parameters: - input_image (tensor) -- the input image tensor array, range(0, 1) - imtype (type) -- the desired type of the converted numpy array - """ - if not isinstance(input_image, np.ndarray): - if isinstance(input_image, torch.Tensor): # get the data from a variable - image_tensor = input_image.data - else: - return input_image - image_numpy = image_tensor.clamp(0.0, 1.0).cpu().float().numpy() # convert it into a numpy array - if image_numpy.shape[0] == 1: # grayscale to RGB - image_numpy = np.tile(image_numpy, (3, 1, 1)) - image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 # post-processing: tranpose and scaling - else: # if it is a numpy array, do nothing - image_numpy = input_image - return image_numpy.astype(imtype) - - -def diagnose_network(net, name='network'): - """Calculate and print the mean of average absolute(gradients) - - Parameters: - net (torch network) -- Torch network - name (str) -- the name of the network - """ - mean = 0.0 - count = 0 - for param in net.parameters(): - if param.grad is not None: - mean += torch.mean(torch.abs(param.grad.data)) - count += 1 - if count > 0: - mean = mean / count - print(name) - print(mean) - - -def save_image(image_numpy, image_path, aspect_ratio=1.0): - """Save a numpy image to the disk - - Parameters: - image_numpy (numpy array) -- input numpy array - image_path (str) -- the path of the image - """ - - image_pil = Image.fromarray(image_numpy) - h, w, _ = image_numpy.shape - - if aspect_ratio is None: - pass - elif aspect_ratio > 1.0: - image_pil = image_pil.resize((h, int(w * aspect_ratio)), Image.BICUBIC) - elif aspect_ratio < 1.0: - image_pil = image_pil.resize((int(h / aspect_ratio), w), Image.BICUBIC) - image_pil.save(image_path) - - -def print_numpy(x, val=True, shp=False): - """Print the mean, min, max, median, std, and size of a numpy array - - Parameters: - val (bool) -- if print the values of the numpy array - shp (bool) -- if print the shape of the numpy array - """ - x = x.astype(np.float64) - if shp: - 
print('shape,', x.shape) - if val: - x = x.flatten() - print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( - np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) - - -def mkdirs(paths): - """create empty directories if they don't exist - - Parameters: - paths (str list) -- a list of directory paths - """ - if isinstance(paths, list) and not isinstance(paths, str): - for path in paths: - mkdir(path) - else: - mkdir(paths) - - -def mkdir(path): - """create a single empty directory if it didn't exist - - Parameters: - path (str) -- a single directory path - """ - if not os.path.exists(path): - os.makedirs(path) - - -def correct_resize_label(t, size): - device = t.device - t = t.detach().cpu() - resized = [] - for i in range(t.size(0)): - one_t = t[i, :1] - one_np = np.transpose(one_t.numpy().astype(np.uint8), (1, 2, 0)) - one_np = one_np[:, :, 0] - one_image = Image.fromarray(one_np).resize(size, Image.NEAREST) - resized_t = torch.from_numpy(np.array(one_image)).long() - resized.append(resized_t) - return torch.stack(resized, dim=0).to(device) - - -def correct_resize(t, size, mode=Image.BICUBIC): - device = t.device - t = t.detach().cpu() - resized = [] - for i in range(t.size(0)): - one_t = t[i:i + 1] - one_image = Image.fromarray(tensor2im(one_t)).resize(size, Image.BICUBIC) - resized_t = torchvision.transforms.functional.to_tensor(one_image) * 2 - 1.0 - resized.append(resized_t) - return torch.stack(resized, dim=0).to(device) - -def draw_landmarks(img, landmark, color='r', step=2): - """ - Return: - img -- numpy.array, (B, H, W, 3) img with landmark, RGB order, range (0, 255) - - - Parameters: - img -- numpy.array, (B, H, W, 3), RGB order, range (0, 255) - landmark -- numpy.array, (B, 68, 2), y direction is opposite to v direction - color -- str, 'r' or 'b' (red or blue) - """ - if color =='r': - c = np.array([255., 0, 0]) - else: - c = np.array([0, 0, 255.]) - - _, H, W, _ = img.shape - img, landmark = img.copy(), landmark.copy() - landmark[..., 1] = H - 1 - landmark[..., 1] - landmark = np.round(landmark).astype(np.int32) - for i in range(landmark.shape[1]): - x, y = landmark[:, i, 0], landmark[:, i, 1] - for j in range(-step, step): - for k in range(-step, step): - u = np.clip(x + j, 0, W - 1) - v = np.clip(y + k, 0, H - 1) - for m in range(landmark.shape[0]): - img[m, v[m], u[m]] = c - return img diff --git a/src/src/face3d/util/visualizer.py b/src/src/face3d/util/visualizer.py deleted file mode 100644 index 4023a6d4086acba9bc88e079f625194d324d7c9e..0000000000000000000000000000000000000000 --- a/src/src/face3d/util/visualizer.py +++ /dev/null @@ -1,227 +0,0 @@ -"""This script defines the visualizer for Deep3DFaceRecon_pytorch -""" - -import numpy as np -import os -import sys -import ntpath -import time -from . import util, html -from subprocess import Popen, PIPE -from torch.utils.tensorboard import SummaryWriter - -def save_images(webpage, visuals, image_path, aspect_ratio=1.0, width=256): - """Save images to the disk. - - Parameters: - webpage (the HTML class) -- the HTML webpage class that stores these imaegs (see html.py for more details) - visuals (OrderedDict) -- an ordered dictionary that stores (name, images (either tensor or numpy) ) pairs - image_path (str) -- the string is used to create image paths - aspect_ratio (float) -- the aspect ratio of saved images - width (int) -- the images will be resized to width x width - - This function will save images stored in 'visuals' to the HTML file specified by 'webpage'. 
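A hypothetical usage sketch (all names illustrative, not from the original code):

    webpage = html.HTML('./results/web', 'demo experiment')
    visuals = OrderedDict([('input', input_tensor), ('output', output_tensor)])
    save_images(webpage, visuals, ['./datasets/demo/0001.png'], width=256)
    webpage.save()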
- """ - image_dir = webpage.get_image_dir() - short_path = ntpath.basename(image_path[0]) - name = os.path.splitext(short_path)[0] - - webpage.add_header(name) - ims, txts, links = [], [], [] - - for label, im_data in visuals.items(): - im = util.tensor2im(im_data) - image_name = '%s/%s.png' % (label, name) - os.makedirs(os.path.join(image_dir, label), exist_ok=True) - save_path = os.path.join(image_dir, image_name) - util.save_image(im, save_path, aspect_ratio=aspect_ratio) - ims.append(image_name) - txts.append(label) - links.append(image_name) - webpage.add_images(ims, txts, links, width=width) - - -class Visualizer(): - """This class includes several functions that can display/save images and print/save logging information. - - It uses a Python library tensprboardX for display, and a Python library 'dominate' (wrapped in 'HTML') for creating HTML files with images. - """ - - def __init__(self, opt): - """Initialize the Visualizer class - - Parameters: - opt -- stores all the experiment flags; needs to be a subclass of BaseOptions - Step 1: Cache the training/test options - Step 2: create a tensorboard writer - Step 3: create an HTML object for saveing HTML filters - Step 4: create a logging file to store training losses - """ - self.opt = opt # cache the option - self.use_html = opt.isTrain and not opt.no_html - self.writer = SummaryWriter(os.path.join(opt.checkpoints_dir, 'logs', opt.name)) - self.win_size = opt.display_winsize - self.name = opt.name - self.saved = False - if self.use_html: # create an HTML object at /web/; images will be saved under /web/images/ - self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') - self.img_dir = os.path.join(self.web_dir, 'images') - print('create web directory %s...' % self.web_dir) - util.mkdirs([self.web_dir, self.img_dir]) - # create a logging file to store training losses - self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') - with open(self.log_name, "a") as log_file: - now = time.strftime("%c") - log_file.write('================ Training Loss (%s) ================\n' % now) - - def reset(self): - """Reset the self.saved status""" - self.saved = False - - - def display_current_results(self, visuals, total_iters, epoch, save_result): - """Display current results on tensorboad; save current results to an HTML file. - - Parameters: - visuals (OrderedDict) - - dictionary of images to display or save - total_iters (int) -- total iterations - epoch (int) - - the current epoch - save_result (bool) - - if save the current results to an HTML file - """ - for label, image in visuals.items(): - self.writer.add_image(label, util.tensor2im(image), total_iters, dataformats='HWC') - - if self.use_html and (save_result or not self.saved): # save images to an HTML file if they haven't been saved. 
- self.saved = True - # save images to the disk - for label, image in visuals.items(): - image_numpy = util.tensor2im(image) - img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label)) - util.save_image(image_numpy, img_path) - - # update website - webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, refresh=0) - for n in range(epoch, 0, -1): - webpage.add_header('epoch [%d]' % n) - ims, txts, links = [], [], [] - - for label, image_numpy in visuals.items(): - image_numpy = util.tensor2im(image) - img_path = 'epoch%.3d_%s.png' % (n, label) - ims.append(img_path) - txts.append(label) - links.append(img_path) - webpage.add_images(ims, txts, links, width=self.win_size) - webpage.save() - - def plot_current_losses(self, total_iters, losses): - # G_loss_collection = {} - # D_loss_collection = {} - # for name, value in losses.items(): - # if 'G' in name or 'NCE' in name or 'idt' in name: - # G_loss_collection[name] = value - # else: - # D_loss_collection[name] = value - # self.writer.add_scalars('G_collec', G_loss_collection, total_iters) - # self.writer.add_scalars('D_collec', D_loss_collection, total_iters) - for name, value in losses.items(): - self.writer.add_scalar(name, value, total_iters) - - # losses: same format as |losses| of plot_current_losses - def print_current_losses(self, epoch, iters, losses, t_comp, t_data): - """print current losses on console; also save the losses to the disk - - Parameters: - epoch (int) -- current epoch - iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch) - losses (OrderedDict) -- training losses stored in the format of (name, float) pairs - t_comp (float) -- computational time per data point (normalized by batch_size) - t_data (float) -- data loading time per data point (normalized by batch_size) - """ - message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, iters, t_comp, t_data) - for k, v in losses.items(): - message += '%s: %.3f ' % (k, v) - - print(message) # print the message - with open(self.log_name, "a") as log_file: - log_file.write('%s\n' % message) # save the message - - -class MyVisualizer: - def __init__(self, opt): - """Initialize the Visualizer class - - Parameters: - opt -- stores all the experiment flags; needs to be a subclass of BaseOptions - Step 1: Cache the training/test options - Step 2: create a tensorboard writer - Step 3: create an HTML object for saveing HTML filters - Step 4: create a logging file to store training losses - """ - self.opt = opt # cache the optio - self.name = opt.name - self.img_dir = os.path.join(opt.checkpoints_dir, opt.name, 'results') - - if opt.phase != 'test': - self.writer = SummaryWriter(os.path.join(opt.checkpoints_dir, opt.name, 'logs')) - # create a logging file to store training losses - self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') - with open(self.log_name, "a") as log_file: - now = time.strftime("%c") - log_file.write('================ Training Loss (%s) ================\n' % now) - - - def display_current_results(self, visuals, total_iters, epoch, dataset='train', save_results=False, count=0, name=None, - add_image=True): - """Display current results on tensorboad; save current results to an HTML file. 
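A hypothetical call sketch (tensor and option names illustrative):

    vis = MyVisualizer(opt)
    vis.display_current_results(OrderedDict([('render', images)]),
                                total_iters=1000, epoch=3,
                                dataset='val', save_results=True)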
- - Parameters: - visuals (OrderedDict) - - dictionary of images to display or save - total_iters (int) -- total iterations - epoch (int) - - the current epoch - dataset (str) - - 'train' or 'val' or 'test' - """ - # if (not add_image) and (not save_results): return - - for label, image in visuals.items(): - for i in range(image.shape[0]): - image_numpy = util.tensor2im(image[i]) - if add_image: - self.writer.add_image(label + '%s_%02d'%(dataset, i + count), - image_numpy, total_iters, dataformats='HWC') - - if save_results: - save_path = os.path.join(self.img_dir, dataset, 'epoch_%s_%06d'%(epoch, total_iters)) - if not os.path.isdir(save_path): - os.makedirs(save_path) - - if name is not None: - img_path = os.path.join(save_path, '%s.png' % name) - else: - img_path = os.path.join(save_path, '%s_%03d.png' % (label, i + count)) - util.save_image(image_numpy, img_path) - - - def plot_current_losses(self, total_iters, losses, dataset='train'): - for name, value in losses.items(): - self.writer.add_scalar(name + '/%s'%dataset, value, total_iters) - - # losses: same format as |losses| of plot_current_losses - def print_current_losses(self, epoch, iters, losses, t_comp, t_data, dataset='train'): - """print current losses on console; also save the losses to the disk - - Parameters: - epoch (int) -- current epoch - iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch) - losses (OrderedDict) -- training losses stored in the format of (name, float) pairs - t_comp (float) -- computational time per data point (normalized by batch_size) - t_data (float) -- data loading time per data point (normalized by batch_size) - """ - message = '(dataset: %s, epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % ( - dataset, epoch, iters, t_comp, t_data) - for k, v in losses.items(): - message += '%s: %.3f ' % (k, v) - - print(message) # print the message - with open(self.log_name, "a") as log_file: - log_file.write('%s\n' % message) # save the message diff --git a/src/src/face3d/visualize.py b/src/src/face3d/visualize.py deleted file mode 100644 index 23a1110806a0ddf37d4aa549c023d1c3f7114e3e..0000000000000000000000000000000000000000 --- a/src/src/face3d/visualize.py +++ /dev/null @@ -1,48 +0,0 @@ -# check the sync of 3dmm feature and the audio -import cv2 -import numpy as np -from src.face3d.models.bfm import ParametricFaceModel -from src.face3d.models.facerecon_model import FaceReconModel -import torch -import subprocess, platform -import scipy.io as scio -from tqdm import tqdm - -# draft -def gen_composed_video(args, device, first_frame_coeff, coeff_path, audio_path, save_path, exp_dim=64): - - coeff_first = scio.loadmat(first_frame_coeff)['full_3dmm'] - - coeff_pred = scio.loadmat(coeff_path)['coeff_3dmm'] - - coeff_full = np.repeat(coeff_first, coeff_pred.shape[0], axis=0) # 257 - - coeff_full[:, 80:144] = coeff_pred[:, 0:64] - coeff_full[:, 224:227] = coeff_pred[:, 64:67] # 3 dim translation - coeff_full[:, 254:] = coeff_pred[:, 67:] # 3 dim translation - - tmp_video_path = '/tmp/face3dtmp.mp4' - - facemodel = FaceReconModel(args) - - video = cv2.VideoWriter(tmp_video_path, cv2.VideoWriter_fourcc(*'mp4v'), 25, (224, 224)) - - for k in tqdm(range(coeff_pred.shape[0]), 'face3d rendering:'): - cur_coeff_full = torch.tensor(coeff_full[k:k+1], device=device) - - facemodel.forward(cur_coeff_full, device) - - predicted_landmark = facemodel.pred_lm # TODO. 
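# For orientation, the 257-dim coefficient rows sliced in gen_composed_video
# above follow the usual Deep3DFaceRecon layout (stated here as an assumption,
# not enforced anywhere in this file):
#   [0:80]    identity,   [80:144]  expression,   [144:224] texture,
#   [224:227] rotation (euler angles),   [227:254] lighting (SH gamma),
#   [254:257] translation
# Note that under this layout the slice at 224:227 holds the pose angles
# rather than a translation.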
- predicted_landmark = predicted_landmark.cpu().numpy().squeeze() - - rendered_img = facemodel.pred_face - rendered_img = 255. * rendered_img.cpu().numpy().squeeze().transpose(1,2,0) - out_img = rendered_img[:, :, :3].astype(np.uint8) - - video.write(np.uint8(out_img[:,:,::-1])) - - video.release() - - command = 'ffmpeg -v quiet -y -i {} -i {} -strict -2 -q:v 1 {}'.format(audio_path, tmp_video_path, save_path) - subprocess.call(command, shell=platform.system() != 'Windows') - diff --git a/src/src/facerender/animate.py b/src/src/facerender/animate.py deleted file mode 100644 index 781f5a3318a086049cc6b74393073ddda7001d5e..0000000000000000000000000000000000000000 --- a/src/src/facerender/animate.py +++ /dev/null @@ -1,257 +0,0 @@ -import os -import cv2 -import yaml -import numpy as np -import warnings -from skimage import img_as_ubyte -import safetensors -import safetensors.torch -warnings.filterwarnings('ignore') - - -import imageio -import torch -import torchvision - - -from src.facerender.modules.keypoint_detector import HEEstimator, KPDetector -from src.facerender.modules.mapping import MappingNet -from src.facerender.modules.generator import OcclusionAwareGenerator, OcclusionAwareSPADEGenerator -from src.facerender.modules.make_animation import make_animation - -from pydub import AudioSegment -from src.utils.face_enhancer import enhancer_generator_with_len, enhancer_list -from src.utils.paste_pic import paste_pic -from src.utils.videoio import save_video_with_watermark - -try: - import webui # in webui - in_webui = True -except: - in_webui = False - -class AnimateFromCoeff(): - - def __init__(self, sadtalker_path, device): - - with open(sadtalker_path['facerender_yaml']) as f: - config = yaml.safe_load(f) - - generator = OcclusionAwareSPADEGenerator(**config['model_params']['generator_params'], - **config['model_params']['common_params']) - kp_extractor = KPDetector(**config['model_params']['kp_detector_params'], - **config['model_params']['common_params']) - he_estimator = HEEstimator(**config['model_params']['he_estimator_params'], - **config['model_params']['common_params']) - mapping = MappingNet(**config['model_params']['mapping_params']) - - generator.to(device) - kp_extractor.to(device) - he_estimator.to(device) - mapping.to(device) - for param in generator.parameters(): - param.requires_grad = False - for param in kp_extractor.parameters(): - param.requires_grad = False - for param in he_estimator.parameters(): - param.requires_grad = False - for param in mapping.parameters(): - param.requires_grad = False - - if sadtalker_path is not None: - if 'checkpoint' in sadtalker_path: # use safe tensor - self.load_cpk_facevid2vid_safetensor(sadtalker_path['checkpoint'], kp_detector=kp_extractor, generator=generator, he_estimator=None) - else: - self.load_cpk_facevid2vid(sadtalker_path['free_view_checkpoint'], kp_detector=kp_extractor, generator=generator, he_estimator=he_estimator) - else: - raise AttributeError("Checkpoint should be specified for video head pose estimator.") - - if sadtalker_path['mappingnet_checkpoint'] is not None: - self.load_cpk_mapping(sadtalker_path['mappingnet_checkpoint'], mapping=mapping) - else: - raise AttributeError("Checkpoint should be specified for video head pose estimator.") - - self.kp_extractor = kp_extractor - self.generator = generator - self.he_estimator = he_estimator - self.mapping = mapping - - self.kp_extractor.eval() - self.generator.eval() - self.he_estimator.eval() - self.mapping.eval() - - self.device = device - - def 
load_cpk_facevid2vid_safetensor(self, checkpoint_path, generator=None, - kp_detector=None, he_estimator=None, - device="cpu"): - - checkpoint = safetensors.torch.load_file(checkpoint_path) - - if generator is not None: - x_generator = {} - for k,v in checkpoint.items(): - if 'generator' in k: - x_generator[k.replace('generator.', '')] = v - generator.load_state_dict(x_generator) - if kp_detector is not None: - x_generator = {} - for k,v in checkpoint.items(): - if 'kp_extractor' in k: - x_generator[k.replace('kp_extractor.', '')] = v - kp_detector.load_state_dict(x_generator) - if he_estimator is not None: - x_generator = {} - for k,v in checkpoint.items(): - if 'he_estimator' in k: - x_generator[k.replace('he_estimator.', '')] = v - he_estimator.load_state_dict(x_generator) - - return None - - def load_cpk_facevid2vid(self, checkpoint_path, generator=None, discriminator=None, - kp_detector=None, he_estimator=None, optimizer_generator=None, - optimizer_discriminator=None, optimizer_kp_detector=None, - optimizer_he_estimator=None, device="cpu"): - checkpoint = torch.load(checkpoint_path, map_location=torch.device(device)) - if generator is not None: - generator.load_state_dict(checkpoint['generator']) - if kp_detector is not None: - kp_detector.load_state_dict(checkpoint['kp_detector']) - if he_estimator is not None: - he_estimator.load_state_dict(checkpoint['he_estimator']) - if discriminator is not None: - try: - discriminator.load_state_dict(checkpoint['discriminator']) - except: - print ('No discriminator in the state-dict. Dicriminator will be randomly initialized') - if optimizer_generator is not None: - optimizer_generator.load_state_dict(checkpoint['optimizer_generator']) - if optimizer_discriminator is not None: - try: - optimizer_discriminator.load_state_dict(checkpoint['optimizer_discriminator']) - except RuntimeError as e: - print ('No discriminator optimizer in the state-dict. 
Optimizer will be not initialized') - if optimizer_kp_detector is not None: - optimizer_kp_detector.load_state_dict(checkpoint['optimizer_kp_detector']) - if optimizer_he_estimator is not None: - optimizer_he_estimator.load_state_dict(checkpoint['optimizer_he_estimator']) - - return checkpoint['epoch'] - - def load_cpk_mapping(self, checkpoint_path, mapping=None, discriminator=None, - optimizer_mapping=None, optimizer_discriminator=None, device='cpu'): - checkpoint = torch.load(checkpoint_path, map_location=torch.device(device)) - if mapping is not None: - mapping.load_state_dict(checkpoint['mapping']) - if discriminator is not None: - discriminator.load_state_dict(checkpoint['discriminator']) - if optimizer_mapping is not None: - optimizer_mapping.load_state_dict(checkpoint['optimizer_mapping']) - if optimizer_discriminator is not None: - optimizer_discriminator.load_state_dict(checkpoint['optimizer_discriminator']) - - return checkpoint['epoch'] - - def generate(self, x, video_save_dir, pic_path, crop_info, enhancer=None, background_enhancer=None, preprocess='crop', img_size=256): - - source_image=x['source_image'].type(torch.FloatTensor) - source_semantics=x['source_semantics'].type(torch.FloatTensor) - target_semantics=x['target_semantics_list'].type(torch.FloatTensor) - source_image=source_image.to(self.device) - source_semantics=source_semantics.to(self.device) - target_semantics=target_semantics.to(self.device) - if 'yaw_c_seq' in x: - yaw_c_seq = x['yaw_c_seq'].type(torch.FloatTensor) - yaw_c_seq = x['yaw_c_seq'].to(self.device) - else: - yaw_c_seq = None - if 'pitch_c_seq' in x: - pitch_c_seq = x['pitch_c_seq'].type(torch.FloatTensor) - pitch_c_seq = x['pitch_c_seq'].to(self.device) - else: - pitch_c_seq = None - if 'roll_c_seq' in x: - roll_c_seq = x['roll_c_seq'].type(torch.FloatTensor) - roll_c_seq = x['roll_c_seq'].to(self.device) - else: - roll_c_seq = None - - frame_num = x['frame_num'] - - predictions_video = make_animation(source_image, source_semantics, target_semantics, - self.generator, self.kp_extractor, self.he_estimator, self.mapping, - yaw_c_seq, pitch_c_seq, roll_c_seq, use_exp = True) - - predictions_video = predictions_video.reshape((-1,)+predictions_video.shape[2:]) - predictions_video = predictions_video[:frame_num] - - video = [] - for idx in range(predictions_video.shape[0]): - image = predictions_video[idx] - image = np.transpose(image.data.cpu().numpy(), [1, 2, 0]).astype(np.float32) - video.append(image) - result = img_as_ubyte(video) - - ### the generated video is 256x256, so we keep the aspect ratio, - original_size = crop_info[0] - if original_size: - result = [ cv2.resize(result_i,(img_size, int(img_size * original_size[1]/original_size[0]) )) for result_i in result ] - - video_name = x['video_name'] + '.mp4' - path = os.path.join(video_save_dir, 'temp_'+video_name) - - imageio.mimsave(path, result, fps=float(25)) - - av_path = os.path.join(video_save_dir, video_name) - return_path = av_path - - audio_path = x['audio_path'] - audio_name = os.path.splitext(os.path.split(audio_path)[-1])[0] - new_audio_path = os.path.join(video_save_dir, audio_name+'.wav') - start_time = 0 - # cog will not keep the .mp3 filename - sound = AudioSegment.from_file(audio_path) - frames = frame_num - end_time = start_time + frames*1/25*1000 - word1=sound.set_frame_rate(16000) - word = word1[start_time:end_time] - word.export(new_audio_path, format="wav") - - save_video_with_watermark(path, new_audio_path, av_path, watermark= False) - print(f'The generated video is named 
{video_save_dir}/{video_name}') - - if 'full' in preprocess.lower(): - # only add watermark to the full image. - video_name_full = x['video_name'] + '_full.mp4' - full_video_path = os.path.join(video_save_dir, video_name_full) - return_path = full_video_path - paste_pic(path, pic_path, crop_info, new_audio_path, full_video_path, extended_crop= True if 'ext' in preprocess.lower() else False) - print(f'The generated video is named {video_save_dir}/{video_name_full}') - else: - full_video_path = av_path - - #### paste back then enhancers - if enhancer: - video_name_enhancer = x['video_name'] + '_enhanced.mp4' - enhanced_path = os.path.join(video_save_dir, 'temp_'+video_name_enhancer) - av_path_enhancer = os.path.join(video_save_dir, video_name_enhancer) - return_path = av_path_enhancer - - try: - enhanced_images_gen_with_len = enhancer_generator_with_len(full_video_path, method=enhancer, bg_upsampler=background_enhancer) - imageio.mimsave(enhanced_path, enhanced_images_gen_with_len, fps=float(25)) - except: - enhanced_images_gen_with_len = enhancer_list(full_video_path, method=enhancer, bg_upsampler=background_enhancer) - imageio.mimsave(enhanced_path, enhanced_images_gen_with_len, fps=float(25)) - - save_video_with_watermark(enhanced_path, new_audio_path, av_path_enhancer, watermark= False) - print(f'The generated video is named {video_save_dir}/{video_name_enhancer}') - os.remove(enhanced_path) - - os.remove(path) - os.remove(new_audio_path) - - return return_path - diff --git a/src/src/facerender/modules/dense_motion.py b/src/src/facerender/modules/dense_motion.py deleted file mode 100644 index a286ead2e84ed1961335d34a3b50ab38f25e4495..0000000000000000000000000000000000000000 --- a/src/src/facerender/modules/dense_motion.py +++ /dev/null @@ -1,121 +0,0 @@ -from torch import nn -import torch.nn.functional as F -import torch -from src.facerender.modules.util import Hourglass, make_coordinate_grid, kp2gaussian - -from src.facerender.sync_batchnorm import SynchronizedBatchNorm3d as BatchNorm3d - - -class DenseMotionNetwork(nn.Module): - """ - Module that predicting a dense motion from sparse motion representation given by kp_source and kp_driving - """ - - def __init__(self, block_expansion, num_blocks, max_features, num_kp, feature_channel, reshape_depth, compress, - estimate_occlusion_map=False): - super(DenseMotionNetwork, self).__init__() - # self.hourglass = Hourglass(block_expansion=block_expansion, in_features=(num_kp+1)*(feature_channel+1), max_features=max_features, num_blocks=num_blocks) - self.hourglass = Hourglass(block_expansion=block_expansion, in_features=(num_kp+1)*(compress+1), max_features=max_features, num_blocks=num_blocks) - - self.mask = nn.Conv3d(self.hourglass.out_filters, num_kp + 1, kernel_size=7, padding=3) - - self.compress = nn.Conv3d(feature_channel, compress, kernel_size=1) - self.norm = BatchNorm3d(compress, affine=True) - - if estimate_occlusion_map: - # self.occlusion = nn.Conv2d(reshape_channel*reshape_depth, 1, kernel_size=7, padding=3) - self.occlusion = nn.Conv2d(self.hourglass.out_filters*reshape_depth, 1, kernel_size=7, padding=3) - else: - self.occlusion = None - - self.num_kp = num_kp - - - def create_sparse_motions(self, feature, kp_driving, kp_source): - bs, _, d, h, w = feature.shape - identity_grid = make_coordinate_grid((d, h, w), type=kp_source['value'].type()) - identity_grid = identity_grid.view(1, 1, d, h, w, 3) - coordinate_grid = identity_grid - kp_driving['value'].view(bs, self.num_kp, 1, 1, 1, 3) - - # if 'jacobian' in kp_driving: - if 
'jacobian' in kp_driving and kp_driving['jacobian'] is not None: - jacobian = torch.matmul(kp_source['jacobian'], torch.inverse(kp_driving['jacobian'])) - jacobian = jacobian.unsqueeze(-3).unsqueeze(-3).unsqueeze(-3) - jacobian = jacobian.repeat(1, 1, d, h, w, 1, 1) - coordinate_grid = torch.matmul(jacobian, coordinate_grid.unsqueeze(-1)) - coordinate_grid = coordinate_grid.squeeze(-1) - - - driving_to_source = coordinate_grid + kp_source['value'].view(bs, self.num_kp, 1, 1, 1, 3) # (bs, num_kp, d, h, w, 3) - - #adding background feature - identity_grid = identity_grid.repeat(bs, 1, 1, 1, 1, 1) - sparse_motions = torch.cat([identity_grid, driving_to_source], dim=1) #bs num_kp+1 d h w 3 - - # sparse_motions = driving_to_source - - return sparse_motions - - def create_deformed_feature(self, feature, sparse_motions): - bs, _, d, h, w = feature.shape - feature_repeat = feature.unsqueeze(1).unsqueeze(1).repeat(1, self.num_kp+1, 1, 1, 1, 1, 1) # (bs, num_kp+1, 1, c, d, h, w) - feature_repeat = feature_repeat.view(bs * (self.num_kp+1), -1, d, h, w) # (bs*(num_kp+1), c, d, h, w) - sparse_motions = sparse_motions.view((bs * (self.num_kp+1), d, h, w, -1)) # (bs*(num_kp+1), d, h, w, 3) !!!! - sparse_deformed = F.grid_sample(feature_repeat, sparse_motions) - sparse_deformed = sparse_deformed.view((bs, self.num_kp+1, -1, d, h, w)) # (bs, num_kp+1, c, d, h, w) - return sparse_deformed - - def create_heatmap_representations(self, feature, kp_driving, kp_source): - spatial_size = feature.shape[3:] - gaussian_driving = kp2gaussian(kp_driving, spatial_size=spatial_size, kp_variance=0.01) - gaussian_source = kp2gaussian(kp_source, spatial_size=spatial_size, kp_variance=0.01) - heatmap = gaussian_driving - gaussian_source - - # adding background feature - zeros = torch.zeros(heatmap.shape[0], 1, spatial_size[0], spatial_size[1], spatial_size[2]).type(heatmap.type()) - heatmap = torch.cat([zeros, heatmap], dim=1) - heatmap = heatmap.unsqueeze(2) # (bs, num_kp+1, 1, d, h, w) - return heatmap - - def forward(self, feature, kp_driving, kp_source): - bs, _, d, h, w = feature.shape - - feature = self.compress(feature) - feature = self.norm(feature) - feature = F.relu(feature) - - out_dict = dict() - sparse_motion = self.create_sparse_motions(feature, kp_driving, kp_source) - deformed_feature = self.create_deformed_feature(feature, sparse_motion) - - heatmap = self.create_heatmap_representations(deformed_feature, kp_driving, kp_source) - - input_ = torch.cat([heatmap, deformed_feature], dim=2) - input_ = input_.view(bs, -1, d, h, w) - - # input = deformed_feature.view(bs, -1, d, h, w) # (bs, num_kp+1 * c, d, h, w) - - prediction = self.hourglass(input_) - - - mask = self.mask(prediction) - mask = F.softmax(mask, dim=1) - out_dict['mask'] = mask - mask = mask.unsqueeze(2) # (bs, num_kp+1, 1, d, h, w) - - zeros_mask = torch.zeros_like(mask) - mask = torch.where(mask < 1e-3, zeros_mask, mask) - - sparse_motion = sparse_motion.permute(0, 1, 5, 2, 3, 4) # (bs, num_kp+1, 3, d, h, w) - deformation = (sparse_motion * mask).sum(dim=1) # (bs, 3, d, h, w) - deformation = deformation.permute(0, 2, 3, 4, 1) # (bs, d, h, w, 3) - - out_dict['deformation'] = deformation - - if self.occlusion: - bs, c, d, h, w = prediction.shape - prediction = prediction.view(bs, -1, h, w) - occlusion_map = torch.sigmoid(self.occlusion(prediction)) - out_dict['occlusion_map'] = occlusion_map - - return out_dict diff --git a/src/src/facerender/modules/discriminator.py b/src/src/facerender/modules/discriminator.py deleted file mode 100644 index 
d4459b07cb075c9f9d345f9b3dffc02cd859313b..0000000000000000000000000000000000000000 --- a/src/src/facerender/modules/discriminator.py +++ /dev/null @@ -1,90 +0,0 @@ -from torch import nn -import torch.nn.functional as F -from facerender.modules.util import kp2gaussian -import torch - - -class DownBlock2d(nn.Module): - """ - Simple block for processing video (encoder). - """ - - def __init__(self, in_features, out_features, norm=False, kernel_size=4, pool=False, sn=False): - super(DownBlock2d, self).__init__() - self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size) - - if sn: - self.conv = nn.utils.spectral_norm(self.conv) - - if norm: - self.norm = nn.InstanceNorm2d(out_features, affine=True) - else: - self.norm = None - self.pool = pool - - def forward(self, x): - out = x - out = self.conv(out) - if self.norm: - out = self.norm(out) - out = F.leaky_relu(out, 0.2) - if self.pool: - out = F.avg_pool2d(out, (2, 2)) - return out - - -class Discriminator(nn.Module): - """ - Discriminator similar to Pix2Pix - """ - - def __init__(self, num_channels=3, block_expansion=64, num_blocks=4, max_features=512, - sn=False, **kwargs): - super(Discriminator, self).__init__() - - down_blocks = [] - for i in range(num_blocks): - down_blocks.append( - DownBlock2d(num_channels if i == 0 else min(max_features, block_expansion * (2 ** i)), - min(max_features, block_expansion * (2 ** (i + 1))), - norm=(i != 0), kernel_size=4, pool=(i != num_blocks - 1), sn=sn)) - - self.down_blocks = nn.ModuleList(down_blocks) - self.conv = nn.Conv2d(self.down_blocks[-1].conv.out_channels, out_channels=1, kernel_size=1) - if sn: - self.conv = nn.utils.spectral_norm(self.conv) - - def forward(self, x): - feature_maps = [] - out = x - - for down_block in self.down_blocks: - feature_maps.append(down_block(out)) - out = feature_maps[-1] - prediction_map = self.conv(out) - - return feature_maps, prediction_map - - -class MultiScaleDiscriminator(nn.Module): - """ - Multi-scale (scale) discriminator - """ - - def __init__(self, scales=(), **kwargs): - super(MultiScaleDiscriminator, self).__init__() - self.scales = scales - discs = {} - for scale in scales: - discs[str(scale).replace('.', '-')] = Discriminator(**kwargs) - self.discs = nn.ModuleDict(discs) - - def forward(self, x): - out_dict = {} - for scale, disc in self.discs.items(): - scale = str(scale).replace('-', '.') - key = 'prediction_' + scale - feature_maps, prediction_map = disc(x[key]) - out_dict['feature_maps_' + scale] = feature_maps - out_dict['prediction_map_' + scale] = prediction_map - return out_dict diff --git a/src/src/facerender/modules/generator.py b/src/src/facerender/modules/generator.py deleted file mode 100644 index 5a9edcb3b328d3afc99072b2461d7ca69919f813..0000000000000000000000000000000000000000 --- a/src/src/facerender/modules/generator.py +++ /dev/null @@ -1,255 +0,0 @@ -import torch -from torch import nn -import torch.nn.functional as F -from src.facerender.modules.util import ResBlock2d, SameBlock2d, UpBlock2d, DownBlock2d, ResBlock3d, SPADEResnetBlock -from src.facerender.modules.dense_motion import DenseMotionNetwork - - -class OcclusionAwareGenerator(nn.Module): - """ - Generator follows NVIDIA architecture. 
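As a side note on the DenseMotionNetwork defined above: the per-keypoint sparse motion fields are blended by the softmax mask into one dense deformation field, which grid_sample then consumes. A minimal shape-level sketch with arbitrary sizes (all values random, for illustration only):

    import torch

    bs, num_kp, d, h, w = 1, 15, 4, 8, 8
    sparse_motion = torch.randn(bs, num_kp + 1, d, h, w, 3)    # keypoints + background
    mask = torch.softmax(torch.randn(bs, num_kp + 1, d, h, w), dim=1).unsqueeze(2)
    sparse_motion = sparse_motion.permute(0, 1, 5, 2, 3, 4)    # (bs, K+1, 3, d, h, w)
    deformation = (sparse_motion * mask).sum(dim=1)            # (bs, 3, d, h, w)
    deformation = deformation.permute(0, 2, 3, 4, 1)           # (bs, d, h, w, 3), grid_sample layout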
- """ - - def __init__(self, image_channel, feature_channel, num_kp, block_expansion, max_features, num_down_blocks, reshape_channel, reshape_depth, - num_resblocks, estimate_occlusion_map=False, dense_motion_params=None, estimate_jacobian=False): - super(OcclusionAwareGenerator, self).__init__() - - if dense_motion_params is not None: - self.dense_motion_network = DenseMotionNetwork(num_kp=num_kp, feature_channel=feature_channel, - estimate_occlusion_map=estimate_occlusion_map, - **dense_motion_params) - else: - self.dense_motion_network = None - - self.first = SameBlock2d(image_channel, block_expansion, kernel_size=(7, 7), padding=(3, 3)) - - down_blocks = [] - for i in range(num_down_blocks): - in_features = min(max_features, block_expansion * (2 ** i)) - out_features = min(max_features, block_expansion * (2 ** (i + 1))) - down_blocks.append(DownBlock2d(in_features, out_features, kernel_size=(3, 3), padding=(1, 1))) - self.down_blocks = nn.ModuleList(down_blocks) - - self.second = nn.Conv2d(in_channels=out_features, out_channels=max_features, kernel_size=1, stride=1) - - self.reshape_channel = reshape_channel - self.reshape_depth = reshape_depth - - self.resblocks_3d = torch.nn.Sequential() - for i in range(num_resblocks): - self.resblocks_3d.add_module('3dr' + str(i), ResBlock3d(reshape_channel, kernel_size=3, padding=1)) - - out_features = block_expansion * (2 ** (num_down_blocks)) - self.third = SameBlock2d(max_features, out_features, kernel_size=(3, 3), padding=(1, 1), lrelu=True) - self.fourth = nn.Conv2d(in_channels=out_features, out_channels=out_features, kernel_size=1, stride=1) - - self.resblocks_2d = torch.nn.Sequential() - for i in range(num_resblocks): - self.resblocks_2d.add_module('2dr' + str(i), ResBlock2d(out_features, kernel_size=3, padding=1)) - - up_blocks = [] - for i in range(num_down_blocks): - in_features = max(block_expansion, block_expansion * (2 ** (num_down_blocks - i))) - out_features = max(block_expansion, block_expansion * (2 ** (num_down_blocks - i - 1))) - up_blocks.append(UpBlock2d(in_features, out_features, kernel_size=(3, 3), padding=(1, 1))) - self.up_blocks = nn.ModuleList(up_blocks) - - self.final = nn.Conv2d(block_expansion, image_channel, kernel_size=(7, 7), padding=(3, 3)) - self.estimate_occlusion_map = estimate_occlusion_map - self.image_channel = image_channel - - def deform_input(self, inp, deformation): - _, d_old, h_old, w_old, _ = deformation.shape - _, _, d, h, w = inp.shape - if d_old != d or h_old != h or w_old != w: - deformation = deformation.permute(0, 4, 1, 2, 3) - deformation = F.interpolate(deformation, size=(d, h, w), mode='trilinear') - deformation = deformation.permute(0, 2, 3, 4, 1) - return F.grid_sample(inp, deformation) - - def forward(self, source_image, kp_driving, kp_source): - # Encoding (downsampling) part - out = self.first(source_image) - for i in range(len(self.down_blocks)): - out = self.down_blocks[i](out) - out = self.second(out) - bs, c, h, w = out.shape - # print(out.shape) - feature_3d = out.view(bs, self.reshape_channel, self.reshape_depth, h ,w) - feature_3d = self.resblocks_3d(feature_3d) - - # Transforming feature representation according to deformation and occlusion - output_dict = {} - if self.dense_motion_network is not None: - dense_motion = self.dense_motion_network(feature=feature_3d, kp_driving=kp_driving, - kp_source=kp_source) - output_dict['mask'] = dense_motion['mask'] - - if 'occlusion_map' in dense_motion: - occlusion_map = dense_motion['occlusion_map'] - output_dict['occlusion_map'] = 
occlusion_map - else: - occlusion_map = None - deformation = dense_motion['deformation'] - out = self.deform_input(feature_3d, deformation) - - bs, c, d, h, w = out.shape - out = out.view(bs, c*d, h, w) - out = self.third(out) - out = self.fourth(out) - - if occlusion_map is not None: - if out.shape[2] != occlusion_map.shape[2] or out.shape[3] != occlusion_map.shape[3]: - occlusion_map = F.interpolate(occlusion_map, size=out.shape[2:], mode='bilinear') - out = out * occlusion_map - - # output_dict["deformed"] = self.deform_input(source_image, deformation) # 3d deformation cannot deform 2d image - - # Decoding part - out = self.resblocks_2d(out) - for i in range(len(self.up_blocks)): - out = self.up_blocks[i](out) - out = self.final(out) - out = F.sigmoid(out) - - output_dict["prediction"] = out - - return output_dict - - -class SPADEDecoder(nn.Module): - def __init__(self): - super().__init__() - ic = 256 - oc = 64 - norm_G = 'spadespectralinstance' - label_nc = 256 - - self.fc = nn.Conv2d(ic, 2 * ic, 3, padding=1) - self.G_middle_0 = SPADEResnetBlock(2 * ic, 2 * ic, norm_G, label_nc) - self.G_middle_1 = SPADEResnetBlock(2 * ic, 2 * ic, norm_G, label_nc) - self.G_middle_2 = SPADEResnetBlock(2 * ic, 2 * ic, norm_G, label_nc) - self.G_middle_3 = SPADEResnetBlock(2 * ic, 2 * ic, norm_G, label_nc) - self.G_middle_4 = SPADEResnetBlock(2 * ic, 2 * ic, norm_G, label_nc) - self.G_middle_5 = SPADEResnetBlock(2 * ic, 2 * ic, norm_G, label_nc) - self.up_0 = SPADEResnetBlock(2 * ic, ic, norm_G, label_nc) - self.up_1 = SPADEResnetBlock(ic, oc, norm_G, label_nc) - self.conv_img = nn.Conv2d(oc, 3, 3, padding=1) - self.up = nn.Upsample(scale_factor=2) - - def forward(self, feature): - seg = feature - x = self.fc(feature) - x = self.G_middle_0(x, seg) - x = self.G_middle_1(x, seg) - x = self.G_middle_2(x, seg) - x = self.G_middle_3(x, seg) - x = self.G_middle_4(x, seg) - x = self.G_middle_5(x, seg) - x = self.up(x) - x = self.up_0(x, seg) # 256, 128, 128 - x = self.up(x) - x = self.up_1(x, seg) # 64, 256, 256 - - x = self.conv_img(F.leaky_relu(x, 2e-1)) - # x = torch.tanh(x) - x = F.sigmoid(x) - - return x - - -class OcclusionAwareSPADEGenerator(nn.Module): - - def __init__(self, image_channel, feature_channel, num_kp, block_expansion, max_features, num_down_blocks, reshape_channel, reshape_depth, - num_resblocks, estimate_occlusion_map=False, dense_motion_params=None, estimate_jacobian=False): - super(OcclusionAwareSPADEGenerator, self).__init__() - - if dense_motion_params is not None: - self.dense_motion_network = DenseMotionNetwork(num_kp=num_kp, feature_channel=feature_channel, - estimate_occlusion_map=estimate_occlusion_map, - **dense_motion_params) - else: - self.dense_motion_network = None - - self.first = SameBlock2d(image_channel, block_expansion, kernel_size=(3, 3), padding=(1, 1)) - - down_blocks = [] - for i in range(num_down_blocks): - in_features = min(max_features, block_expansion * (2 ** i)) - out_features = min(max_features, block_expansion * (2 ** (i + 1))) - down_blocks.append(DownBlock2d(in_features, out_features, kernel_size=(3, 3), padding=(1, 1))) - self.down_blocks = nn.ModuleList(down_blocks) - - self.second = nn.Conv2d(in_channels=out_features, out_channels=max_features, kernel_size=1, stride=1) - - self.reshape_channel = reshape_channel - self.reshape_depth = reshape_depth - - self.resblocks_3d = torch.nn.Sequential() - for i in range(num_resblocks): - self.resblocks_3d.add_module('3dr' + str(i), ResBlock3d(reshape_channel, kernel_size=3, padding=1)) - - out_features = 
block_expansion * (2 ** (num_down_blocks)) - self.third = SameBlock2d(max_features, out_features, kernel_size=(3, 3), padding=(1, 1), lrelu=True) - self.fourth = nn.Conv2d(in_channels=out_features, out_channels=out_features, kernel_size=1, stride=1) - - self.estimate_occlusion_map = estimate_occlusion_map - self.image_channel = image_channel - - self.decoder = SPADEDecoder() - - def deform_input(self, inp, deformation): - _, d_old, h_old, w_old, _ = deformation.shape - _, _, d, h, w = inp.shape - if d_old != d or h_old != h or w_old != w: - deformation = deformation.permute(0, 4, 1, 2, 3) - deformation = F.interpolate(deformation, size=(d, h, w), mode='trilinear') - deformation = deformation.permute(0, 2, 3, 4, 1) - return F.grid_sample(inp, deformation) - - def forward(self, source_image, kp_driving, kp_source): - # Encoding (downsampling) part - out = self.first(source_image) - for i in range(len(self.down_blocks)): - out = self.down_blocks[i](out) - out = self.second(out) - bs, c, h, w = out.shape - # print(out.shape) - feature_3d = out.view(bs, self.reshape_channel, self.reshape_depth, h ,w) - feature_3d = self.resblocks_3d(feature_3d) - - # Transforming feature representation according to deformation and occlusion - output_dict = {} - if self.dense_motion_network is not None: - dense_motion = self.dense_motion_network(feature=feature_3d, kp_driving=kp_driving, - kp_source=kp_source) - output_dict['mask'] = dense_motion['mask'] - - # import pdb; pdb.set_trace() - - if 'occlusion_map' in dense_motion: - occlusion_map = dense_motion['occlusion_map'] - output_dict['occlusion_map'] = occlusion_map - else: - occlusion_map = None - deformation = dense_motion['deformation'] - out = self.deform_input(feature_3d, deformation) - - bs, c, d, h, w = out.shape - out = out.view(bs, c*d, h, w) - out = self.third(out) - out = self.fourth(out) - - # occlusion_map = torch.where(occlusion_map < 0.95, 0, occlusion_map) - - if occlusion_map is not None: - if out.shape[2] != occlusion_map.shape[2] or out.shape[3] != occlusion_map.shape[3]: - occlusion_map = F.interpolate(occlusion_map, size=out.shape[2:], mode='bilinear') - out = out * occlusion_map - - # Decoding part - out = self.decoder(out) - - output_dict["prediction"] = out - - return output_dict \ No newline at end of file diff --git a/src/src/facerender/modules/keypoint_detector.py b/src/src/facerender/modules/keypoint_detector.py deleted file mode 100644 index 62a38a962b2f1a4326aac771aced353ec5e22a96..0000000000000000000000000000000000000000 --- a/src/src/facerender/modules/keypoint_detector.py +++ /dev/null @@ -1,179 +0,0 @@ -from torch import nn -import torch -import torch.nn.functional as F - -from src.facerender.sync_batchnorm import SynchronizedBatchNorm2d as BatchNorm2d -from src.facerender.modules.util import KPHourglass, make_coordinate_grid, AntiAliasInterpolation2d, ResBottleneck - - -class KPDetector(nn.Module): - """ - Detecting canonical keypoints. Return keypoint position and jacobian near each keypoint. 
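Both generators above share the same deform_input helper: the dense-motion network emits a backward sampling grid over [-1, 1]^3 that deform_input resizes to the feature resolution (trilinear) and applies with F.grid_sample. A sanity sketch, not from the repository, using an identity warp (the deleted code omits align_corners and takes the framework default; here it is passed explicitly):

import torch
import torch.nn.functional as F

feat = torch.randn(1, 4, 16, 64, 64)                     # (B, C, D, H, W) volume
theta = torch.eye(3, 4).unsqueeze(0)                     # identity 3-D affine
grid = F.affine_grid(theta, size=list(feat.shape), align_corners=False)
warped = F.grid_sample(feat, grid, align_corners=False)  # identity warp
assert torch.allclose(warped, feat, atol=1e-5)           # round-trips the input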
- """ - - def __init__(self, block_expansion, feature_channel, num_kp, image_channel, max_features, reshape_channel, reshape_depth, - num_blocks, temperature, estimate_jacobian=False, scale_factor=1, single_jacobian_map=False): - super(KPDetector, self).__init__() - - self.predictor = KPHourglass(block_expansion, in_features=image_channel, - max_features=max_features, reshape_features=reshape_channel, reshape_depth=reshape_depth, num_blocks=num_blocks) - - # self.kp = nn.Conv3d(in_channels=self.predictor.out_filters, out_channels=num_kp, kernel_size=7, padding=3) - self.kp = nn.Conv3d(in_channels=self.predictor.out_filters, out_channels=num_kp, kernel_size=3, padding=1) - - if estimate_jacobian: - self.num_jacobian_maps = 1 if single_jacobian_map else num_kp - # self.jacobian = nn.Conv3d(in_channels=self.predictor.out_filters, out_channels=9 * self.num_jacobian_maps, kernel_size=7, padding=3) - self.jacobian = nn.Conv3d(in_channels=self.predictor.out_filters, out_channels=9 * self.num_jacobian_maps, kernel_size=3, padding=1) - ''' - initial as: - [[1 0 0] - [0 1 0] - [0 0 1]] - ''' - self.jacobian.weight.data.zero_() - self.jacobian.bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0, 0, 0, 1] * self.num_jacobian_maps, dtype=torch.float)) - else: - self.jacobian = None - - self.temperature = temperature - self.scale_factor = scale_factor - if self.scale_factor != 1: - self.down = AntiAliasInterpolation2d(image_channel, self.scale_factor) - - def gaussian2kp(self, heatmap): - """ - Extract the mean from a heatmap - """ - shape = heatmap.shape - heatmap = heatmap.unsqueeze(-1) - grid = make_coordinate_grid(shape[2:], heatmap.type()).unsqueeze_(0).unsqueeze_(0) - value = (heatmap * grid).sum(dim=(2, 3, 4)) - kp = {'value': value} - - return kp - - def forward(self, x): - if self.scale_factor != 1: - x = self.down(x) - - feature_map = self.predictor(x) - prediction = self.kp(feature_map) - - final_shape = prediction.shape - heatmap = prediction.view(final_shape[0], final_shape[1], -1) - heatmap = F.softmax(heatmap / self.temperature, dim=2) - heatmap = heatmap.view(*final_shape) - - out = self.gaussian2kp(heatmap) - - if self.jacobian is not None: - jacobian_map = self.jacobian(feature_map) - jacobian_map = jacobian_map.reshape(final_shape[0], self.num_jacobian_maps, 9, final_shape[2], - final_shape[3], final_shape[4]) - heatmap = heatmap.unsqueeze(2) - - jacobian = heatmap * jacobian_map - jacobian = jacobian.view(final_shape[0], final_shape[1], 9, -1) - jacobian = jacobian.sum(dim=-1) - jacobian = jacobian.view(jacobian.shape[0], jacobian.shape[1], 3, 3) - out['jacobian'] = jacobian - - return out - - -class HEEstimator(nn.Module): - """ - Estimating head pose and expression. 
- """ - - def __init__(self, block_expansion, feature_channel, num_kp, image_channel, max_features, num_bins=66, estimate_jacobian=True): - super(HEEstimator, self).__init__() - - self.conv1 = nn.Conv2d(in_channels=image_channel, out_channels=block_expansion, kernel_size=7, padding=3, stride=2) - self.norm1 = BatchNorm2d(block_expansion, affine=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.conv2 = nn.Conv2d(in_channels=block_expansion, out_channels=256, kernel_size=1) - self.norm2 = BatchNorm2d(256, affine=True) - - self.block1 = nn.Sequential() - for i in range(3): - self.block1.add_module('b1_'+ str(i), ResBottleneck(in_features=256, stride=1)) - - self.conv3 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1) - self.norm3 = BatchNorm2d(512, affine=True) - self.block2 = ResBottleneck(in_features=512, stride=2) - - self.block3 = nn.Sequential() - for i in range(3): - self.block3.add_module('b3_'+ str(i), ResBottleneck(in_features=512, stride=1)) - - self.conv4 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1) - self.norm4 = BatchNorm2d(1024, affine=True) - self.block4 = ResBottleneck(in_features=1024, stride=2) - - self.block5 = nn.Sequential() - for i in range(5): - self.block5.add_module('b5_'+ str(i), ResBottleneck(in_features=1024, stride=1)) - - self.conv5 = nn.Conv2d(in_channels=1024, out_channels=2048, kernel_size=1) - self.norm5 = BatchNorm2d(2048, affine=True) - self.block6 = ResBottleneck(in_features=2048, stride=2) - - self.block7 = nn.Sequential() - for i in range(2): - self.block7.add_module('b7_'+ str(i), ResBottleneck(in_features=2048, stride=1)) - - self.fc_roll = nn.Linear(2048, num_bins) - self.fc_pitch = nn.Linear(2048, num_bins) - self.fc_yaw = nn.Linear(2048, num_bins) - - self.fc_t = nn.Linear(2048, 3) - - self.fc_exp = nn.Linear(2048, 3*num_kp) - - def forward(self, x): - out = self.conv1(x) - out = self.norm1(out) - out = F.relu(out) - out = self.maxpool(out) - - out = self.conv2(out) - out = self.norm2(out) - out = F.relu(out) - - out = self.block1(out) - - out = self.conv3(out) - out = self.norm3(out) - out = F.relu(out) - out = self.block2(out) - - out = self.block3(out) - - out = self.conv4(out) - out = self.norm4(out) - out = F.relu(out) - out = self.block4(out) - - out = self.block5(out) - - out = self.conv5(out) - out = self.norm5(out) - out = F.relu(out) - out = self.block6(out) - - out = self.block7(out) - - out = F.adaptive_avg_pool2d(out, 1) - out = out.view(out.shape[0], -1) - - yaw = self.fc_roll(out) - pitch = self.fc_pitch(out) - roll = self.fc_yaw(out) - t = self.fc_t(out) - exp = self.fc_exp(out) - - return {'yaw': yaw, 'pitch': pitch, 'roll': roll, 't': t, 'exp': exp} - diff --git a/src/src/facerender/modules/make_animation.py b/src/src/facerender/modules/make_animation.py deleted file mode 100644 index 3360c53501a064f35d7db21a5361f89aa9658b42..0000000000000000000000000000000000000000 --- a/src/src/facerender/modules/make_animation.py +++ /dev/null @@ -1,170 +0,0 @@ -from scipy.spatial import ConvexHull -import torch -import torch.nn.functional as F -import numpy as np -from tqdm import tqdm - -def normalize_kp(kp_source, kp_driving, kp_driving_initial, adapt_movement_scale=False, - use_relative_movement=False, use_relative_jacobian=False): - if adapt_movement_scale: - source_area = ConvexHull(kp_source['value'][0].data.cpu().numpy()).volume - driving_area = ConvexHull(kp_driving_initial['value'][0].data.cpu().numpy()).volume - adapt_movement_scale = np.sqrt(source_area) / 
np.sqrt(driving_area) - else: - adapt_movement_scale = 1 - - kp_new = {k: v for k, v in kp_driving.items()} - - if use_relative_movement: - kp_value_diff = (kp_driving['value'] - kp_driving_initial['value']) - kp_value_diff *= adapt_movement_scale - kp_new['value'] = kp_value_diff + kp_source['value'] - - if use_relative_jacobian: - jacobian_diff = torch.matmul(kp_driving['jacobian'], torch.inverse(kp_driving_initial['jacobian'])) - kp_new['jacobian'] = torch.matmul(jacobian_diff, kp_source['jacobian']) - - return kp_new - -def headpose_pred_to_degree(pred): - device = pred.device - idx_tensor = [idx for idx in range(66)] - idx_tensor = torch.FloatTensor(idx_tensor).type_as(pred).to(device) - pred = F.softmax(pred) - degree = torch.sum(pred*idx_tensor, 1) * 3 - 99 - return degree - -def get_rotation_matrix(yaw, pitch, roll): - yaw = yaw / 180 * 3.14 - pitch = pitch / 180 * 3.14 - roll = roll / 180 * 3.14 - - roll = roll.unsqueeze(1) - pitch = pitch.unsqueeze(1) - yaw = yaw.unsqueeze(1) - - pitch_mat = torch.cat([torch.ones_like(pitch), torch.zeros_like(pitch), torch.zeros_like(pitch), - torch.zeros_like(pitch), torch.cos(pitch), -torch.sin(pitch), - torch.zeros_like(pitch), torch.sin(pitch), torch.cos(pitch)], dim=1) - pitch_mat = pitch_mat.view(pitch_mat.shape[0], 3, 3) - - yaw_mat = torch.cat([torch.cos(yaw), torch.zeros_like(yaw), torch.sin(yaw), - torch.zeros_like(yaw), torch.ones_like(yaw), torch.zeros_like(yaw), - -torch.sin(yaw), torch.zeros_like(yaw), torch.cos(yaw)], dim=1) - yaw_mat = yaw_mat.view(yaw_mat.shape[0], 3, 3) - - roll_mat = torch.cat([torch.cos(roll), -torch.sin(roll), torch.zeros_like(roll), - torch.sin(roll), torch.cos(roll), torch.zeros_like(roll), - torch.zeros_like(roll), torch.zeros_like(roll), torch.ones_like(roll)], dim=1) - roll_mat = roll_mat.view(roll_mat.shape[0], 3, 3) - - rot_mat = torch.einsum('bij,bjk,bkm->bim', pitch_mat, yaw_mat, roll_mat) - - return rot_mat - -def keypoint_transformation(kp_canonical, he, wo_exp=False): - kp = kp_canonical['value'] # (bs, k, 3) - yaw, pitch, roll= he['yaw'], he['pitch'], he['roll'] - yaw = headpose_pred_to_degree(yaw) - pitch = headpose_pred_to_degree(pitch) - roll = headpose_pred_to_degree(roll) - - if 'yaw_in' in he: - yaw = he['yaw_in'] - if 'pitch_in' in he: - pitch = he['pitch_in'] - if 'roll_in' in he: - roll = he['roll_in'] - - rot_mat = get_rotation_matrix(yaw, pitch, roll) # (bs, 3, 3) - - t, exp = he['t'], he['exp'] - if wo_exp: - exp = exp*0 - - # keypoint rotation - kp_rotated = torch.einsum('bmp,bkp->bkm', rot_mat, kp) - - # keypoint translation - t[:, 0] = t[:, 0]*0 - t[:, 2] = t[:, 2]*0 - t = t.unsqueeze(1).repeat(1, kp.shape[1], 1) - kp_t = kp_rotated + t - - # add expression deviation - exp = exp.view(exp.shape[0], -1, 3) - kp_transformed = kp_t + exp - - return {'value': kp_transformed} - - - -def make_animation(source_image, source_semantics, target_semantics, - generator, kp_detector, he_estimator, mapping, - yaw_c_seq=None, pitch_c_seq=None, roll_c_seq=None, - use_exp=True, use_half=False): - with torch.no_grad(): - predictions = [] - - kp_canonical = kp_detector(source_image) - he_source = mapping(source_semantics) - kp_source = keypoint_transformation(kp_canonical, he_source) - - for frame_idx in tqdm(range(target_semantics.shape[1]), 'Face Renderer:'): - # still check the dimension - # print(target_semantics.shape, source_semantics.shape) - target_semantics_frame = target_semantics[:, frame_idx] - he_driving = mapping(target_semantics_frame) - if yaw_c_seq is not None: - he_driving['yaw_in'] 
= yaw_c_seq[:, frame_idx] - if pitch_c_seq is not None: - he_driving['pitch_in'] = pitch_c_seq[:, frame_idx] - if roll_c_seq is not None: - he_driving['roll_in'] = roll_c_seq[:, frame_idx] - - kp_driving = keypoint_transformation(kp_canonical, he_driving) - - kp_norm = kp_driving - out = generator(source_image, kp_source=kp_source, kp_driving=kp_norm) - ''' - source_image_new = out['prediction'].squeeze(1) - kp_canonical_new = kp_detector(source_image_new) - he_source_new = he_estimator(source_image_new) - kp_source_new = keypoint_transformation(kp_canonical_new, he_source_new, wo_exp=True) - kp_driving_new = keypoint_transformation(kp_canonical_new, he_driving, wo_exp=True) - out = generator(source_image_new, kp_source=kp_source_new, kp_driving=kp_driving_new) - ''' - predictions.append(out['prediction']) - predictions_ts = torch.stack(predictions, dim=1) - return predictions_ts - -class AnimateModel(torch.nn.Module): - """ - Merge all generator related updates into single model for better multi-gpu usage - """ - - def __init__(self, generator, kp_extractor, mapping): - super(AnimateModel, self).__init__() - self.kp_extractor = kp_extractor - self.generator = generator - self.mapping = mapping - - self.kp_extractor.eval() - self.generator.eval() - self.mapping.eval() - - def forward(self, x): - - source_image = x['source_image'] - source_semantics = x['source_semantics'] - target_semantics = x['target_semantics'] - yaw_c_seq = x['yaw_c_seq'] - pitch_c_seq = x['pitch_c_seq'] - roll_c_seq = x['roll_c_seq'] - - predictions_video = make_animation(source_image, source_semantics, target_semantics, - self.generator, self.kp_extractor, - self.mapping, use_exp = True, - yaw_c_seq=yaw_c_seq, pitch_c_seq=pitch_c_seq, roll_c_seq=roll_c_seq) - - return predictions_video \ No newline at end of file diff --git a/src/src/facerender/modules/mapping.py b/src/src/facerender/modules/mapping.py deleted file mode 100644 index 0e3a1c2d1770996080c08e9daafb346f05d7bcdd..0000000000000000000000000000000000000000 --- a/src/src/facerender/modules/mapping.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class MappingNet(nn.Module): - def __init__(self, coeff_nc, descriptor_nc, layer, num_kp, num_bins): - super( MappingNet, self).__init__() - - self.layer = layer - nonlinearity = nn.LeakyReLU(0.1) - - self.first = nn.Sequential( - torch.nn.Conv1d(coeff_nc, descriptor_nc, kernel_size=7, padding=0, bias=True)) - - for i in range(layer): - net = nn.Sequential(nonlinearity, - torch.nn.Conv1d(descriptor_nc, descriptor_nc, kernel_size=3, padding=0, dilation=3)) - setattr(self, 'encoder' + str(i), net) - - self.pooling = nn.AdaptiveAvgPool1d(1) - self.output_nc = descriptor_nc - - self.fc_roll = nn.Linear(descriptor_nc, num_bins) - self.fc_pitch = nn.Linear(descriptor_nc, num_bins) - self.fc_yaw = nn.Linear(descriptor_nc, num_bins) - self.fc_t = nn.Linear(descriptor_nc, 3) - self.fc_exp = nn.Linear(descriptor_nc, 3*num_kp) - - def forward(self, input_3dmm): - out = self.first(input_3dmm) - for i in range(self.layer): - model = getattr(self, 'encoder' + str(i)) - out = model(out) + out[:,:,3:-3] - out = self.pooling(out) - out = out.view(out.shape[0], -1) - #print('out:', out.shape) - - yaw = self.fc_yaw(out) - pitch = self.fc_pitch(out) - roll = self.fc_roll(out) - t = self.fc_t(out) - exp = self.fc_exp(out) - - return {'yaw': yaw, 'pitch': pitch, 'roll': roll, 't': t, 'exp': exp} \ No newline at end of file diff --git 
a/src/src/facerender/modules/util.py b/src/src/facerender/modules/util.py deleted file mode 100644 index b916deefbb8b957ad6ab3cd7403c28513e5ae18e..0000000000000000000000000000000000000000 --- a/src/src/facerender/modules/util.py +++ /dev/null @@ -1,564 +0,0 @@ -from torch import nn - -import torch.nn.functional as F -import torch - -from src.facerender.sync_batchnorm import SynchronizedBatchNorm2d as BatchNorm2d -from src.facerender.sync_batchnorm import SynchronizedBatchNorm3d as BatchNorm3d - -import torch.nn.utils.spectral_norm as spectral_norm - - -def kp2gaussian(kp, spatial_size, kp_variance): - """ - Transform a keypoint into gaussian like representation - """ - mean = kp['value'] - - coordinate_grid = make_coordinate_grid(spatial_size, mean.type()) - number_of_leading_dimensions = len(mean.shape) - 1 - shape = (1,) * number_of_leading_dimensions + coordinate_grid.shape - coordinate_grid = coordinate_grid.view(*shape) - repeats = mean.shape[:number_of_leading_dimensions] + (1, 1, 1, 1) - coordinate_grid = coordinate_grid.repeat(*repeats) - - # Preprocess kp shape - shape = mean.shape[:number_of_leading_dimensions] + (1, 1, 1, 3) - mean = mean.view(*shape) - - mean_sub = (coordinate_grid - mean) - - out = torch.exp(-0.5 * (mean_sub ** 2).sum(-1) / kp_variance) - - return out - -def make_coordinate_grid_2d(spatial_size, type): - """ - Create a meshgrid [-1,1] x [-1,1] of given spatial_size. - """ - h, w = spatial_size - x = torch.arange(w).type(type) - y = torch.arange(h).type(type) - - x = (2 * (x / (w - 1)) - 1) - y = (2 * (y / (h - 1)) - 1) - - yy = y.view(-1, 1).repeat(1, w) - xx = x.view(1, -1).repeat(h, 1) - - meshed = torch.cat([xx.unsqueeze_(2), yy.unsqueeze_(2)], 2) - - return meshed - - -def make_coordinate_grid(spatial_size, type): - d, h, w = spatial_size - x = torch.arange(w).type(type) - y = torch.arange(h).type(type) - z = torch.arange(d).type(type) - - x = (2 * (x / (w - 1)) - 1) - y = (2 * (y / (h - 1)) - 1) - z = (2 * (z / (d - 1)) - 1) - - yy = y.view(1, -1, 1).repeat(d, 1, w) - xx = x.view(1, 1, -1).repeat(d, h, 1) - zz = z.view(-1, 1, 1).repeat(1, h, w) - - meshed = torch.cat([xx.unsqueeze_(3), yy.unsqueeze_(3), zz.unsqueeze_(3)], 3) - - return meshed - - -class ResBottleneck(nn.Module): - def __init__(self, in_features, stride): - super(ResBottleneck, self).__init__() - self.conv1 = nn.Conv2d(in_channels=in_features, out_channels=in_features//4, kernel_size=1) - self.conv2 = nn.Conv2d(in_channels=in_features//4, out_channels=in_features//4, kernel_size=3, padding=1, stride=stride) - self.conv3 = nn.Conv2d(in_channels=in_features//4, out_channels=in_features, kernel_size=1) - self.norm1 = BatchNorm2d(in_features//4, affine=True) - self.norm2 = BatchNorm2d(in_features//4, affine=True) - self.norm3 = BatchNorm2d(in_features, affine=True) - - self.stride = stride - if self.stride != 1: - self.skip = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=1, stride=stride) - self.norm4 = BatchNorm2d(in_features, affine=True) - - def forward(self, x): - out = self.conv1(x) - out = self.norm1(out) - out = F.relu(out) - out = self.conv2(out) - out = self.norm2(out) - out = F.relu(out) - out = self.conv3(out) - out = self.norm3(out) - if self.stride != 1: - x = self.skip(x) - x = self.norm4(x) - out += x - out = F.relu(out) - return out - - -class ResBlock2d(nn.Module): - """ - Res block, preserve spatial resolution. 
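kp2gaussian above is how keypoints re-enter convolutional networks as heatmaps: each entry of the {'value': (bs, k, 3)} dict is rendered as an isotropic gaussian on the normalized [-1, 1] coordinate grid. A short sketch, assuming the module being deleted here were still importable; the odd spatial sizes are chosen so a grid node falls exactly on the origin:

import torch
# from src.facerender.modules.util import kp2gaussian   # the file deleted here

kp = {'value': torch.zeros(1, 5, 3)}        # five keypoints at the volume center
heat = kp2gaussian(kp, spatial_size=(17, 65, 65), kp_variance=0.01)
print(heat.shape)                           # torch.Size([1, 5, 17, 65, 65])
print(heat.max())                           # tensor(1.): peak sits on the keypoint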
- """ - - def __init__(self, in_features, kernel_size, padding): - super(ResBlock2d, self).__init__() - self.conv1 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, - padding=padding) - self.conv2 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, - padding=padding) - self.norm1 = BatchNorm2d(in_features, affine=True) - self.norm2 = BatchNorm2d(in_features, affine=True) - - def forward(self, x): - out = self.norm1(x) - out = F.relu(out) - out = self.conv1(out) - out = self.norm2(out) - out = F.relu(out) - out = self.conv2(out) - out += x - return out - - -class ResBlock3d(nn.Module): - """ - Res block, preserve spatial resolution. - """ - - def __init__(self, in_features, kernel_size, padding): - super(ResBlock3d, self).__init__() - self.conv1 = nn.Conv3d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, - padding=padding) - self.conv2 = nn.Conv3d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, - padding=padding) - self.norm1 = BatchNorm3d(in_features, affine=True) - self.norm2 = BatchNorm3d(in_features, affine=True) - - def forward(self, x): - out = self.norm1(x) - out = F.relu(out) - out = self.conv1(out) - out = self.norm2(out) - out = F.relu(out) - out = self.conv2(out) - out += x - return out - - -class UpBlock2d(nn.Module): - """ - Upsampling block for use in decoder. - """ - - def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1): - super(UpBlock2d, self).__init__() - - self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size, - padding=padding, groups=groups) - self.norm = BatchNorm2d(out_features, affine=True) - - def forward(self, x): - out = F.interpolate(x, scale_factor=2) - out = self.conv(out) - out = self.norm(out) - out = F.relu(out) - return out - -class UpBlock3d(nn.Module): - """ - Upsampling block for use in decoder. - """ - - def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1): - super(UpBlock3d, self).__init__() - - self.conv = nn.Conv3d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size, - padding=padding, groups=groups) - self.norm = BatchNorm3d(out_features, affine=True) - - def forward(self, x): - # out = F.interpolate(x, scale_factor=(1, 2, 2), mode='trilinear') - out = F.interpolate(x, scale_factor=(1, 2, 2)) - out = self.conv(out) - out = self.norm(out) - out = F.relu(out) - return out - - -class DownBlock2d(nn.Module): - """ - Downsampling block for use in encoder. - """ - - def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1): - super(DownBlock2d, self).__init__() - self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size, - padding=padding, groups=groups) - self.norm = BatchNorm2d(out_features, affine=True) - self.pool = nn.AvgPool2d(kernel_size=(2, 2)) - - def forward(self, x): - out = self.conv(x) - out = self.norm(out) - out = F.relu(out) - out = self.pool(out) - return out - - -class DownBlock3d(nn.Module): - """ - Downsampling block for use in encoder. 
- """ - - def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1): - super(DownBlock3d, self).__init__() - ''' - self.conv = nn.Conv3d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size, - padding=padding, groups=groups, stride=(1, 2, 2)) - ''' - self.conv = nn.Conv3d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size, - padding=padding, groups=groups) - self.norm = BatchNorm3d(out_features, affine=True) - self.pool = nn.AvgPool3d(kernel_size=(1, 2, 2)) - - def forward(self, x): - out = self.conv(x) - out = self.norm(out) - out = F.relu(out) - out = self.pool(out) - return out - - -class SameBlock2d(nn.Module): - """ - Simple block, preserve spatial resolution. - """ - - def __init__(self, in_features, out_features, groups=1, kernel_size=3, padding=1, lrelu=False): - super(SameBlock2d, self).__init__() - self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, - kernel_size=kernel_size, padding=padding, groups=groups) - self.norm = BatchNorm2d(out_features, affine=True) - if lrelu: - self.ac = nn.LeakyReLU() - else: - self.ac = nn.ReLU() - - def forward(self, x): - out = self.conv(x) - out = self.norm(out) - out = self.ac(out) - return out - - -class Encoder(nn.Module): - """ - Hourglass Encoder - """ - - def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256): - super(Encoder, self).__init__() - - down_blocks = [] - for i in range(num_blocks): - down_blocks.append(DownBlock3d(in_features if i == 0 else min(max_features, block_expansion * (2 ** i)), - min(max_features, block_expansion * (2 ** (i + 1))), - kernel_size=3, padding=1)) - self.down_blocks = nn.ModuleList(down_blocks) - - def forward(self, x): - outs = [x] - for down_block in self.down_blocks: - outs.append(down_block(outs[-1])) - return outs - - -class Decoder(nn.Module): - """ - Hourglass Decoder - """ - - def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256): - super(Decoder, self).__init__() - - up_blocks = [] - - for i in range(num_blocks)[::-1]: - in_filters = (1 if i == num_blocks - 1 else 2) * min(max_features, block_expansion * (2 ** (i + 1))) - out_filters = min(max_features, block_expansion * (2 ** i)) - up_blocks.append(UpBlock3d(in_filters, out_filters, kernel_size=3, padding=1)) - - self.up_blocks = nn.ModuleList(up_blocks) - # self.out_filters = block_expansion - self.out_filters = block_expansion + in_features - - self.conv = nn.Conv3d(in_channels=self.out_filters, out_channels=self.out_filters, kernel_size=3, padding=1) - self.norm = BatchNorm3d(self.out_filters, affine=True) - - def forward(self, x): - out = x.pop() - # for up_block in self.up_blocks[:-1]: - for up_block in self.up_blocks: - out = up_block(out) - skip = x.pop() - out = torch.cat([out, skip], dim=1) - # out = self.up_blocks[-1](out) - out = self.conv(out) - out = self.norm(out) - out = F.relu(out) - return out - - -class Hourglass(nn.Module): - """ - Hourglass architecture. - """ - - def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256): - super(Hourglass, self).__init__() - self.encoder = Encoder(block_expansion, in_features, num_blocks, max_features) - self.decoder = Decoder(block_expansion, in_features, num_blocks, max_features) - self.out_filters = self.decoder.out_filters - - def forward(self, x): - return self.decoder(self.encoder(x)) - - -class KPHourglass(nn.Module): - """ - Hourglass architecture. 
- """ - - def __init__(self, block_expansion, in_features, reshape_features, reshape_depth, num_blocks=3, max_features=256): - super(KPHourglass, self).__init__() - - self.down_blocks = nn.Sequential() - for i in range(num_blocks): - self.down_blocks.add_module('down'+ str(i), DownBlock2d(in_features if i == 0 else min(max_features, block_expansion * (2 ** i)), - min(max_features, block_expansion * (2 ** (i + 1))), - kernel_size=3, padding=1)) - - in_filters = min(max_features, block_expansion * (2 ** num_blocks)) - self.conv = nn.Conv2d(in_channels=in_filters, out_channels=reshape_features, kernel_size=1) - - self.up_blocks = nn.Sequential() - for i in range(num_blocks): - in_filters = min(max_features, block_expansion * (2 ** (num_blocks - i))) - out_filters = min(max_features, block_expansion * (2 ** (num_blocks - i - 1))) - self.up_blocks.add_module('up'+ str(i), UpBlock3d(in_filters, out_filters, kernel_size=3, padding=1)) - - self.reshape_depth = reshape_depth - self.out_filters = out_filters - - def forward(self, x): - out = self.down_blocks(x) - out = self.conv(out) - bs, c, h, w = out.shape - out = out.view(bs, c//self.reshape_depth, self.reshape_depth, h, w) - out = self.up_blocks(out) - - return out - - - -class AntiAliasInterpolation2d(nn.Module): - """ - Band-limited downsampling, for better preservation of the input signal. - """ - def __init__(self, channels, scale): - super(AntiAliasInterpolation2d, self).__init__() - sigma = (1 / scale - 1) / 2 - kernel_size = 2 * round(sigma * 4) + 1 - self.ka = kernel_size // 2 - self.kb = self.ka - 1 if kernel_size % 2 == 0 else self.ka - - kernel_size = [kernel_size, kernel_size] - sigma = [sigma, sigma] - # The gaussian kernel is the product of the - # gaussian function of each dimension. - kernel = 1 - meshgrids = torch.meshgrid( - [ - torch.arange(size, dtype=torch.float32) - for size in kernel_size - ] - ) - for size, std, mgrid in zip(kernel_size, sigma, meshgrids): - mean = (size - 1) / 2 - kernel *= torch.exp(-(mgrid - mean) ** 2 / (2 * std ** 2)) - - # Make sure sum of values in gaussian kernel equals 1. 
- kernel = kernel / torch.sum(kernel) - # Reshape to depthwise convolutional weight - kernel = kernel.view(1, 1, *kernel.size()) - kernel = kernel.repeat(channels, *[1] * (kernel.dim() - 1)) - - self.register_buffer('weight', kernel) - self.groups = channels - self.scale = scale - inv_scale = 1 / scale - self.int_inv_scale = int(inv_scale) - - def forward(self, input): - if self.scale == 1.0: - return input - - out = F.pad(input, (self.ka, self.kb, self.ka, self.kb)) - out = F.conv2d(out, weight=self.weight, groups=self.groups) - out = out[:, :, ::self.int_inv_scale, ::self.int_inv_scale] - - return out - - -class SPADE(nn.Module): - def __init__(self, norm_nc, label_nc): - super().__init__() - - self.param_free_norm = nn.InstanceNorm2d(norm_nc, affine=False) - nhidden = 128 - - self.mlp_shared = nn.Sequential( - nn.Conv2d(label_nc, nhidden, kernel_size=3, padding=1), - nn.ReLU()) - self.mlp_gamma = nn.Conv2d(nhidden, norm_nc, kernel_size=3, padding=1) - self.mlp_beta = nn.Conv2d(nhidden, norm_nc, kernel_size=3, padding=1) - - def forward(self, x, segmap): - normalized = self.param_free_norm(x) - segmap = F.interpolate(segmap, size=x.size()[2:], mode='nearest') - actv = self.mlp_shared(segmap) - gamma = self.mlp_gamma(actv) - beta = self.mlp_beta(actv) - out = normalized * (1 + gamma) + beta - return out - - -class SPADEResnetBlock(nn.Module): - def __init__(self, fin, fout, norm_G, label_nc, use_se=False, dilation=1): - super().__init__() - # Attributes - self.learned_shortcut = (fin != fout) - fmiddle = min(fin, fout) - self.use_se = use_se - # create conv layers - self.conv_0 = nn.Conv2d(fin, fmiddle, kernel_size=3, padding=dilation, dilation=dilation) - self.conv_1 = nn.Conv2d(fmiddle, fout, kernel_size=3, padding=dilation, dilation=dilation) - if self.learned_shortcut: - self.conv_s = nn.Conv2d(fin, fout, kernel_size=1, bias=False) - # apply spectral norm if specified - if 'spectral' in norm_G: - self.conv_0 = spectral_norm(self.conv_0) - self.conv_1 = spectral_norm(self.conv_1) - if self.learned_shortcut: - self.conv_s = spectral_norm(self.conv_s) - # define normalization layers - self.norm_0 = SPADE(fin, label_nc) - self.norm_1 = SPADE(fmiddle, label_nc) - if self.learned_shortcut: - self.norm_s = SPADE(fin, label_nc) - - def forward(self, x, seg1): - x_s = self.shortcut(x, seg1) - dx = self.conv_0(self.actvn(self.norm_0(x, seg1))) - dx = self.conv_1(self.actvn(self.norm_1(dx, seg1))) - out = x_s + dx - return out - - def shortcut(self, x, seg1): - if self.learned_shortcut: - x_s = self.conv_s(self.norm_s(x, seg1)) - else: - x_s = x - return x_s - - def actvn(self, x): - return F.leaky_relu(x, 2e-1) - -class audio2image(nn.Module): - def __init__(self, generator, kp_extractor, he_estimator_video, he_estimator_audio, train_params): - super().__init__() - # Attributes - self.generator = generator - self.kp_extractor = kp_extractor - self.he_estimator_video = he_estimator_video - self.he_estimator_audio = he_estimator_audio - self.train_params = train_params - - def headpose_pred_to_degree(self, pred): - device = pred.device - idx_tensor = [idx for idx in range(66)] - idx_tensor = torch.FloatTensor(idx_tensor).to(device) - pred = F.softmax(pred) - degree = torch.sum(pred*idx_tensor, 1) * 3 - 99 - - return degree - - def get_rotation_matrix(self, yaw, pitch, roll): - yaw = yaw / 180 * 3.14 - pitch = pitch / 180 * 3.14 - roll = roll / 180 * 3.14 - - roll = roll.unsqueeze(1) - pitch = pitch.unsqueeze(1) - yaw = yaw.unsqueeze(1) - - roll_mat = torch.cat([torch.ones_like(roll), 
torch.zeros_like(roll), torch.zeros_like(roll), - torch.zeros_like(roll), torch.cos(roll), -torch.sin(roll), - torch.zeros_like(roll), torch.sin(roll), torch.cos(roll)], dim=1) - roll_mat = roll_mat.view(roll_mat.shape[0], 3, 3) - - pitch_mat = torch.cat([torch.cos(pitch), torch.zeros_like(pitch), torch.sin(pitch), - torch.zeros_like(pitch), torch.ones_like(pitch), torch.zeros_like(pitch), - -torch.sin(pitch), torch.zeros_like(pitch), torch.cos(pitch)], dim=1) - pitch_mat = pitch_mat.view(pitch_mat.shape[0], 3, 3) - - yaw_mat = torch.cat([torch.cos(yaw), -torch.sin(yaw), torch.zeros_like(yaw), - torch.sin(yaw), torch.cos(yaw), torch.zeros_like(yaw), - torch.zeros_like(yaw), torch.zeros_like(yaw), torch.ones_like(yaw)], dim=1) - yaw_mat = yaw_mat.view(yaw_mat.shape[0], 3, 3) - - rot_mat = torch.einsum('bij,bjk,bkm->bim', roll_mat, pitch_mat, yaw_mat) - - return rot_mat - - def keypoint_transformation(self, kp_canonical, he): - kp = kp_canonical['value'] # (bs, k, 3) - yaw, pitch, roll = he['yaw'], he['pitch'], he['roll'] - t, exp = he['t'], he['exp'] - - yaw = self.headpose_pred_to_degree(yaw) - pitch = self.headpose_pred_to_degree(pitch) - roll = self.headpose_pred_to_degree(roll) - - rot_mat = self.get_rotation_matrix(yaw, pitch, roll) # (bs, 3, 3) - - # keypoint rotation - kp_rotated = torch.einsum('bmp,bkp->bkm', rot_mat, kp) - - - - # keypoint translation - t = t.unsqueeze_(1).repeat(1, kp.shape[1], 1) - kp_t = kp_rotated + t - - # add expression deviation - exp = exp.view(exp.shape[0], -1, 3) - kp_transformed = kp_t + exp - - return {'value': kp_transformed} - - def forward(self, source_image, target_audio): - pose_source = self.he_estimator_video(source_image) - pose_generated = self.he_estimator_audio(target_audio) - kp_canonical = self.kp_extractor(source_image) - kp_source = self.keypoint_transformation(kp_canonical, pose_source) - kp_transformed_generated = self.keypoint_transformation(kp_canonical, pose_generated) - generated = self.generator(source_image, kp_source=kp_source, kp_driving=kp_transformed_generated) - return generated \ No newline at end of file diff --git a/src/src/facerender/sync_batchnorm/__init__.py b/src/src/facerender/sync_batchnorm/__init__.py deleted file mode 100644 index bc8709d92c610b36e0bcbd7da20c1eb41dc8cfcf..0000000000000000000000000000000000000000 --- a/src/src/facerender/sync_batchnorm/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -# File : __init__.py -# Author : Jiayuan Mao -# Email : maojiayuan@gmail.com -# Date : 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d -from .replicate import DataParallelWithCallback, patch_replication_callback diff --git a/src/src/facerender/sync_batchnorm/batchnorm.py b/src/src/facerender/sync_batchnorm/batchnorm.py deleted file mode 100644 index 5f4e763f0366dffa10320116413f8c7181a8aeb1..0000000000000000000000000000000000000000 --- a/src/src/facerender/sync_batchnorm/batchnorm.py +++ /dev/null @@ -1,315 +0,0 @@ -# -*- coding: utf-8 -*- -# File : batchnorm.py -# Author : Jiayuan Mao -# Email : maojiayuan@gmail.com -# Date : 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. 
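The sync_batchnorm package being removed here (batchnorm.py, with comm.py and replicate.py below) synchronizes batch-norm statistics across nn.DataParallel replicas. A minimal wiring sketch, not from the repository, assuming two visible CUDA devices; on a single device the layers fall back to plain F.batch_norm:

import torch
from torch import nn
from src.facerender.sync_batchnorm import SynchronizedBatchNorm2d, DataParallelWithCallback

net = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), SynchronizedBatchNorm2d(8), nn.ReLU())
# DataParallelWithCallback fires __data_parallel_replicate__ on every replica,
# registering each copy with one shared SyncMaster for the mean/var reduction.
net = DataParallelWithCallback(net.cuda(), device_ids=[0, 1])
out = net(torch.randn(8, 3, 32, 32).cuda())  # batch stats reduced over both GPUs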
- -import collections - -import torch -import torch.nn.functional as F - -from torch.nn.modules.batchnorm import _BatchNorm -from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast - -from .comm import SyncMaster - -__all__ = ['SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d'] - - -def _sum_ft(tensor): - """sum over the first and last dimension""" - return tensor.sum(dim=0).sum(dim=-1) - - -def _unsqueeze_ft(tensor): - """add new dimensions at the front and the tail""" - return tensor.unsqueeze(0).unsqueeze(-1) - - -_ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size']) -_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) - - -class _SynchronizedBatchNorm(_BatchNorm): - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): - super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine) - - self._sync_master = SyncMaster(self._data_parallel_master) - - self._is_parallel = False - self._parallel_id = None - self._slave_pipe = None - - def forward(self, input): - # If this is not a parallel computation or we are in evaluation mode, use PyTorch's implementation. - if not (self._is_parallel and self.training): - return F.batch_norm( - input, self.running_mean, self.running_var, self.weight, self.bias, - self.training, self.momentum, self.eps) - - # Resize the input to (B, C, -1). - input_shape = input.size() - input = input.view(input.size(0), self.num_features, -1) - - # Compute the sum and square-sum. - sum_size = input.size(0) * input.size(2) - input_sum = _sum_ft(input) - input_ssum = _sum_ft(input ** 2) - - # Reduce-and-broadcast the statistics. - if self._parallel_id == 0: - mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size)) - else: - mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size)) - - # Compute the output. - if self.affine: - # MJY:: Fuse the multiplication for speed. - output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias) - else: - output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std) - - # Reshape it. - return output.view(input_shape) - - def __data_parallel_replicate__(self, ctx, copy_id): - self._is_parallel = True - self._parallel_id = copy_id - - # parallel_id == 0 means master device. - if self._parallel_id == 0: - ctx.sync_master = self._sync_master - else: - self._slave_pipe = ctx.sync_master.register_slave(copy_id) - - def _data_parallel_master(self, intermediates): - """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" - - # Always using the same "device order" makes the ReduceAdd operation faster.
- # Thanks to:: Tete Xiao (http://tetexiao.com/) - intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) - - to_reduce = [i[1][:2] for i in intermediates] - to_reduce = [j for i in to_reduce for j in i] # flatten - target_gpus = [i[1].sum.get_device() for i in intermediates] - - sum_size = sum([i[1].sum_size for i in intermediates]) - sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) - mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) - - broadcasted = Broadcast.apply(target_gpus, mean, inv_std) - - outputs = [] - for i, rec in enumerate(intermediates): - outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) - - return outputs - - def _compute_mean_std(self, sum_, ssum, size): - """Compute the mean and standard-deviation with sum and square-sum. This method - also maintains the moving average on the master device.""" - assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' - mean = sum_ / size - sumvar = ssum - sum_ * mean - unbias_var = sumvar / (size - 1) - bias_var = sumvar / size - - self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data - self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data - - return mean, bias_var.clamp(self.eps) ** -0.5 - - -class SynchronizedBatchNorm1d(_SynchronizedBatchNorm): - r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a - mini-batch. - - .. math:: - - y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta - - This module differs from the built-in PyTorch BatchNorm1d as the mean and - standard-deviation are reduced across all devices during training. - - For example, when one uses `nn.DataParallel` to wrap the network during - training, PyTorch's implementation normalizes the tensor on each device using - the statistics only on that device, which accelerates the computation and - is easy to implement, but the statistics might be inaccurate. - Instead, in this synchronized version, the statistics will be computed - over all training samples distributed on multiple devices. - - Note that, in the one-GPU or CPU-only case, this module behaves exactly the same - as the built-in PyTorch implementation. - - The mean and standard-deviation are calculated per-dimension over - the mini-batches and gamma and beta are learnable parameter vectors - of size C (where C is the input size). - - During training, this layer keeps a running estimate of its computed mean - and variance. The running sum is kept with a default momentum of 0.1. - - During evaluation, this running mean/variance is used for normalization. - - Because the BatchNorm is done over the `C` dimension, computing statistics - on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm. - - Args: - num_features: num_features from an expected input of size - `batch_size x num_features [x width]` - eps: a value added to the denominator for numerical stability. - Default: 1e-5 - momentum: the value used for the running_mean and running_var - computation. Default: 0.1 - affine: a boolean value that when set to ``True``, gives the layer learnable - affine parameters.
Default: ``True`` - - Shape: - - Input: :math:`(N, C)` or :math:`(N, C, L)` - - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) - - Examples: - >>> # With Learnable Parameters - >>> m = SynchronizedBatchNorm1d(100) - >>> # Without Learnable Parameters - >>> m = SynchronizedBatchNorm1d(100, affine=False) - >>> input = torch.autograd.Variable(torch.randn(20, 100)) - >>> output = m(input) - """ - - def _check_input_dim(self, input): - if input.dim() != 2 and input.dim() != 3: - raise ValueError('expected 2D or 3D input (got {}D input)' - .format(input.dim())) - super(SynchronizedBatchNorm1d, self)._check_input_dim(input) - - class SynchronizedBatchNorm2d(_SynchronizedBatchNorm): - r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch - of 3d inputs. - - .. math:: - - y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta - - This module differs from the built-in PyTorch BatchNorm2d as the mean and - standard-deviation are reduced across all devices during training. - - For example, when one uses `nn.DataParallel` to wrap the network during - training, PyTorch's implementation normalizes the tensor on each device using - the statistics only on that device, which accelerates the computation and - is easy to implement, but the statistics might be inaccurate. - Instead, in this synchronized version, the statistics will be computed - over all training samples distributed on multiple devices. - - Note that, in the one-GPU or CPU-only case, this module behaves exactly the same - as the built-in PyTorch implementation. - - The mean and standard-deviation are calculated per-dimension over - the mini-batches and gamma and beta are learnable parameter vectors - of size C (where C is the input size). - - During training, this layer keeps a running estimate of its computed mean - and variance. The running sum is kept with a default momentum of 0.1. - - During evaluation, this running mean/variance is used for normalization. - - Because the BatchNorm is done over the `C` dimension, computing statistics - on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm. - - Args: - num_features: num_features from an expected input of - size batch_size x num_features x height x width - eps: a value added to the denominator for numerical stability. - Default: 1e-5 - momentum: the value used for the running_mean and running_var - computation. Default: 0.1 - affine: a boolean value that when set to ``True``, gives the layer learnable - affine parameters. Default: ``True`` - - Shape: - - Input: :math:`(N, C, H, W)` - - Output: :math:`(N, C, H, W)` (same shape as input) - - Examples: - >>> # With Learnable Parameters - >>> m = SynchronizedBatchNorm2d(100) - >>> # Without Learnable Parameters - >>> m = SynchronizedBatchNorm2d(100, affine=False) - >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45)) - >>> output = m(input) - """ - - def _check_input_dim(self, input): - if input.dim() != 4: - raise ValueError('expected 4D input (got {}D input)' - .format(input.dim())) - super(SynchronizedBatchNorm2d, self)._check_input_dim(input) - - class SynchronizedBatchNorm3d(_SynchronizedBatchNorm): - r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch - of 4d inputs. - - .. math:: - - y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta - - This module differs from the built-in PyTorch BatchNorm3d as the mean and - standard-deviation are reduced across all devices during training.
- - For example, when one uses `nn.DataParallel` to wrap the network during - training, PyTorch's implementation normalizes the tensor on each device using - the statistics only on that device, which accelerates the computation and - is easy to implement, but the statistics might be inaccurate. - Instead, in this synchronized version, the statistics will be computed - over all training samples distributed on multiple devices. - - Note that, in the one-GPU or CPU-only case, this module behaves exactly the same - as the built-in PyTorch implementation. - - The mean and standard-deviation are calculated per-dimension over - the mini-batches and gamma and beta are learnable parameter vectors - of size C (where C is the input size). - - During training, this layer keeps a running estimate of its computed mean - and variance. The running sum is kept with a default momentum of 0.1. - - During evaluation, this running mean/variance is used for normalization. - - Because the BatchNorm is done over the `C` dimension, computing statistics - on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm - or Spatio-temporal BatchNorm. - - Args: - num_features: num_features from an expected input of - size batch_size x num_features x depth x height x width - eps: a value added to the denominator for numerical stability. - Default: 1e-5 - momentum: the value used for the running_mean and running_var - computation. Default: 0.1 - affine: a boolean value that when set to ``True``, gives the layer learnable - affine parameters. Default: ``True`` - - Shape: - - Input: :math:`(N, C, D, H, W)` - - Output: :math:`(N, C, D, H, W)` (same shape as input) - - Examples: - >>> # With Learnable Parameters - >>> m = SynchronizedBatchNorm3d(100) - >>> # Without Learnable Parameters - >>> m = SynchronizedBatchNorm3d(100, affine=False) - >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10)) - >>> output = m(input) - """ - - def _check_input_dim(self, input): - if input.dim() != 5: - raise ValueError('expected 5D input (got {}D input)' - .format(input.dim())) - super(SynchronizedBatchNorm3d, self)._check_input_dim(input) diff --git a/src/src/facerender/sync_batchnorm/comm.py b/src/src/facerender/sync_batchnorm/comm.py deleted file mode 100644 index 922f8c4a3adaa9b32fdcaef09583be03b0d7eb2b..0000000000000000000000000000000000000000 --- a/src/src/facerender/sync_batchnorm/comm.py +++ /dev/null @@ -1,137 +0,0 @@ -# -*- coding: utf-8 -*- -# File : comm.py -# Author : Jiayuan Mao -# Email : maojiayuan@gmail.com -# Date : 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -import queue -import collections -import threading - -__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] - - -class FutureResult(object): - """A thread-safe future implementation. Used only as one-to-one pipe.""" - - def __init__(self): - self._result = None - self._lock = threading.Lock() - self._cond = threading.Condition(self._lock) - - def put(self, result): - with self._lock: - assert self._result is None, 'Previous result hasn\'t been fetched.'
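# Annotation (not part of the deleted file): put()/get() form a one-shot,
# one-to-one pipe. put() fills the single result slot and notifies the thread
# blocked in get(); the assert above guards against a second put() before the
# previous result has been consumed.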
- self._result = result - self._cond.notify() - - def get(self): - with self._lock: - if self._result is None: - self._cond.wait() - - res = self._result - self._result = None - return res - - -_MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) -_SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) - - -class SlavePipe(_SlavePipeBase): - """Pipe for master-slave communication.""" - - def run_slave(self, msg): - self.queue.put((self.identifier, msg)) - ret = self.result.get() - self.queue.put(True) - return ret - - -class SyncMaster(object): - """An abstract `SyncMaster` object. - - - During the replication, as data parallel will trigger a callback on each module, all slave devices should - call `register(id)` and obtain a `SlavePipe` to communicate with the master. - - During the forward pass, the master device invokes `run_master`, all messages from slave devices will be collected, - and passed to a registered callback. - - After receiving the messages, the master device should gather the information and determine the message to be - passed back to each slave device. - """ - - def __init__(self, master_callback): - """ - - Args: - master_callback: a callback to be invoked after having collected messages from slave devices. - """ - self._master_callback = master_callback - self._queue = queue.Queue() - self._registry = collections.OrderedDict() - self._activated = False - - def __getstate__(self): - return {'master_callback': self._master_callback} - - def __setstate__(self, state): - self.__init__(state['master_callback']) - - def register_slave(self, identifier): - """ - Register a slave device. - - Args: - identifier: an identifier, usually the device id. - - Returns: a `SlavePipe` object which can be used to communicate with the master device. - - """ - if self._activated: - assert self._queue.empty(), 'Queue is not clean before next initialization.' - self._activated = False - self._registry.clear() - future = FutureResult() - self._registry[identifier] = _MasterRegistry(future) - return SlavePipe(identifier, self._queue, future) - - def run_master(self, master_msg): - """ - Main entry for the master device in each forward pass. - The messages are first collected from each device (including the master device), and then - a callback will be invoked to compute the messages to be sent back to each device - (including the master device). - - Args: - master_msg: the message that the master wants to send to itself. This will be placed as the first - message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. - - Returns: the message to be sent back to the master device. - - """ - self._activated = True - - intermediates = [(0, master_msg)] - for i in range(self.nr_slaves): - intermediates.append(self._queue.get()) - - results = self._master_callback(intermediates) - assert results[0][0] == 0, 'The first result should belong to the master.'
- - for i, res in results: - if i == 0: - continue - self._registry[i].result.put(res) - - for i in range(self.nr_slaves): - assert self._queue.get() is True - - return results[0][1] - - @property - def nr_slaves(self): - return len(self._registry) diff --git a/src/src/facerender/sync_batchnorm/replicate.py b/src/src/facerender/sync_batchnorm/replicate.py deleted file mode 100644 index b71c7b8ed51a1d6c55b1f753bdd8d90bad79bd06..0000000000000000000000000000000000000000 --- a/src/src/facerender/sync_batchnorm/replicate.py +++ /dev/null @@ -1,94 +0,0 @@ -# -*- coding: utf-8 -*- -# File : replicate.py -# Author : Jiayuan Mao -# Email : maojiayuan@gmail.com -# Date : 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -import functools - -from torch.nn.parallel.data_parallel import DataParallel - -__all__ = [ - 'CallbackContext', - 'execute_replication_callbacks', - 'DataParallelWithCallback', - 'patch_replication_callback' -] - - -class CallbackContext(object): - pass - - -def execute_replication_callbacks(modules): - """ - Execute a replication callback `__data_parallel_replicate__` on each module created by the original replication. - - The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`. - - Note that, as all modules are isomorphic, we assign each sub-module a context - (shared among multiple copies of this module on different devices). - Through this context, different copies can share some information. - - We guarantee that the callback on the master copy (the first copy) will be called ahead of calling the callback - of any slave copies. - """ - master_copy = modules[0] - nr_modules = len(list(master_copy.modules())) - ctxs = [CallbackContext() for _ in range(nr_modules)] - - for i, module in enumerate(modules): - for j, m in enumerate(module.modules()): - if hasattr(m, '__data_parallel_replicate__'): - m.__data_parallel_replicate__(ctxs[j], i) - - -class DataParallelWithCallback(DataParallel): - """ - Data Parallel with a replication callback. - - A replication callback `__data_parallel_replicate__` of each module will be invoked after being created by - the original `replicate` function. - The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`. - - Examples: - > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) - > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) - # sync_bn.__data_parallel_replicate__ will be invoked. - """ - - def replicate(self, module, device_ids): - modules = super(DataParallelWithCallback, self).replicate(module, device_ids) - execute_replication_callbacks(modules) - return modules - - -def patch_replication_callback(data_parallel): - """ - Monkey-patch an existing `DataParallel` object. Add the replication callback. - Useful when you have a customized `DataParallel` implementation.
- - Examples: - > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) - > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) - > patch_replication_callback(sync_bn) - # this is equivalent to - > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) - > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) - """ - - assert isinstance(data_parallel, DataParallel) - - old_replicate = data_parallel.replicate - - @functools.wraps(old_replicate) - def new_replicate(module, device_ids): - modules = old_replicate(module, device_ids) - execute_replication_callbacks(modules) - return modules - - data_parallel.replicate = new_replicate diff --git a/src/src/facerender/sync_batchnorm/unittest.py b/src/src/facerender/sync_batchnorm/unittest.py deleted file mode 100644 index 0675c022e4ba85d38d1f813490f6740150909524..0000000000000000000000000000000000000000 --- a/src/src/facerender/sync_batchnorm/unittest.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -# File : unittest.py -# Author : Jiayuan Mao -# Email : maojiayuan@gmail.com -# Date : 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -import unittest - -import numpy as np -from torch.autograd import Variable - - -def as_numpy(v): - if isinstance(v, Variable): - v = v.data - return v.cpu().numpy() - - -class TorchTestCase(unittest.TestCase): - def assertTensorClose(self, a, b, atol=1e-3, rtol=1e-3): - npa, npb = as_numpy(a), as_numpy(b) - self.assertTrue( - np.allclose(npa, npb, atol=atol), - 'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max()) - ) diff --git a/src/src/generate_batch.py b/src/src/generate_batch.py deleted file mode 100644 index 95f21526feea846977707e97394132d43225c02a..0000000000000000000000000000000000000000 --- a/src/src/generate_batch.py +++ /dev/null @@ -1,120 +0,0 @@ -import os - -from tqdm import tqdm -import torch -import numpy as np -import random -import scipy.io as scio -import src.utils.audio as audio - -def crop_pad_audio(wav, audio_length): - if len(wav) > audio_length: - wav = wav[:audio_length] - elif len(wav) < audio_length: - wav = np.pad(wav, [0, audio_length - len(wav)], mode='constant', constant_values=0) - return wav - -def parse_audio_length(audio_length, sr, fps): - bit_per_frames = sr / fps - - num_frames = int(audio_length / bit_per_frames) - audio_length = int(num_frames * bit_per_frames) - - return audio_length, num_frames - -def generate_blink_seq(num_frames): - ratio = np.zeros((num_frames,1)) - frame_id = 0 - while frame_id in range(num_frames): - start = 80 - if frame_id+start+9<=num_frames - 1: - ratio[frame_id+start:frame_id+start+9, 0] = [0.5,0.6,0.7,0.9,1, 0.9, 0.7,0.6,0.5] - frame_id = frame_id+start+9 - else: - break - return ratio - -def generate_blink_seq_randomly(num_frames): - ratio = np.zeros((num_frames,1)) - if num_frames<=20: - return ratio - frame_id = 0 - while frame_id in range(num_frames): - start = random.choice(range(min(10,num_frames), min(int(num_frames/2), 70))) - if frame_id+start+5<=num_frames - 1: - ratio[frame_id+start:frame_id+start+5, 0] = [0.5, 0.9, 1.0, 0.9, 0.5] - frame_id = frame_id+start+5 - else: - break - return ratio - -def get_data(first_coeff_path, audio_path, device, ref_eyeblink_coeff_path, still=False, idlemode=False, length_of_audio=False, use_blink=True): - - syncnet_mel_step_size = 16 - fps = 25 - - pic_name = 
os.path.splitext(os.path.split(first_coeff_path)[-1])[0] - audio_name = os.path.splitext(os.path.split(audio_path)[-1])[0] - - - if idlemode: - num_frames = int(length_of_audio * 25) - indiv_mels = np.zeros((num_frames, 80, 16)) - else: - wav = audio.load_wav(audio_path, 16000) - wav_length, num_frames = parse_audio_length(len(wav), 16000, 25) - wav = crop_pad_audio(wav, wav_length) - orig_mel = audio.melspectrogram(wav).T - spec = orig_mel.copy() # nframes 80 - indiv_mels = [] - - for i in tqdm(range(num_frames), 'mel:'): - start_frame_num = i-2 - start_idx = int(80. * (start_frame_num / float(fps))) - end_idx = start_idx + syncnet_mel_step_size - seq = list(range(start_idx, end_idx)) - seq = [ min(max(item, 0), orig_mel.shape[0]-1) for item in seq ] - m = spec[seq, :] - indiv_mels.append(m.T) - indiv_mels = np.asarray(indiv_mels) # T 80 16 - - ratio = generate_blink_seq_randomly(num_frames) # T - source_semantics_path = first_coeff_path - source_semantics_dict = scio.loadmat(source_semantics_path) - ref_coeff = source_semantics_dict['coeff_3dmm'][:1,:70] #1 70 - ref_coeff = np.repeat(ref_coeff, num_frames, axis=0) - - if ref_eyeblink_coeff_path is not None: - ratio[:num_frames] = 0 - refeyeblink_coeff_dict = scio.loadmat(ref_eyeblink_coeff_path) - refeyeblink_coeff = refeyeblink_coeff_dict['coeff_3dmm'][:,:64] - refeyeblink_num_frames = refeyeblink_coeff.shape[0] - if refeyeblink_num_frames frame_num: - new_degree_list = new_degree_list[:frame_num] - elif len(new_degree_list) < frame_num: - for _ in range(frame_num-len(new_degree_list)): - new_degree_list.append(new_degree_list[-1]) - print(len(new_degree_list)) - print(frame_num) - - remainder = frame_num%batch_size - if remainder!=0: - for _ in range(batch_size-remainder): - new_degree_list.append(new_degree_list[-1]) - new_degree_np = np.array(new_degree_list).reshape(batch_size, -1) - return new_degree_np - diff --git a/src/src/gradio_demo.py b/src/src/gradio_demo.py deleted file mode 100644 index 9a2399fc44704b544ef39bb908d32a21da9fae17..0000000000000000000000000000000000000000 --- a/src/src/gradio_demo.py +++ /dev/null @@ -1,170 +0,0 @@ -import torch, uuid -import os, sys, shutil, platform -from src.facerender.pirender_animate import AnimateFromCoeff_PIRender -from src.utils.preprocess import CropAndExtract -from src.test_audio2coeff import Audio2Coeff -from src.facerender.animate import AnimateFromCoeff -from src.generate_batch import get_data -from src.generate_facerender_batch import get_facerender_data - -from src.utils.init_path import init_path - -from pydub import AudioSegment - - -def mp3_to_wav(mp3_filename,wav_filename,frame_rate): - mp3_file = AudioSegment.from_file(file=mp3_filename) - mp3_file.set_frame_rate(frame_rate).export(wav_filename,format="wav") - - -class SadTalker(): - - def __init__(self, checkpoint_path='checkpoints', config_path='src/config', lazy_load=False): - - if torch.cuda.is_available(): - device = "cuda" - elif platform.system() == 'Darwin': # macos - device = "mps" - else: - device = "cpu" - - self.device = device - - os.environ['TORCH_HOME']= checkpoint_path - - self.checkpoint_path = checkpoint_path - self.config_path = config_path - - - def test(self, source_image, driven_audio, preprocess='crop', - still_mode=False, use_enhancer=False, batch_size=1, size=256, - pose_style = 0, - facerender='facevid2vid', - exp_scale=1.0, - use_ref_video = False, - ref_video = None, - ref_info = None, - use_idle_mode = False, - length_of_audio = 0, use_blink=True, - result_dir='./results/'): - - 
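Worth tracing once, since the batching below depends on it: `parse_audio_length` in `generate_batch.py` above quantizes the wav to whole video frames (at 16 kHz and 25 fps each frame owns 640 samples), and the mel loop slices a 16-step window that starts two frames in the past (the mel spectrogram advances at 80 frames per second). A worked sketch of just the index arithmetic, with a hypothetical wav length:

    sr, fps = 16000, 25
    samples_per_frame = sr / fps                   # 640.0 samples per video frame
    wav_len = 52731                                # hypothetical raw wav length
    num_frames = int(wav_len / samples_per_frame)  # 82
    wav_len = int(num_frames * samples_per_frame)  # 52480: crop/pad target

    syncnet_mel_step_size = 16
    i = 10                                         # current video frame
    start_idx = int(80. * ((i - 2) / float(fps)))  # 25: mel window start
    end_idx = start_idx + syncnet_mel_step_size    # 41: 16 mel steps per frame
    print(num_frames, wav_len, start_idx, end_idx)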
self.sadtalker_paths = init_path(self.checkpoint_path, self.config_path, size, False, preprocess) - print(self.sadtalker_paths) - - self.audio_to_coeff = Audio2Coeff(self.sadtalker_paths, self.device) - self.preprocess_model = CropAndExtract(self.sadtalker_paths, self.device) - - if facerender == 'facevid2vid' and self.device != 'mps': - self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device) - elif facerender == 'pirender' or self.device == 'mps': - self.animate_from_coeff = AnimateFromCoeff_PIRender(self.sadtalker_paths, self.device) - facerender = 'pirender' - else: - raise(RuntimeError('Unknown model: {}'.format(facerender))) - - - time_tag = str(uuid.uuid4()) - save_dir = os.path.join(result_dir, time_tag) - os.makedirs(save_dir, exist_ok=True) - - input_dir = os.path.join(save_dir, 'input') - os.makedirs(input_dir, exist_ok=True) - - print(source_image) - pic_path = os.path.join(input_dir, os.path.basename(source_image)) - shutil.move(source_image, input_dir) - - if driven_audio is not None and os.path.isfile(driven_audio): - audio_path = os.path.join(input_dir, os.path.basename(driven_audio)) - - #### mp3 to wav - if '.mp3' in audio_path: - mp3_to_wav(driven_audio, audio_path.replace('.mp3', '.wav'), 16000) - audio_path = audio_path.replace('.mp3', '.wav') - else: - shutil.move(driven_audio, input_dir) - - elif use_idle_mode: - audio_path = os.path.join(input_dir, 'idlemode_'+str(length_of_audio)+'.wav') ## generate audio from this new audio_path - from pydub import AudioSegment - one_sec_segment = AudioSegment.silent(duration=1000*length_of_audio) #duration in milliseconds - one_sec_segment.export(audio_path, format="wav") - else: - print(use_ref_video, ref_info) - assert use_ref_video == True and ref_info == 'all' - - if use_ref_video and ref_info == 'all': # full ref mode - ref_video_videoname = os.path.basename(ref_video) - audio_path = os.path.join(save_dir, ref_video_videoname+'.wav') - print('new audiopath:',audio_path) - # if ref_video contains audio, set the audio from ref_video. 
- cmd = r"ffmpeg -y -hide_banner -loglevel error -i %s %s"%(ref_video, audio_path) - os.system(cmd) - - os.makedirs(save_dir, exist_ok=True) - - #crop image and extract 3dmm from image - first_frame_dir = os.path.join(save_dir, 'first_frame_dir') - os.makedirs(first_frame_dir, exist_ok=True) - first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(pic_path, first_frame_dir, preprocess, True, size) - - if first_coeff_path is None: - raise AttributeError("No face is detected") - - if use_ref_video: - print('using ref video for genreation') - ref_video_videoname = os.path.splitext(os.path.split(ref_video)[-1])[0] - ref_video_frame_dir = os.path.join(save_dir, ref_video_videoname) - os.makedirs(ref_video_frame_dir, exist_ok=True) - print('3DMM Extraction for the reference video providing pose') - ref_video_coeff_path, _, _ = self.preprocess_model.generate(ref_video, ref_video_frame_dir, preprocess, source_image_flag=False) - else: - ref_video_coeff_path = None - - if use_ref_video: - if ref_info == 'pose': - ref_pose_coeff_path = ref_video_coeff_path - ref_eyeblink_coeff_path = None - elif ref_info == 'blink': - ref_pose_coeff_path = None - ref_eyeblink_coeff_path = ref_video_coeff_path - elif ref_info == 'pose+blink': - ref_pose_coeff_path = ref_video_coeff_path - ref_eyeblink_coeff_path = ref_video_coeff_path - elif ref_info == 'all': - ref_pose_coeff_path = None - ref_eyeblink_coeff_path = None - else: - raise('error in refinfo') - else: - ref_pose_coeff_path = None - ref_eyeblink_coeff_path = None - - #audio2ceoff - if use_ref_video and ref_info == 'all': - coeff_path = ref_video_coeff_path # self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path) - else: - batch = get_data(first_coeff_path, audio_path, self.device, ref_eyeblink_coeff_path=ref_eyeblink_coeff_path, still=still_mode, \ - idlemode=use_idle_mode, length_of_audio=length_of_audio, use_blink=use_blink) # longer audio? 
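The `ref_info` cascade above is easier to read as a routing table: each mode decides whether the reference video's coefficients drive the pose, the blink, both, or (in 'all' mode) replace the generated coefficients outright. A condensed, hypothetical restatement, not the shipped code:

    # mode -> (ref video drives pose, ref video drives blink);
    # 'all' instead short-circuits audio2coeff and reuses the ref coeffs directly.
    REF_INFO_MODES = {
        'pose':       (True,  False),
        'blink':      (False, True),
        'pose+blink': (True,  True),
        'all':        (False, False),
    }

    def route_ref(ref_video_coeff_path, ref_info):
        drives_pose, drives_blink = REF_INFO_MODES[ref_info]  # KeyError ~ the raise above
        return (ref_video_coeff_path if drives_pose else None,
                ref_video_coeff_path if drives_blink else None)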
- coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path) - - #coeff2video - data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode, \ - preprocess=preprocess, size=size, expression_scale = exp_scale, facemodel=facerender) - return_path = self.animate_from_coeff.generate(data, save_dir, pic_path, crop_info, enhancer='gfpgan' if use_enhancer else None, preprocess=preprocess, img_size=size) - video_name = data['video_name'] - print(f'The generated video is named {video_name} in {save_dir}') - - del self.preprocess_model - del self.audio_to_coeff - del self.animate_from_coeff - - if torch.cuda.is_available(): - torch.cuda.empty_cache() - torch.cuda.synchronize() - - import gc; gc.collect() - - return return_path - - \ No newline at end of file diff --git a/src/src/test_audio2coeff.py b/src/src/test_audio2coeff.py deleted file mode 100644 index bbf19f494e2127b4ae9d6074b172fddb694d6e34..0000000000000000000000000000000000000000 --- a/src/src/test_audio2coeff.py +++ /dev/null @@ -1,123 +0,0 @@ -import os -import torch -import numpy as np -from scipy.io import savemat, loadmat -from yacs.config import CfgNode as CN -from scipy.signal import savgol_filter - -import safetensors -import safetensors.torch - -from src.audio2pose_models.audio2pose import Audio2Pose -from src.audio2exp_models.networks import SimpleWrapperV2 -from src.audio2exp_models.audio2exp import Audio2Exp -from src.utils.safetensor_helper import load_x_from_safetensor - -def load_cpk(checkpoint_path, model=None, optimizer=None, device="cpu"): - checkpoint = torch.load(checkpoint_path, map_location=torch.device(device)) - if model is not None: - model.load_state_dict(checkpoint['model']) - if optimizer is not None: - optimizer.load_state_dict(checkpoint['optimizer']) - - return checkpoint['epoch'] - -class Audio2Coeff(): - - def __init__(self, sadtalker_path, device): - #load config - fcfg_pose = open(sadtalker_path['audio2pose_yaml_path']) - cfg_pose = CN.load_cfg(fcfg_pose) - cfg_pose.freeze() - fcfg_exp = open(sadtalker_path['audio2exp_yaml_path']) - cfg_exp = CN.load_cfg(fcfg_exp) - cfg_exp.freeze() - - # load audio2pose_model - self.audio2pose_model = Audio2Pose(cfg_pose, None, device=device) - self.audio2pose_model = self.audio2pose_model.to(device) - self.audio2pose_model.eval() - for param in self.audio2pose_model.parameters(): - param.requires_grad = False - - try: - if sadtalker_path['use_safetensor']: - checkpoints = safetensors.torch.load_file(sadtalker_path['checkpoint']) - self.audio2pose_model.load_state_dict(load_x_from_safetensor(checkpoints, 'audio2pose')) - else: - load_cpk(sadtalker_path['audio2pose_checkpoint'], model=self.audio2pose_model, device=device) - except: - raise Exception("Failed in loading audio2pose_checkpoint") - - # load audio2exp_model - netG = SimpleWrapperV2() - netG = netG.to(device) - for param in netG.parameters(): - netG.requires_grad = False - netG.eval() - try: - if sadtalker_path['use_safetensor']: - checkpoints = safetensors.torch.load_file(sadtalker_path['checkpoint']) - netG.load_state_dict(load_x_from_safetensor(checkpoints, 'audio2exp')) - else: - load_cpk(sadtalker_path['audio2exp_checkpoint'], model=netG, device=device) - except: - raise Exception("Failed in loading audio2exp_checkpoint") - self.audio2exp_model = Audio2Exp(netG, cfg_exp, device=device, prepare_training_loss=False) - self.audio2exp_model = self.audio2exp_model.to(device) - for param in 
self.audio2exp_model.parameters(): - param.requires_grad = False - self.audio2exp_model.eval() - - self.device = device - - def generate(self, batch, coeff_save_dir, pose_style, ref_pose_coeff_path=None): - - with torch.no_grad(): - #test - results_dict_exp= self.audio2exp_model.test(batch) - exp_pred = results_dict_exp['exp_coeff_pred'] #bs T 64 - - #for class_id in range(1): - #class_id = 0#(i+10)%45 - #class_id = random.randint(0,46) #46 styles can be selected - batch['class'] = torch.LongTensor([pose_style]).to(self.device) - results_dict_pose = self.audio2pose_model.test(batch) - pose_pred = results_dict_pose['pose_pred'] #bs T 6 - - pose_len = pose_pred.shape[1] - if pose_len<13: - pose_len = int((pose_len-1)/2)*2+1 - pose_pred = torch.Tensor(savgol_filter(np.array(pose_pred.cpu()), pose_len, 2, axis=1)).to(self.device) - else: - pose_pred = torch.Tensor(savgol_filter(np.array(pose_pred.cpu()), 13, 2, axis=1)).to(self.device) - - coeffs_pred = torch.cat((exp_pred, pose_pred), dim=-1) #bs T 70 - - coeffs_pred_numpy = coeffs_pred[0].clone().detach().cpu().numpy() - - if ref_pose_coeff_path is not None: - coeffs_pred_numpy = self.using_refpose(coeffs_pred_numpy, ref_pose_coeff_path) - - savemat(os.path.join(coeff_save_dir, '%s##%s.mat'%(batch['pic_name'], batch['audio_name'])), - {'coeff_3dmm': coeffs_pred_numpy}) - - return os.path.join(coeff_save_dir, '%s##%s.mat'%(batch['pic_name'], batch['audio_name'])) - - def using_refpose(self, coeffs_pred_numpy, ref_pose_coeff_path): - num_frames = coeffs_pred_numpy.shape[0] - refpose_coeff_dict = loadmat(ref_pose_coeff_path) - refpose_coeff = refpose_coeff_dict['coeff_3dmm'][:,64:70] - refpose_num_frames = refpose_coeff.shape[0] - if refpose_num_frames= 0 - if hp.symmetric_mels: - return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value - else: - return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) - -def _denormalize(D): - if hp.allow_clipping_in_normalization: - if hp.symmetric_mels: - return (((np.clip(D, -hp.max_abs_value, - hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) - + hp.min_level_db) - else: - return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) - - if hp.symmetric_mels: - return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db) - else: - return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) diff --git a/src/src/utils/croper.py b/src/src/utils/croper.py deleted file mode 100644 index 3d9a0ac58f97afdc95d40f2a400272b11fe38093..0000000000000000000000000000000000000000 --- a/src/src/utils/croper.py +++ /dev/null @@ -1,144 +0,0 @@ -import os -import cv2 -import time -import glob -import argparse -import scipy -import numpy as np -from PIL import Image -import torch -from tqdm import tqdm -from itertools import cycle - -from src.face3d.extract_kp_videos_safe import KeypointExtractor -from facexlib.alignment import landmark_98_to_68 - -import numpy as np -from PIL import Image - -class Preprocesser: - def __init__(self, device='cuda'): - self.predictor = KeypointExtractor(device) - - def get_landmark(self, img_np): - """get landmark with dlib - :return: np.array shape=(68, 2) - """ - with torch.no_grad(): - dets = self.predictor.det_net.detect_faces(img_np, 0.97) - - if len(dets) == 0: - return None - det = dets[0] - - img = img_np[int(det[1]):int(det[3]), int(det[0]):int(det[2]), :] - lm = 
landmark_98_to_68(self.predictor.detector.get_landmarks(img))  # [0]
-
-        #### keypoints to the original location
-        lm[:,0] += int(det[0])
-        lm[:,1] += int(det[1])
-
-        return lm
-
-    def align_face(self, img, lm, output_size=1024):
-        """
-        :param img: PIL Image
-        :param lm: np.array landmarks, shape=(68, 2)
-        :return: resize and crop geometry for the aligned face
-        """
-        lm_chin = lm[0: 17]  # left-right
-        lm_eyebrow_left = lm[17: 22]  # left-right
-        lm_eyebrow_right = lm[22: 27]  # left-right
-        lm_nose = lm[27: 31]  # top-down
-        lm_nostrils = lm[31: 36]  # top-down
-        lm_eye_left = lm[36: 42]  # left-clockwise
-        lm_eye_right = lm[42: 48]  # left-clockwise
-        lm_mouth_outer = lm[48: 60]  # left-clockwise
-        lm_mouth_inner = lm[60: 68]  # left-clockwise
-
-        # Calculate auxiliary vectors.
-        eye_left = np.mean(lm_eye_left, axis=0)
-        eye_right = np.mean(lm_eye_right, axis=0)
-        eye_avg = (eye_left + eye_right) * 0.5
-        eye_to_eye = eye_right - eye_left
-        mouth_left = lm_mouth_outer[0]
-        mouth_right = lm_mouth_outer[6]
-        mouth_avg = (mouth_left + mouth_right) * 0.5
-        eye_to_mouth = mouth_avg - eye_avg
-
-        # Choose oriented crop rectangle.
-        x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]  # combine the eye-to-eye and (rotated) eye-to-mouth vectors
-        x /= np.hypot(*x)  # np.hypot gives the vector length; normalize x to a unit vector
-        x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)  # take the larger of the eye span and the eye-mouth span as the base scale
-        y = np.flipud(x) * [-1, 1]
-        c = eye_avg + eye_to_mouth * 0.1
-        quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])  # crop quad: four corners offset around the face center c
-        qsize = np.hypot(*x) * 2  # side length of the quad, twice the base scale
-
-        # Shrink.
-        # If the computed quad is too large, scale it down proportionally.
-        shrink = int(np.floor(qsize / output_size * 0.5))
-        if shrink > 1:
-            rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
-            img = img.resize(rsize, Image.ANTIALIAS)
-            quad /= shrink
-            qsize /= shrink
-        else:
-            rsize = (int(np.rint(float(img.size[0]))), int(np.rint(float(img.size[1]))))
-
-        # Crop.
-        border = max(int(np.rint(qsize * 0.1)), 3)
-        crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
-                int(np.ceil(max(quad[:, 1]))))
-        crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]),
-                min(crop[3] + border, img.size[1]))
-        if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
-            # img = img.crop(crop)
-            quad -= crop[0:2]
-
-        # Pad.
-        pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
-               int(np.ceil(max(quad[:, 1]))))
-        pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0),
-               max(pad[3] - img.size[1] + border, 0))
-        # if enable_padding and max(pad) > border - 4:
-        #     pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
-        #     img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
-        #     h, w, _ = img.shape
-        #     y, x, _ = np.ogrid[:h, :w, :1]
-        #     mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]),
-        #                       1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3]))
-        #     blur = qsize * 0.02
-        #     img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
-        #     img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
-        #     img = Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')
-        #     quad += pad[:2]
-
-        # Transform.
- quad = (quad + 0.5).flatten() - lx = max(min(quad[0], quad[2]), 0) - ly = max(min(quad[1], quad[7]), 0) - rx = min(max(quad[4], quad[6]), img.size[0]) - ry = min(max(quad[3], quad[5]), img.size[0]) - - # Save aligned image. - return rsize, crop, [lx, ly, rx, ry] - - def crop(self, img_np_list, still=False, xsize=512): # first frame for all video - img_np = img_np_list[0] - lm = self.get_landmark(img_np) - - if lm is None: - raise 'can not detect the landmark from source image' - rsize, crop, quad = self.align_face(img=Image.fromarray(img_np), lm=lm, output_size=xsize) - clx, cly, crx, cry = crop - lx, ly, rx, ry = quad - lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry) - for _i in range(len(img_np_list)): - _inp = img_np_list[_i] - _inp = cv2.resize(_inp, (rsize[0], rsize[1])) - _inp = _inp[cly:cry, clx:crx] - if not still: - _inp = _inp[ly:ry, lx:rx] - img_np_list[_i] = _inp - return img_np_list, crop, quad - diff --git a/src/src/utils/face_enhancer.py b/src/src/utils/face_enhancer.py deleted file mode 100644 index 15851a15966c963d7bd04f35eebdaa6b22a3d966..0000000000000000000000000000000000000000 --- a/src/src/utils/face_enhancer.py +++ /dev/null @@ -1,123 +0,0 @@ -import os -import torch - -from gfpgan import GFPGANer - -from tqdm import tqdm - -from src.utils.videoio import load_video_to_cv2 - -import cv2 - - -class GeneratorWithLen(object): - """ From https://stackoverflow.com/a/7460929 """ - - def __init__(self, gen, length): - self.gen = gen - self.length = length - - def __len__(self): - return self.length - - def __iter__(self): - return self.gen - -def enhancer_list(images, method='gfpgan', bg_upsampler='realesrgan'): - gen = enhancer_generator_no_len(images, method=method, bg_upsampler=bg_upsampler) - return list(gen) - -def enhancer_generator_with_len(images, method='gfpgan', bg_upsampler='realesrgan'): - """ Provide a generator with a __len__ method so that it can passed to functions that - call len()""" - - if os.path.isfile(images): # handle video to images - # TODO: Create a generator version of load_video_to_cv2 - images = load_video_to_cv2(images) - - gen = enhancer_generator_no_len(images, method=method, bg_upsampler=bg_upsampler) - gen_with_len = GeneratorWithLen(gen, len(images)) - return gen_with_len - -def enhancer_generator_no_len(images, method='gfpgan', bg_upsampler='realesrgan'): - """ Provide a generator function so that all of the enhanced images don't need - to be stored in memory at the same time. This can save tons of RAM compared to - the enhancer function. 
""" - - print('face enhancer....') - if not isinstance(images, list) and os.path.isfile(images): # handle video to images - images = load_video_to_cv2(images) - - # ------------------------ set up GFPGAN restorer ------------------------ - if method == 'gfpgan': - arch = 'clean' - channel_multiplier = 2 - model_name = 'GFPGANv1.4' - url = 'https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth' - elif method == 'RestoreFormer': - arch = 'RestoreFormer' - channel_multiplier = 2 - model_name = 'RestoreFormer' - url = 'https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/RestoreFormer.pth' - elif method == 'codeformer': # TODO: - arch = 'CodeFormer' - channel_multiplier = 2 - model_name = 'CodeFormer' - url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth' - else: - raise ValueError(f'Wrong model version {method}.') - - - # ------------------------ set up background upsampler ------------------------ - if bg_upsampler == 'realesrgan': - if not torch.cuda.is_available(): # CPU - import warnings - warnings.warn('The unoptimized RealESRGAN is slow on CPU. We do not use it. ' - 'If you really want to use it, please modify the corresponding codes.') - bg_upsampler = None - else: - from basicsr.archs.rrdbnet_arch import RRDBNet - from realesrgan import RealESRGANer - model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) - bg_upsampler = RealESRGANer( - scale=2, - model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth', - model=model, - tile=400, - tile_pad=10, - pre_pad=0, - half=True) # need to set False in CPU mode - else: - bg_upsampler = None - - # determine model paths - model_path = os.path.join('gfpgan/weights', model_name + '.pth') - - if not os.path.isfile(model_path): - model_path = os.path.join('checkpoints', model_name + '.pth') - - if not os.path.isfile(model_path): - # download pre-trained models from url - model_path = url - - restorer = GFPGANer( - model_path=model_path, - upscale=2, - arch=arch, - channel_multiplier=channel_multiplier, - bg_upsampler=bg_upsampler) - - # ------------------------ restore ------------------------ - for idx in tqdm(range(len(images)), 'Face Enhancer:'): - - img = cv2.cvtColor(images[idx], cv2.COLOR_RGB2BGR) - - # restore faces and background if necessary - cropped_faces, restored_faces, r_img = restorer.enhance( - img, - has_aligned=False, - only_center_face=False, - paste_back=True) - - r_img = cv2.cvtColor(r_img, cv2.COLOR_BGR2RGB) - yield r_img diff --git a/src/src/utils/hparams.py b/src/src/utils/hparams.py deleted file mode 100644 index 743c5c7d5a5a9e686f1ccd6fb3c2fb5cb382d62b..0000000000000000000000000000000000000000 --- a/src/src/utils/hparams.py +++ /dev/null @@ -1,160 +0,0 @@ -from glob import glob -import os - -class HParams: - def __init__(self, **kwargs): - self.data = {} - - for key, value in kwargs.items(): - self.data[key] = value - - def __getattr__(self, key): - if key not in self.data: - raise AttributeError("'HParams' object has no attribute %s" % key) - return self.data[key] - - def set_hparam(self, key, value): - self.data[key] = value - - -# Default hyperparameters -hparams = HParams( - num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality - # network - rescale=True, # Whether to rescale audio prior to preprocessing - rescaling_max=0.9, # Rescaling value - - # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction - # It"s 
preferred to set True to use with https://github.com/r9y9/wavenet_vocoder - # Does not work if n_ffit is not multiple of hop_size!! - use_lws=False, - - n_fft=800, # Extra window size is filled with 0 paddings to match this parameter - hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) - win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) - sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) - - frame_shift_ms=None, # Can replace hop_size parameter. (Recommended: 12.5) - - # Mel and Linear spectrograms normalization/scaling and clipping - signal_normalization=True, - # Whether to normalize mel spectrograms to some predefined range (following below parameters) - allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True - symmetric_mels=True, - # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2, - # faster and cleaner convergence) - max_abs_value=4., - # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not - # be too big to avoid gradient explosion, - # not too small for fast convergence) - # Contribution by @begeekmyfriend - # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude - # levels. Also allows for better G&L phase reconstruction) - preemphasize=True, # whether to apply filter - preemphasis=0.97, # filter coefficient. - - # Limits - min_level_db=-100, - ref_level_db=20, - fmin=55, - # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To - # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) - fmax=7600, # To be increased/reduced depending on data. - - ###################### Our training parameters ################################# - img_size=96, - fps=25, - - batch_size=16, - initial_learning_rate=1e-4, - nepochs=300000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs - num_workers=20, - checkpoint_interval=3000, - eval_interval=3000, - writer_interval=300, - save_optimizer_state=True, - - syncnet_wt=0.0, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence. - syncnet_batch_size=64, - syncnet_lr=1e-4, - syncnet_eval_interval=1000, - syncnet_checkpoint_interval=10000, - - disc_wt=0.07, - disc_initial_learning_rate=1e-4, -) - - - -# Default hyperparameters -hparamsdebug = HParams( - num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality - # network - rescale=True, # Whether to rescale audio prior to preprocessing - rescaling_max=0.9, # Rescaling value - - # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction - # It"s preferred to set True to use with https://github.com/r9y9/wavenet_vocoder - # Does not work if n_ffit is not multiple of hop_size!! - use_lws=False, - - n_fft=800, # Extra window size is filled with 0 paddings to match this parameter - hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) - win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) - sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) - - frame_shift_ms=None, # Can replace hop_size parameter. 
(Recommended: 12.5) - - # Mel and Linear spectrograms normalization/scaling and clipping - signal_normalization=True, - # Whether to normalize mel spectrograms to some predefined range (following below parameters) - allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True - symmetric_mels=True, - # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2, - # faster and cleaner convergence) - max_abs_value=4., - # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not - # be too big to avoid gradient explosion, - # not too small for fast convergence) - # Contribution by @begeekmyfriend - # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude - # levels. Also allows for better G&L phase reconstruction) - preemphasize=True, # whether to apply filter - preemphasis=0.97, # filter coefficient. - - # Limits - min_level_db=-100, - ref_level_db=20, - fmin=55, - # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To - # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) - fmax=7600, # To be increased/reduced depending on data. - - ###################### Our training parameters ################################# - img_size=96, - fps=25, - - batch_size=2, - initial_learning_rate=1e-3, - nepochs=100000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs - num_workers=0, - checkpoint_interval=10000, - eval_interval=10, - writer_interval=5, - save_optimizer_state=True, - - syncnet_wt=0.0, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence. - syncnet_batch_size=64, - syncnet_lr=1e-4, - syncnet_eval_interval=10000, - syncnet_checkpoint_interval=10000, - - disc_wt=0.07, - disc_initial_learning_rate=1e-4, -) - - -def hparams_debug_string(): - values = hparams.values() - hp = [" %s: %s" % (name, values[name]) for name in sorted(values) if name != "sentences"] - return "Hyperparameters:\n" + "\n".join(hp) diff --git a/src/src/utils/init_path.py b/src/src/utils/init_path.py deleted file mode 100644 index 18ca81eb81f564f44fd376667168807e4e976a36..0000000000000000000000000000000000000000 --- a/src/src/utils/init_path.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -import glob - -def init_path(checkpoint_dir, config_dir, size=512, old_version=False, preprocess='crop'): - - if old_version: - #### load all the checkpoint of `pth` - sadtalker_paths = { - 'wav2lip_checkpoint' : os.path.join(checkpoint_dir, 'wav2lip.pth'), - 'audio2pose_checkpoint' : os.path.join(checkpoint_dir, 'auido2pose_00140-model.pth'), - 'audio2exp_checkpoint' : os.path.join(checkpoint_dir, 'auido2exp_00300-model.pth'), - 'free_view_checkpoint' : os.path.join(checkpoint_dir, 'facevid2vid_00189-model.pth.tar'), - 'path_of_net_recon_model' : os.path.join(checkpoint_dir, 'epoch_20.pth') - } - - use_safetensor = False - elif len(glob.glob(os.path.join(checkpoint_dir, '*.safetensors'))): - print('using safetensor as default') - sadtalker_paths = { - "checkpoint":os.path.join(checkpoint_dir, 'SadTalker_V0.0.2_'+str(size)+'.safetensors'), - } - use_safetensor = True - else: - print("WARNING: The new version of the model will be updated by safetensor, you may need to download it mannully. 
We run the old version of the checkpoint this time!") - use_safetensor = False - - sadtalker_paths = { - 'wav2lip_checkpoint' : os.path.join(checkpoint_dir, 'wav2lip.pth'), - 'audio2pose_checkpoint' : os.path.join(checkpoint_dir, 'auido2pose_00140-model.pth'), - 'audio2exp_checkpoint' : os.path.join(checkpoint_dir, 'auido2exp_00300-model.pth'), - 'free_view_checkpoint' : os.path.join(checkpoint_dir, 'facevid2vid_00189-model.pth.tar'), - 'path_of_net_recon_model' : os.path.join(checkpoint_dir, 'epoch_20.pth') - } - - sadtalker_paths['dir_of_BFM_fitting'] = os.path.join(config_dir) # , 'BFM_Fitting' - sadtalker_paths['audio2pose_yaml_path'] = os.path.join(config_dir, 'auido2pose.yaml') - sadtalker_paths['audio2exp_yaml_path'] = os.path.join(config_dir, 'auido2exp.yaml') - sadtalker_paths['pirender_yaml_path'] = os.path.join(config_dir, 'facerender_pirender.yaml') - sadtalker_paths['pirender_checkpoint'] = os.path.join(checkpoint_dir, 'epoch_00190_iteration_000400000_checkpoint.pt') - sadtalker_paths['use_safetensor'] = use_safetensor # os.path.join(config_dir, 'auido2exp.yaml') - - if 'full' in preprocess: - sadtalker_paths['mappingnet_checkpoint'] = os.path.join(checkpoint_dir, 'mapping_00109-model.pth.tar') - sadtalker_paths['facerender_yaml'] = os.path.join(config_dir, 'facerender_still.yaml') - else: - sadtalker_paths['mappingnet_checkpoint'] = os.path.join(checkpoint_dir, 'mapping_00229-model.pth.tar') - sadtalker_paths['facerender_yaml'] = os.path.join(config_dir, 'facerender.yaml') - - return sadtalker_paths \ No newline at end of file diff --git a/src/src/utils/model2safetensor.py b/src/src/utils/model2safetensor.py deleted file mode 100644 index 50c485000d43ba9c230a0bc64ce8aeaaec6e2b29..0000000000000000000000000000000000000000 --- a/src/src/utils/model2safetensor.py +++ /dev/null @@ -1,141 +0,0 @@ -import torch -import yaml -import os - -import safetensors -from safetensors.torch import save_file -from yacs.config import CfgNode as CN -import sys - -sys.path.append('/apdcephfs/private_shadowcun/SadTalker') - -from src.face3d.models import networks - -from src.facerender.modules.keypoint_detector import HEEstimator, KPDetector -from src.facerender.modules.mapping import MappingNet -from src.facerender.modules.generator import OcclusionAwareGenerator, OcclusionAwareSPADEGenerator - -from src.audio2pose_models.audio2pose import Audio2Pose -from src.audio2exp_models.networks import SimpleWrapperV2 -from src.test_audio2coeff import load_cpk - -size = 256 -############ face vid2vid -config_path = os.path.join('src', 'config', 'facerender.yaml') -current_root_path = '.' 
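To make the checkpoint-resolution logic of `init_path` above concrete: the function only joins file names, so it can be exercised without the weights present. A small sketch, assuming the live module path `src.utils.init_path` and illustrative directories:

    from src.utils.init_path import init_path

    # With no *.safetensors in the directory this takes the legacy-.pth branch
    # (and prints the warning above).
    paths = init_path('./checkpoints', './src/config', size=256,
                      old_version=False, preprocess='crop')
    print(paths['use_safetensor'])    # False on the fallback branch
    print(paths['facerender_yaml'])   # ends in facerender.yaml ('full' not in preprocess)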
- -path_of_net_recon_model = os.path.join(current_root_path, 'checkpoints', 'epoch_20.pth') -net_recon = networks.define_net_recon(net_recon='resnet50', use_last_fc=False, init_path='') -checkpoint = torch.load(path_of_net_recon_model, map_location='cpu') -net_recon.load_state_dict(checkpoint['net_recon']) - -with open(config_path) as f: - config = yaml.safe_load(f) - -generator = OcclusionAwareSPADEGenerator(**config['model_params']['generator_params'], - **config['model_params']['common_params']) -kp_extractor = KPDetector(**config['model_params']['kp_detector_params'], - **config['model_params']['common_params']) -he_estimator = HEEstimator(**config['model_params']['he_estimator_params'], - **config['model_params']['common_params']) -mapping = MappingNet(**config['model_params']['mapping_params']) - -def load_cpk_facevid2vid(checkpoint_path, generator=None, discriminator=None, - kp_detector=None, he_estimator=None, optimizer_generator=None, - optimizer_discriminator=None, optimizer_kp_detector=None, - optimizer_he_estimator=None, device="cpu"): - - checkpoint = torch.load(checkpoint_path, map_location=torch.device(device)) - if generator is not None: - generator.load_state_dict(checkpoint['generator']) - if kp_detector is not None: - kp_detector.load_state_dict(checkpoint['kp_detector']) - if he_estimator is not None: - he_estimator.load_state_dict(checkpoint['he_estimator']) - if discriminator is not None: - try: - discriminator.load_state_dict(checkpoint['discriminator']) - except: - print ('No discriminator in the state-dict. Dicriminator will be randomly initialized') - if optimizer_generator is not None: - optimizer_generator.load_state_dict(checkpoint['optimizer_generator']) - if optimizer_discriminator is not None: - try: - optimizer_discriminator.load_state_dict(checkpoint['optimizer_discriminator']) - except RuntimeError as e: - print ('No discriminator optimizer in the state-dict. 
Optimizer will be not initialized') - if optimizer_kp_detector is not None: - optimizer_kp_detector.load_state_dict(checkpoint['optimizer_kp_detector']) - if optimizer_he_estimator is not None: - optimizer_he_estimator.load_state_dict(checkpoint['optimizer_he_estimator']) - - return checkpoint['epoch'] - - -def load_cpk_facevid2vid_safetensor(checkpoint_path, generator=None, - kp_detector=None, he_estimator=None, - device="cpu"): - - checkpoint = safetensors.torch.load_file(checkpoint_path) - - if generator is not None: - x_generator = {} - for k,v in checkpoint.items(): - if 'generator' in k: - x_generator[k.replace('generator.', '')] = v - generator.load_state_dict(x_generator) - if kp_detector is not None: - x_generator = {} - for k,v in checkpoint.items(): - if 'kp_extractor' in k: - x_generator[k.replace('kp_extractor.', '')] = v - kp_detector.load_state_dict(x_generator) - if he_estimator is not None: - x_generator = {} - for k,v in checkpoint.items(): - if 'he_estimator' in k: - x_generator[k.replace('he_estimator.', '')] = v - he_estimator.load_state_dict(x_generator) - - return None - -free_view_checkpoint = '/apdcephfs/private_shadowcun/SadTalker/checkpoints/facevid2vid_'+str(size)+'-model.pth.tar' -load_cpk_facevid2vid(free_view_checkpoint, kp_detector=kp_extractor, generator=generator, he_estimator=he_estimator) - -wav2lip_checkpoint = os.path.join(current_root_path, 'checkpoints', 'wav2lip.pth') - -audio2pose_checkpoint = os.path.join(current_root_path, 'checkpoints', 'auido2pose_00140-model.pth') -audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml') - -audio2exp_checkpoint = os.path.join(current_root_path, 'checkpoints', 'auido2exp_00300-model.pth') -audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml') - -fcfg_pose = open(audio2pose_yaml_path) -cfg_pose = CN.load_cfg(fcfg_pose) -cfg_pose.freeze() -audio2pose_model = Audio2Pose(cfg_pose, wav2lip_checkpoint) -audio2pose_model.eval() -load_cpk(audio2pose_checkpoint, model=audio2pose_model, device='cpu') - -# load audio2exp_model -netG = SimpleWrapperV2() -netG.eval() -load_cpk(audio2exp_checkpoint, model=netG, device='cpu') - -class SadTalker(torch.nn.Module): - def __init__(self, kp_extractor, generator, netG, audio2pose, face_3drecon): - super(SadTalker, self).__init__() - self.kp_extractor = kp_extractor - self.generator = generator - self.audio2exp = netG - self.audio2pose = audio2pose - self.face_3drecon = face_3drecon - - -model = SadTalker(kp_extractor, generator, netG, audio2pose_model, net_recon) - -# here, we want to convert it to safetensor -save_file(model.state_dict(), "checkpoints/SadTalker_V0.0.2_"+str(size)+".safetensors") - -### test -load_cpk_facevid2vid_safetensor('checkpoints/SadTalker_V0.0.2_'+str(size)+'.safetensors', kp_detector=kp_extractor, generator=generator, he_estimator=None) \ No newline at end of file diff --git a/src/src/utils/paste_pic.py b/src/src/utils/paste_pic.py deleted file mode 100644 index f9989e21e48e64f620f9b148e65fdfe806c53b14..0000000000000000000000000000000000000000 --- a/src/src/utils/paste_pic.py +++ /dev/null @@ -1,69 +0,0 @@ -import cv2, os -import numpy as np -from tqdm import tqdm -import uuid - -from src.utils.videoio import save_video_with_watermark - -def paste_pic(video_path, pic_path, crop_info, new_audio_path, full_video_path, extended_crop=False): - - if not os.path.isfile(pic_path): - raise ValueError('pic_path must be a valid path to video/image file') - elif pic_path.split('.')[-1] in ['jpg', 
'png', 'jpeg']: - # loader for first frame - full_img = cv2.imread(pic_path) - else: - # loader for videos - video_stream = cv2.VideoCapture(pic_path) - fps = video_stream.get(cv2.CAP_PROP_FPS) - full_frames = [] - while 1: - still_reading, frame = video_stream.read() - if not still_reading: - video_stream.release() - break - break - full_img = frame - frame_h = full_img.shape[0] - frame_w = full_img.shape[1] - - video_stream = cv2.VideoCapture(video_path) - fps = video_stream.get(cv2.CAP_PROP_FPS) - crop_frames = [] - while 1: - still_reading, frame = video_stream.read() - if not still_reading: - video_stream.release() - break - crop_frames.append(frame) - - if len(crop_info) != 3: - print("you didn't crop the image") - return - else: - r_w, r_h = crop_info[0] - clx, cly, crx, cry = crop_info[1] - lx, ly, rx, ry = crop_info[2] - lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry) - # oy1, oy2, ox1, ox2 = cly+ly, cly+ry, clx+lx, clx+rx - # oy1, oy2, ox1, ox2 = cly+ly, cly+ry, clx+lx, clx+rx - - if extended_crop: - oy1, oy2, ox1, ox2 = cly, cry, clx, crx - else: - oy1, oy2, ox1, ox2 = cly+ly, cly+ry, clx+lx, clx+rx - - tmp_path = str(uuid.uuid4())+'.mp4' - out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h)) - for crop_frame in tqdm(crop_frames, 'seamlessClone:'): - p = cv2.resize(crop_frame.astype(np.uint8), (ox2-ox1, oy2 - oy1)) - - mask = 255*np.ones(p.shape, p.dtype) - location = ((ox1+ox2) // 2, (oy1+oy2) // 2) - gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE) - out_tmp.write(gen_img) - - out_tmp.release() - - save_video_with_watermark(tmp_path, new_audio_path, full_video_path, watermark=False) - os.remove(tmp_path) diff --git a/src/src/utils/preprocess.py b/src/src/utils/preprocess.py deleted file mode 100644 index 0f784e6c3d8562e1db1bbd850b9f01843cee3c97..0000000000000000000000000000000000000000 --- a/src/src/utils/preprocess.py +++ /dev/null @@ -1,170 +0,0 @@ -import numpy as np -import cv2, os, sys, torch -from tqdm import tqdm -from PIL import Image - -# 3dmm extraction -import safetensors -import safetensors.torch -from src.face3d.util.preprocess import align_img -from src.face3d.util.load_mats import load_lm3d -from src.face3d.models import networks - -from scipy.io import loadmat, savemat -from src.utils.croper import Preprocesser - - -import warnings - -from src.utils.safetensor_helper import load_x_from_safetensor -warnings.filterwarnings("ignore") - -def split_coeff(coeffs): - """ - Return: - coeffs_dict -- a dict of torch.tensors - - Parameters: - coeffs -- torch.tensor, size (B, 256) - """ - id_coeffs = coeffs[:, :80] - exp_coeffs = coeffs[:, 80: 144] - tex_coeffs = coeffs[:, 144: 224] - angles = coeffs[:, 224: 227] - gammas = coeffs[:, 227: 254] - translations = coeffs[:, 254:] - return { - 'id': id_coeffs, - 'exp': exp_coeffs, - 'tex': tex_coeffs, - 'angle': angles, - 'gamma': gammas, - 'trans': translations - } - - -class CropAndExtract(): - def __init__(self, sadtalker_path, device): - - self.propress = Preprocesser(device) - self.net_recon = networks.define_net_recon(net_recon='resnet50', use_last_fc=False, init_path='').to(device) - - if sadtalker_path['use_safetensor']: - checkpoint = safetensors.torch.load_file(sadtalker_path['checkpoint']) - self.net_recon.load_state_dict(load_x_from_safetensor(checkpoint, 'face_3drecon')) - else: - checkpoint = torch.load(sadtalker_path['path_of_net_recon_model'], map_location=torch.device(device)) - self.net_recon.load_state_dict(checkpoint['net_recon']) - - 
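One detail in `split_coeff` above deserves a note: the slices only add up if the input has 257 columns (80 id + 64 exp + 80 tex + 3 angle + 27 gamma + 3 trans), so the `(B, 256)` in its docstring looks off by one. A quick shape check, assuming the function is importable from `src.utils.preprocess`:

    import torch
    from src.utils.preprocess import split_coeff

    coeffs = torch.zeros(2, 257)   # 80 + 64 + 80 + 3 + 27 + 3 = 257
    parts = split_coeff(coeffs)
    print({k: tuple(v.shape) for k, v in parts.items()})
    # {'id': (2, 80), 'exp': (2, 64), 'tex': (2, 80),
    #  'angle': (2, 3), 'gamma': (2, 27), 'trans': (2, 3)}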
self.net_recon.eval() - self.lm3d_std = load_lm3d(sadtalker_path['dir_of_BFM_fitting']) - self.device = device - - def generate(self, input_path, save_dir, crop_or_resize='crop', source_image_flag=False, pic_size=256): - - pic_name = os.path.splitext(os.path.split(input_path)[-1])[0] - - landmarks_path = os.path.join(save_dir, pic_name+'_landmarks.txt') - coeff_path = os.path.join(save_dir, pic_name+'.mat') - png_path = os.path.join(save_dir, pic_name+'.png') - - #load input - if not os.path.isfile(input_path): - raise ValueError('input_path must be a valid path to video/image file') - elif input_path.split('.')[-1] in ['jpg', 'png', 'jpeg']: - # loader for first frame - full_frames = [cv2.imread(input_path)] - fps = 25 - else: - # loader for videos - video_stream = cv2.VideoCapture(input_path) - fps = video_stream.get(cv2.CAP_PROP_FPS) - full_frames = [] - while 1: - still_reading, frame = video_stream.read() - if not still_reading: - video_stream.release() - break - full_frames.append(frame) - if source_image_flag: - break - - x_full_frames= [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in full_frames] - - #### crop images as the - if 'crop' in crop_or_resize.lower(): # default crop - x_full_frames, crop, quad = self.propress.crop(x_full_frames, still=True if 'ext' in crop_or_resize.lower() else False, xsize=512) - clx, cly, crx, cry = crop - lx, ly, rx, ry = quad - lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry) - oy1, oy2, ox1, ox2 = cly+ly, cly+ry, clx+lx, clx+rx - crop_info = ((ox2 - ox1, oy2 - oy1), crop, quad) - elif 'full' in crop_or_resize.lower(): - x_full_frames, crop, quad = self.propress.crop(x_full_frames, still=True if 'ext' in crop_or_resize.lower() else False, xsize=512) - clx, cly, crx, cry = crop - lx, ly, rx, ry = quad - lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry) - oy1, oy2, ox1, ox2 = cly+ly, cly+ry, clx+lx, clx+rx - crop_info = ((ox2 - ox1, oy2 - oy1), crop, quad) - else: # resize mode - oy1, oy2, ox1, ox2 = 0, x_full_frames[0].shape[0], 0, x_full_frames[0].shape[1] - crop_info = ((ox2 - ox1, oy2 - oy1), None, None) - - frames_pil = [Image.fromarray(cv2.resize(frame,(pic_size, pic_size))) for frame in x_full_frames] - if len(frames_pil) == 0: - print('No face is detected in the input file') - return None, None - - # save crop info - for frame in frames_pil: - cv2.imwrite(png_path, cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR)) - - # 2. get the landmark according to the detected face. - if not os.path.isfile(landmarks_path): - lm = self.propress.predictor.extract_keypoint(frames_pil, landmarks_path) - else: - print(' Using saved landmarks.') - lm = np.loadtxt(landmarks_path).astype(np.float32) - lm = lm.reshape([len(x_full_frames), -1, 2]) - - if not os.path.isfile(coeff_path): - # load 3dmm paramter generator from Deep3DFaceRecon_pytorch - video_coeffs, full_coeffs = [], [] - for idx in tqdm(range(len(frames_pil)), desc='3DMM Extraction In Video:'): - frame = frames_pil[idx] - W,H = frame.size - lm1 = lm[idx].reshape([-1, 2]) - - if np.mean(lm1) == -1: - lm1 = (self.lm3d_std[:, :2]+1)/2. 
- lm1 = np.concatenate( - [lm1[:, :1]*W, lm1[:, 1:2]*H], 1 - ) - else: - lm1[:, -1] = H - 1 - lm1[:, -1] - - trans_params, im1, lm1, _ = align_img(frame, lm1, self.lm3d_std) - - trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]).astype(np.float32) - im_t = torch.tensor(np.array(im1)/255., dtype=torch.float32).permute(2, 0, 1).to(self.device).unsqueeze(0) - - with torch.no_grad(): - full_coeff = self.net_recon(im_t) - coeffs = split_coeff(full_coeff) - - pred_coeff = {key:coeffs[key].cpu().numpy() for key in coeffs} - - pred_coeff = np.concatenate([ - pred_coeff['exp'], - pred_coeff['angle'], - pred_coeff['trans'], - trans_params[2:][None], - ], 1) - video_coeffs.append(pred_coeff) - full_coeffs.append(full_coeff.cpu().numpy()) - - semantic_npy = np.array(video_coeffs)[:,0] - - savemat(coeff_path, {'coeff_3dmm': semantic_npy, 'full_3dmm': np.array(full_coeffs)[0]}) - - return coeff_path, png_path, crop_info diff --git a/src/src/utils/safetensor_helper.py b/src/src/utils/safetensor_helper.py deleted file mode 100644 index 3cdbdd21e4ed656dfe2d31a57360afb3e96480b3..0000000000000000000000000000000000000000 --- a/src/src/utils/safetensor_helper.py +++ /dev/null @@ -1,8 +0,0 @@ - - -def load_x_from_safetensor(checkpoint, key): - x_generator = {} - for k,v in checkpoint.items(): - if key in k: - x_generator[k.replace(key+'.', '')] = v - return x_generator \ No newline at end of file diff --git a/src/src/utils/text2speech.py b/src/src/utils/text2speech.py deleted file mode 100644 index 00d165b6cc7774fd200929aafa0ff3b15916111e..0000000000000000000000000000000000000000 --- a/src/src/utils/text2speech.py +++ /dev/null @@ -1,20 +0,0 @@ -import os -import tempfile -from TTS.api import TTS - - -class TTSTalker(): - def __init__(self) -> None: - model_name = TTS.list_models()[0] - self.tts = TTS(model_name) - - def test(self, text, language='en'): - - tempf = tempfile.NamedTemporaryFile( - delete = False, - suffix = ('.'+'wav'), - ) - - self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name) - - return tempf.name \ No newline at end of file diff --git a/src/src/utils/videoio.py b/src/src/utils/videoio.py deleted file mode 100644 index d16ee667713a16e3f9644fcc3cb3e023bc2c9102..0000000000000000000000000000000000000000 --- a/src/src/utils/videoio.py +++ /dev/null @@ -1,41 +0,0 @@ -import shutil -import uuid - -import os - -import cv2 - -def load_video_to_cv2(input_path): - video_stream = cv2.VideoCapture(input_path) - fps = video_stream.get(cv2.CAP_PROP_FPS) - full_frames = [] - while 1: - still_reading, frame = video_stream.read() - if not still_reading: - video_stream.release() - break - full_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - return full_frames - -def save_video_with_watermark(video, audio, save_path, watermark=False): - temp_file = str(uuid.uuid4())+'.mp4' - cmd = r'ffmpeg -y -hide_banner -loglevel error -i "%s" -i "%s" -vcodec mpeg4 "%s"' % (video, audio, temp_file) - os.system(cmd) - - if watermark is False: - shutil.move(temp_file, save_path) - else: - # watermark - try: - ##### check if stable-diffusion-webui - import webui - from modules import paths - watarmark_path = paths.script_path+"/extensions/SadTalker/docs/sadtalker_logo.png" - except: - # get the root path of sadtalker. 
- dir_path = os.path.dirname(os.path.realpath(__file__)) - watarmark_path = dir_path+"/../../docs/sadtalker_logo.png" - - cmd = r'ffmpeg -y -hide_banner -loglevel error -i "%s" -i "%s" -filter_complex "[1]scale=100:-1[wm];[0][wm]overlay=(main_w-overlay_w)-10:10" "%s"' % (temp_file, watarmark_path, save_path) - os.system(cmd) - os.remove(temp_file) \ No newline at end of file diff --git a/src/utils/__pycache__/audio.cpython-38.pyc b/src/utils/__pycache__/audio.cpython-38.pyc deleted file mode 100644 index 21e08988bd6845f5224b908e480384073f916c9a..0000000000000000000000000000000000000000 Binary files a/src/utils/__pycache__/audio.cpython-38.pyc and /dev/null differ diff --git a/src/utils/__pycache__/croper.cpython-38.pyc b/src/utils/__pycache__/croper.cpython-38.pyc deleted file mode 100644 index f638ebbf26fd296aaac8ddfbc5146224e643fc4b..0000000000000000000000000000000000000000 Binary files a/src/utils/__pycache__/croper.cpython-38.pyc and /dev/null differ diff --git a/src/utils/__pycache__/face_enhancer.cpython-38.pyc b/src/utils/__pycache__/face_enhancer.cpython-38.pyc deleted file mode 100644 index 7ee8c0a1b927ca7bddb6a492f630fe2568655ed7..0000000000000000000000000000000000000000 Binary files a/src/utils/__pycache__/face_enhancer.cpython-38.pyc and /dev/null differ diff --git a/src/utils/__pycache__/hparams.cpython-38.pyc b/src/utils/__pycache__/hparams.cpython-38.pyc deleted file mode 100644 index b58160de56c4dafb06e034254eca72657ca3f656..0000000000000000000000000000000000000000 Binary files a/src/utils/__pycache__/hparams.cpython-38.pyc and /dev/null differ diff --git a/src/utils/__pycache__/preprocess.cpython-38.pyc b/src/utils/__pycache__/preprocess.cpython-38.pyc deleted file mode 100644 index 1da2714b7d6cc0c461888ccc1d01b1faddc08d32..0000000000000000000000000000000000000000 Binary files a/src/utils/__pycache__/preprocess.cpython-38.pyc and /dev/null differ diff --git a/src/utils/init_path.py b/src/utils/init_path.py index 5f38d11907bd0dc789992062ce7f02d8876c638f..18ca81eb81f564f44fd376667168807e4e976a36 100644 --- a/src/utils/init_path.py +++ b/src/utils/init_path.py @@ -35,6 +35,8 @@ def init_path(checkpoint_dir, config_dir, size=512, old_version=False, preproces sadtalker_paths['dir_of_BFM_fitting'] = os.path.join(config_dir) # , 'BFM_Fitting' sadtalker_paths['audio2pose_yaml_path'] = os.path.join(config_dir, 'auido2pose.yaml') sadtalker_paths['audio2exp_yaml_path'] = os.path.join(config_dir, 'auido2exp.yaml') + sadtalker_paths['pirender_yaml_path'] = os.path.join(config_dir, 'facerender_pirender.yaml') + sadtalker_paths['pirender_checkpoint'] = os.path.join(checkpoint_dir, 'epoch_00190_iteration_000400000_checkpoint.pt') sadtalker_paths['use_safetensor'] = use_safetensor # os.path.join(config_dir, 'auido2exp.yaml') if 'full' in preprocess: diff --git a/src/utils/videoio.py b/src/utils/videoio.py index 08bfbdd7d4be97dc17fea4ad7b2733e9eb0ef975..d16ee667713a16e3f9644fcc3cb3e023bc2c9102 100644 --- a/src/utils/videoio.py +++ b/src/utils/videoio.py @@ -19,7 +19,7 @@ def load_video_to_cv2(input_path): def save_video_with_watermark(video, audio, save_path, watermark=False): temp_file = str(uuid.uuid4())+'.mp4' - cmd = r'ffmpeg -y -hide_banner -loglevel error -i "%s" -i "%s" -vcodec copy "%s"' % (video, audio, temp_file) + cmd = r'ffmpeg -y -hide_banner -loglevel error -i "%s" -i "%s" -vcodec mpeg4 "%s"' % (video, audio, temp_file) os.system(cmd) if watermark is False:
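The final hunk above is the one live change in this group: `save_video_with_watermark` now muxes audio with `-vcodec mpeg4` instead of `-vcodec copy`, re-encoding the video stream rather than copying it, presumably to avoid failures when the rendered stream cannot be stream-copied into the temporary mp4. A sketch of the equivalent call via subprocess (file names are hypothetical; ffmpeg is assumed on PATH):

    import subprocess

    def mux_audio(video, audio, out_path):
        # Mirrors the updated command: re-encode video as mpeg4 while muxing audio.
        subprocess.run([
            'ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
            '-i', video, '-i', audio,
            '-vcodec', 'mpeg4', out_path,
        ], check=True)

    # mux_audio('render.mp4', 'speech.wav', 'result.mp4')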