feifeifeiliu committed on
Commit
865fd8a
1 Parent(s): d8f41ae

first version

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +7 -0
  2. README.md +2 -2
  3. __init__.py +0 -0
  4. app.py +282 -0
  5. config/LS3DCG.json +60 -0
  6. config/body_pixel.json +63 -0
  7. config/body_vq.json +62 -0
  8. config/face.json +59 -0
  9. data_utils/__init__.py +3 -0
  10. data_utils/__pycache__/__init__.cpython-37.pyc +0 -0
  11. data_utils/__pycache__/consts.cpython-37.pyc +0 -0
  12. data_utils/__pycache__/dataloader_torch.cpython-37.pyc +0 -0
  13. data_utils/__pycache__/lower_body.cpython-37.pyc +0 -0
  14. data_utils/__pycache__/mesh_dataset.cpython-37.pyc +0 -0
  15. data_utils/__pycache__/rotation_conversion.cpython-37.pyc +0 -0
  16. data_utils/__pycache__/utils.cpython-37.pyc +0 -0
  17. data_utils/axis2matrix.py +29 -0
  18. data_utils/consts.py +0 -0
  19. data_utils/dataloader_torch.py +279 -0
  20. data_utils/dataset_preprocess.py +170 -0
  21. data_utils/get_j.py +51 -0
  22. data_utils/hand_component.json +0 -0
  23. data_utils/lower_body.py +143 -0
  24. data_utils/mesh_dataset.py +348 -0
  25. data_utils/rotation_conversion.py +551 -0
  26. data_utils/utils.py +333 -0
  27. demo/1st-page/1st-page-upper.mp4 +0 -0
  28. demo/1st-page/1st-page-upper.npy +3 -0
  29. demo/french/french.mp4 +0 -0
  30. demo/french/french.npy +3 -0
  31. demo/rich/rich.mp4 +3 -0
  32. demo/rich/rich.npy +3 -0
  33. demo/song/cut.mp4 +0 -0
  34. demo/song/song.mp4 +3 -0
  35. demo/song/song.npy +3 -0
  36. demo/style/chemistry.mp4 +0 -0
  37. demo/style/chemistry.npy +3 -0
  38. demo/style/conan.mp4 +0 -0
  39. demo/style/conan.npy +3 -0
  40. demo/style/diversity.mp4 +3 -0
  41. demo/style/diversity.npy +3 -0
  42. demo/style/face.mp4 +0 -0
  43. demo/style/face.npy +3 -0
  44. demo/style/oliver.mp4 +0 -0
  45. demo/style/oliver.npy +3 -0
  46. demo/style/seth.mp4 +0 -0
  47. demo/style/seth.npy +3 -0
  48. demo_audio/1st-page.wav +0 -0
  49. demo_audio/french.wav +0 -0
  50. demo_audio/rich.wav +3 -0
.gitattributes CHANGED
@@ -32,3 +32,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ demo_audio/rich_short.wav filter=lfs diff=lfs merge=lfs -text
+ demo_audio/rich.wav filter=lfs diff=lfs merge=lfs -text
+ demo_audio/song.wav filter=lfs diff=lfs merge=lfs -text
+ demo/rich/rich.mp4 filter=lfs diff=lfs merge=lfs -text
+ demo/song/song.mp4 filter=lfs diff=lfs merge=lfs -text
+ demo/style/diversity.mp4 filter=lfs diff=lfs merge=lfs -text
+ visualise/teaser_01.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,8 +1,8 @@
 ---
 title: TalkSHOW
- emoji: 🏃
+ emoji: 🌍
 colorFrom: pink
- colorTo: green
+ colorTo: red
 sdk: gradio
 sdk_version: 3.23.0
 app_file: app.py
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,282 @@
import gradio as gr
import os
import sys
sys.path.append(os.getcwd())
os.system(r"cd mesh-master")
os.system(r"make all")
os.system(r"cd ..")

from transformers import Wav2Vec2Processor

import numpy as np
import json
import smplx as smpl

from nets import *
from trainer.options import parse_args
from data_utils import torch_data
from trainer.config import load_JsonConfig

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
from data_utils.rotation_conversion import rotation_6d_to_matrix, matrix_to_axis_angle
from data_utils.lower_body import part2full, pred2poses, poses2pred, poses2poses
from visualise.rendering import RenderTool

global device
device = 'cpu'


def init_model(model_name, model_path, args, config):
    if model_name == 's2g_face':
        generator = s2g_face(
            args,
            config,
        )
    elif model_name == 's2g_body_vq':
        generator = s2g_body_vq(
            args,
            config,
        )
    elif model_name == 's2g_body_pixel':
        generator = s2g_body_pixel(
            args,
            config,
        )
    elif model_name == 's2g_LS3DCG':
        generator = LS3DCG(
            args,
            config,
        )
    else:
        raise NotImplementedError

    model_ckpt = torch.load(model_path, map_location=torch.device('cpu'))
    if model_name == 'smplx_S2G':
        generator.generator.load_state_dict(model_ckpt['generator']['generator'])

    elif 'generator' in list(model_ckpt.keys()):
        generator.load_state_dict(model_ckpt['generator'])
    else:
        model_ckpt = {'generator': model_ckpt}
        generator.load_state_dict(model_ckpt)

    return generator


def get_vertices(smplx_model, betas, result_list, exp, require_pose=False):
    vertices_list = []
    poses_list = []
    expression = torch.zeros([1, 100])

    for i in result_list:
        vertices = []
        poses = []
        for j in range(i.shape[0]):
            output = smplx_model(betas=betas,
                                 expression=i[j][165:265].unsqueeze_(dim=0) if exp else expression,
                                 jaw_pose=i[j][0:3].unsqueeze_(dim=0),
                                 leye_pose=i[j][3:6].unsqueeze_(dim=0),
                                 reye_pose=i[j][6:9].unsqueeze_(dim=0),
                                 global_orient=i[j][9:12].unsqueeze_(dim=0),
                                 body_pose=i[j][12:75].unsqueeze_(dim=0),
                                 left_hand_pose=i[j][75:120].unsqueeze_(dim=0),
                                 right_hand_pose=i[j][120:165].unsqueeze_(dim=0),
                                 return_verts=True)
            vertices.append(output.vertices.detach().cpu().numpy().squeeze())
            # pose = torch.cat([output.body_pose, output.left_hand_pose, output.right_hand_pose], dim=1)
            pose = output.body_pose
            poses.append(pose.detach().cpu())
        vertices = np.asarray(vertices)
        vertices_list.append(vertices)
        poses = torch.cat(poses, dim=0)
        poses_list.append(poses)
    if require_pose:
        return vertices_list, poses_list
    else:
        return vertices_list, None


global_orient = torch.tensor([3.0747, -0.0158, -0.0152])

parser = parse_args()
args = parser.parse_args()

RUN_MODE = "local"
if RUN_MODE != "local":
    os.system("wget -P experiments/2022-10-15-smplx_S2G-face-3d/ "
              "https://huggingface.co/feifeifeiliu/TalkSHOW/resolve/main/2022-10-15-smplx_S2G-face-3d/ckpt-99.pth")
    os.system("wget -P experiments/2022-10-31-smplx_S2G-body-vq-3d/ "
              "https://huggingface.co/feifeifeiliu/TalkSHOW/resolve/main/2022-10-31-smplx_S2G-body-vq-3d/ckpt-99.pth")
    os.system("wget -P experiments/2022-11-02-smplx_S2G-body-pixel-3d/ "
              "https://huggingface.co/feifeifeiliu/TalkSHOW/resolve/main/2022-11-02-smplx_S2G-body-pixel-3d/ckpt-99.pth")
    os.system("wget -P visualise/smplx/ "
              "https://huggingface.co/feifeifeiliu/TalkSHOW/resolve/main/smplx/SMPLX_NEUTRAL.npz")

config = load_JsonConfig("config/body_pixel.json")

face_model_name = args.face_model_name
face_model_path = args.face_model_path
body_model_name = args.body_model_name
body_model_path = args.body_model_path
smplx_path = './visualise/'

os.environ['smplx_npz_path'] = config.smplx_npz_path
os.environ['extra_joint_path'] = config.extra_joint_path
os.environ['j14_regressor_path'] = config.j14_regressor_path

print('init model...')
g_body = init_model(body_model_name, body_model_path, args, config)
generator2 = None
g_face = init_model(face_model_name, face_model_path, args, config)

print('init smlpx model...')
dtype = torch.float64
model_params = dict(model_path=smplx_path,
                    model_type='smplx',
                    create_global_orient=True,
                    create_body_pose=True,
                    create_betas=True,
                    num_betas=300,
                    create_left_hand_pose=True,
                    create_right_hand_pose=True,
                    use_pca=False,
                    flat_hand_mean=False,
                    create_expression=True,
                    num_expression_coeffs=100,
                    num_pca_comps=12,
                    create_jaw_pose=True,
                    create_leye_pose=True,
                    create_reye_pose=True,
                    create_transl=False,
                    # gender='ne',
                    dtype=dtype, )
smplx_model = smpl.create(**model_params).to(device)
print('init rendertool...')
rendertool = RenderTool('visualise/video/' + config.Log.name)


def infer(wav, identity, pose):
    betas = torch.zeros([1, 300], dtype=torch.float64).to(device)
    am = Wav2Vec2Processor.from_pretrained("vitouphy/wav2vec2-xls-r-300m-phoneme")
    am_sr = 16000
    num_sample = args.num_sample
    cur_wav_file = wav

    if pose == 'Stand':
        stand = True
        face = False
    elif pose == 'Sit':
        stand = False
        face = False
    else:
        stand = False
        face = True

    if face:
        body_static = torch.zeros([1, 162], device=device)
        body_static[:, 6:9] = torch.tensor([3.0747, -0.0158, -0.0152]).reshape(1, 3).repeat(body_static.shape[0], 1)

    if identity == 'Oliver':
        id = 0
    elif identity == 'Chemistry':
        id = 1
    elif identity == 'Seth':
        id = 2
    elif identity == 'Conan':
        id = 3

    result_list = []

    pred_face = g_face.infer_on_audio(cur_wav_file,
                                      initial_pose=None,
                                      norm_stats=None,
                                      w_pre=False,
                                      # id=id,
                                      frame=None,
                                      am=am,
                                      am_sr=am_sr
                                      )
    pred_face = torch.tensor(pred_face).squeeze().to(device)
    # pred_face = torch.zeros([gt.shape[0], 105])

    if config.Data.pose.convert_to_6d:
        pred_jaw = pred_face[:, :6].reshape(pred_face.shape[0], -1, 6)
        pred_jaw = matrix_to_axis_angle(rotation_6d_to_matrix(pred_jaw)).reshape(pred_face.shape[0], -1)
        pred_face = pred_face[:, 6:]
    else:
        pred_jaw = pred_face[:, :3]
        pred_face = pred_face[:, 3:]

    id = torch.tensor([id], device=device)

    for i in range(num_sample):
        pred_res = g_body.infer_on_audio(cur_wav_file,
                                         initial_pose=None,
                                         norm_stats=None,
                                         txgfile=None,
                                         id=id,
                                         var=None,
                                         fps=30,
                                         w_pre=False
                                         )
        pred = torch.tensor(pred_res).squeeze().to(device)

        if pred.shape[0] < pred_face.shape[0]:
            repeat_frame = pred[-1].unsqueeze(dim=0).repeat(pred_face.shape[0] - pred.shape[0], 1)
            pred = torch.cat([pred, repeat_frame], dim=0)
        else:
            pred = pred[:pred_face.shape[0], :]

        body_or_face = False
        if pred.shape[1] < 275:
            body_or_face = True
        if config.Data.pose.convert_to_6d:
            pred = pred.reshape(pred.shape[0], -1, 6)
            pred = matrix_to_axis_angle(rotation_6d_to_matrix(pred))
            pred = pred.reshape(pred.shape[0], -1)

        if config.Model.model_name == 's2g_LS3DCG':
            pred = torch.cat([pred[:, :3], pred[:, 103:], pred[:, 3:103]], dim=-1)
        else:
            pred = torch.cat([pred_jaw, pred, pred_face], dim=-1)

        # pred[:, 9:12] = global_orient
        pred = part2full(pred, stand)
        if face:
            pred = torch.cat([pred[:, :3], body_static.repeat(pred.shape[0], 1), pred[:, -100:]], dim=-1)
        # result_list[0] = poses2pred(result_list[0], stand)
        # if gt_0 is None:
        #     gt_0 = gt
        # pred = pred2poses(pred, gt_0)
        # result_list[0] = poses2poses(result_list[0], gt_0)

        result_list.append(pred)


    vertices_list, _ = get_vertices(smplx_model, betas, result_list, config.Data.pose.expression)

    result_list = [res.to('cpu') for res in result_list]
    dict = np.concatenate(result_list[:], axis=0)

    rendertool._render_sequences(cur_wav_file, vertices_list, stand=stand, face=face, whole_body=args.whole_body)
    return "result.mp4"


def main():

    iface = gr.Interface(fn=infer, inputs=["audio",
                                           gr.Radio(["Oliver", "Chemistry", "Seth", "Conan"]),
                                           gr.Radio(["Stand", "Sit", "Only Face"]),
                                           ],
                         outputs="video",
                         examples=[[os.path.join(os.path.dirname(__file__), "demo_audio/style.wav"), "Oliver", "Sit"]])
    iface.launch(debug=True)


if __name__ == '__main__':
    main()
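For a quick smoke test outside the Gradio UI, the `infer` entry point above can be called directly; a minimal sketch, assuming app.py imports cleanly (i.e. the checkpoints and SMPL-X archive it expects are already in place) and using one of the demo audio files added in this commit:

    from app import infer

    # identity is one of "Oliver", "Chemistry", "Seth", "Conan";
    # pose is one of "Stand", "Sit", "Only Face".
    video_path = infer("demo_audio/rich.wav", "Oliver", "Sit")
    print(video_path)  # infer() returns "result.mp4" once rendering has finished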
config/LS3DCG.json ADDED
@@ -0,0 +1,60 @@
{
  "config_root_path": "/is/cluster/scratch/hyi/ExpressiveBody/SMPLifyX4/scripts",
  "dataset_load_mode": "pickle",
  "store_file_path": "store.pkl",
  "smplx_npz_path": "visualise/smplx_model/SMPLX_NEUTRAL_2020.npz",
  "extra_joint_path": "visualise/smplx_model/smplx_extra_joints.yaml",
  "j14_regressor_path": "visualise/smplx_model/SMPLX_to_J14.pkl",
  "param": {
    "w_j": 1,
    "w_b": 1,
    "w_h": 1
  },
  "Data": {
    "data_root": "../ExpressiveWholeBodyDatasetv1.0/",
    "pklname": "_3d_mfcc.pkl",
    "whole_video": false,
    "pose": {
      "normalization": false,
      "convert_to_6d": false,
      "norm_method": "all",
      "augmentation": false,
      "generate_length": 88,
      "pre_pose_length": 0,
      "pose_dim": 99,
      "expression": true
    },
    "aud": {
      "feat_method": "mfcc",
      "aud_feat_dim": 64,
      "aud_feat_win_size": null,
      "context_info": false
    }
  },
  "Model": {
    "model_type": "body",
    "model_name": "s2g_LS3DCG",
    "code_num": 2048,
    "AudioOpt": "Adam",
    "encoder_choice": "mfcc",
    "gan": false
  },
  "DataLoader": {
    "batch_size": 128,
    "num_workers": 0
  },
  "Train": {
    "epochs": 100,
    "max_gradient_norm": 5,
    "learning_rate": {
      "generator_learning_rate": 1e-4,
      "discriminator_learning_rate": 1e-4
    }
  },
  "Log": {
    "save_every": 50,
    "print_every": 200,
    "name": "LS3DCG"
  }
}
config/body_pixel.json ADDED
@@ -0,0 +1,63 @@
{
  "config_root_path": "/is/cluster/scratch/hyi/ExpressiveBody/SMPLifyX4/scripts",
  "dataset_load_mode": "pickle",
  "store_file_path": "store.pkl",
  "smplx_npz_path": "visualise/smplx_model/SMPLX_NEUTRAL_2020.npz",
  "extra_joint_path": "visualise/smplx_model/smplx_extra_joints.yaml",
  "j14_regressor_path": "visualise/smplx_model/SMPLX_to_J14.pkl",
  "param": {
    "w_j": 1,
    "w_b": 1,
    "w_h": 1
  },
  "Data": {
    "data_root": "../ExpressiveWholeBodyDatasetv1.0/",
    "pklname": "_3d_mfcc.pkl",
    "whole_video": false,
    "pose": {
      "normalization": false,
      "convert_to_6d": false,
      "norm_method": "all",
      "augmentation": false,
      "generate_length": 88,
      "pre_pose_length": 0,
      "pose_dim": 99,
      "expression": true
    },
    "aud": {
      "feat_method": "mfcc",
      "aud_feat_dim": 64,
      "aud_feat_win_size": null,
      "context_info": false
    }
  },
  "Model": {
    "model_type": "body",
    "model_name": "s2g_body_pixel",
    "composition": true,
    "code_num": 2048,
    "bh_model": true,
    "AudioOpt": "Adam",
    "encoder_choice": "mfcc",
    "gan": false,
    "vq_path": "./experiments/2022-10-31-smplx_S2G-body-vq-3d/ckpt-99.pth"
  },
  "DataLoader": {
    "batch_size": 128,
    "num_workers": 0
  },
  "Train": {
    "epochs": 100,
    "max_gradient_norm": 5,
    "learning_rate": {
      "generator_learning_rate": 1e-4,
      "discriminator_learning_rate": 1e-4
    }
  },
  "Log": {
    "save_every": 50,
    "print_every": 200,
    "name": "body-pixel2"
  }
}
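app.py above loads this file with `load_JsonConfig("config/body_pixel.json")` and reads the keys as nested attributes; a minimal sketch of that access pattern, with the expected values taken from the JSON above:

    from trainer.config import load_JsonConfig

    config = load_JsonConfig("config/body_pixel.json")
    print(config.Model.model_name)          # "s2g_body_pixel"
    print(config.Data.pose.convert_to_6d)   # False
    print(config.Log.name)                  # "body-pixel2", used for the visualise/video/ output dir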
config/body_vq.json ADDED
@@ -0,0 +1,62 @@
{
  "config_root_path": "/is/cluster/scratch/hyi/ExpressiveBody/SMPLifyX4/scripts",
  "dataset_load_mode": "pickle",
  "store_file_path": "store.pkl",
  "smplx_npz_path": "visualise/smplx_model/SMPLX_NEUTRAL_2020.npz",
  "extra_joint_path": "visualise/smplx_model/smplx_extra_joints.yaml",
  "j14_regressor_path": "visualise/smplx_model/SMPLX_to_J14.pkl",
  "param": {
    "w_j": 1,
    "w_b": 1,
    "w_h": 1
  },
  "Data": {
    "data_root": "../expressive_body-V0.7/",
    "pklname": "_3d_mfcc.pkl",
    "whole_video": false,
    "pose": {
      "normalization": false,
      "convert_to_6d": false,
      "norm_method": "all",
      "augmentation": false,
      "generate_length": 88,
      "pre_pose_length": 0,
      "pose_dim": 99,
      "expression": true
    },
    "aud": {
      "feat_method": "mfcc",
      "aud_feat_dim": 64,
      "aud_feat_win_size": null,
      "context_info": false
    }
  },
  "Model": {
    "model_type": "body",
    "model_name": "s2g_body_vq",
    "composition": false,
    "code_num": 2048,
    "bh_model": true,
    "AudioOpt": "Adam",
    "encoder_choice": "mfcc",
    "gan": false
  },
  "DataLoader": {
    "batch_size": 128,
    "num_workers": 0
  },
  "Train": {
    "epochs": 100,
    "max_gradient_norm": 5,
    "learning_rate": {
      "generator_learning_rate": 1e-4,
      "discriminator_learning_rate": 1e-4
    }
  },
  "Log": {
    "save_every": 50,
    "print_every": 200,
    "name": "test"
  }
}
config/face.json ADDED
@@ -0,0 +1,59 @@
{
  "config_root_path": "/is/cluster/scratch/hyi/ExpressiveBody/SMPLifyX4/scripts",
  "dataset_load_mode": "json",
  "store_file_path": "store.pkl",
  "smplx_npz_path": "visualise/smplx_model/SMPLX_NEUTRAL_2020.npz",
  "extra_joint_path": "visualise/smplx_model/smplx_extra_joints.yaml",
  "j14_regressor_path": "visualise/smplx_model/SMPLX_to_J14.pkl",
  "param": {
    "w_j": 1,
    "w_b": 1,
    "w_h": 1
  },
  "Data": {
    "data_root": "../ExpressiveWholeBodyDatasetv1.0/",
    "pklname": "_3d_wv2.pkl",
    "whole_video": true,
    "pose": {
      "normalization": false,
      "convert_to_6d": false,
      "norm_method": "all",
      "augmentation": false,
      "generate_length": 88,
      "pre_pose_length": 0,
      "pose_dim": 99,
      "expression": true
    },
    "aud": {
      "feat_method": "mfcc",
      "aud_feat_dim": 64,
      "aud_feat_win_size": null,
      "context_info": false
    }
  },
  "Model": {
    "model_type": "face",
    "model_name": "s2g_face",
    "AudioOpt": "SGD",
    "encoder_choice": "faceformer",
    "gan": false
  },
  "DataLoader": {
    "batch_size": 1,
    "num_workers": 0
  },
  "Train": {
    "epochs": 100,
    "max_gradient_norm": 5,
    "learning_rate": {
      "generator_learning_rate": 1e-4,
      "discriminator_learning_rate": 1e-4
    }
  },
  "Log": {
    "save_every": 50,
    "print_every": 1000,
    "name": "face"
  }
}
data_utils/__init__.py ADDED
@@ -0,0 +1,3 @@
# from .dataloader_csv import MultiVidData as csv_data
from .dataloader_torch import MultiVidData as torch_data
from .utils import get_melspec, get_mfcc, get_mfcc_old, get_mfcc_psf, get_mfcc_psf_min, get_mfcc_ta
data_utils/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (375 Bytes).
data_utils/__pycache__/consts.cpython-37.pyc ADDED
Binary file (92.7 kB).
data_utils/__pycache__/dataloader_torch.cpython-37.pyc ADDED
Binary file (5.31 kB).
data_utils/__pycache__/lower_body.cpython-37.pyc ADDED
Binary file (3.91 kB).
data_utils/__pycache__/mesh_dataset.cpython-37.pyc ADDED
Binary file (7.9 kB).
data_utils/__pycache__/rotation_conversion.cpython-37.pyc ADDED
Binary file (16.4 kB).
data_utils/__pycache__/utils.cpython-37.pyc ADDED
Binary file (7.77 kB).
 
data_utils/axis2matrix.py ADDED
@@ -0,0 +1,29 @@
import numpy as np
import math
import scipy.linalg as linalg


def rotate_mat(axis, radian):

    a = np.cross(np.eye(3), axis / linalg.norm(axis) * radian)

    rot_matrix = linalg.expm(a)

    return rot_matrix


def aaa2mat(axis, sin, cos):
    i = np.eye(3)
    nnt = np.dot(axis.T, axis)
    s = np.asarray([[0, -axis[0, 2], axis[0, 1]],
                    [axis[0, 2], 0, -axis[0, 0]],
                    [-axis[0, 1], axis[0, 0], 0]])
    r = cos * i + (1 - cos) * nnt + sin * s
    return r


rand_axis = np.asarray([[1, 0, 0]])
# rotation angle
r = math.pi / 2
# return the rotation matrix
rot_matrix = rotate_mat(rand_axis, r)
r2 = aaa2mat(rand_axis, np.sin(r), np.cos(r))
print(rot_matrix)
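For reference, aaa2mat above is the explicit form of Rodrigues' rotation formula for a unit axis n and angle θ:

    R = \cos\theta \, I + (1 - \cos\theta)\, n n^\top + \sin\theta \, [n]_\times

where [n]_× is the skew-symmetric cross-product matrix built in `s`, and rotate_mat reaches the same rotation through the matrix exponential R = expm(θ [n]_×); for the unit axis used in the script, `rot_matrix` and `r2` should therefore agree up to floating-point error.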
data_utils/consts.py ADDED
The diff for this file is too large to render.
 
data_utils/dataloader_torch.py ADDED
@@ -0,0 +1,279 @@
import sys
import os
sys.path.append(os.getcwd())
from tqdm import tqdm
from data_utils.utils import *
import torch.utils.data as data
from data_utils.mesh_dataset import SmplxDataset
from transformers import Wav2Vec2Processor


class MultiVidData():
    def __init__(self,
                 data_root,
                 speakers,
                 split='train',
                 limbscaling=False,
                 normalization=False,
                 norm_method='new',
                 split_trans_zero=False,
                 num_frames=25,
                 num_pre_frames=25,
                 num_generate_length=None,
                 aud_feat_win_size=None,
                 aud_feat_dim=64,
                 feat_method='mel_spec',
                 context_info=False,
                 smplx=False,
                 audio_sr=16000,
                 convert_to_6d=False,
                 expression=False,
                 config=None
                 ):
        self.data_root = data_root
        self.speakers = speakers
        self.split = split
        if split == 'pre':
            self.split = 'train'
        self.norm_method = norm_method
        self.normalization = normalization
        self.limbscaling = limbscaling
        self.convert_to_6d = convert_to_6d
        self.num_frames = num_frames
        self.num_pre_frames = num_pre_frames
        if num_generate_length is None:
            self.num_generate_length = num_frames
        else:
            self.num_generate_length = num_generate_length
        self.split_trans_zero = split_trans_zero

        dataset = SmplxDataset

        if self.split_trans_zero:
            self.trans_dataset_list = []
            self.zero_dataset_list = []
        else:
            self.all_dataset_list = []
        self.dataset = {}
        self.complete_data = []
        self.config = config
        load_mode = self.config.dataset_load_mode

        ###################### load with pickle file
        if load_mode == 'pickle':
            import pickle
            import subprocess

            # store_file_path='/tmp/store.pkl'
            # cp /is/cluster/scratch/hyi/ExpressiveBody/SMPLifyX4/scripts/store.pkl /tmp/store.pkl
            # subprocess.run(f'cp /is/cluster/scratch/hyi/ExpressiveBody/SMPLifyX4/scripts/store.pkl {store_file_path}',shell=True)

            # f = open(self.config.store_file_path, 'rb+')
            f = open(self.split + config.Data.pklname, 'rb+')
            self.dataset = pickle.load(f)
            f.close()
            for key in self.dataset:
                self.complete_data.append(self.dataset[key].complete_data)
        ###################### load with pickle file

        ###################### load with a csv file
        elif load_mode == 'csv':

            # imported from a separate code folder; to be integrated here properly later
            try:
                sys.path.append(self.config.config_root_path)
                from config import config_path
                from csv_parser import csv_parse

            except ImportError as e:
                print(f'err: {e}')
                raise ImportError('config root path error...')

            for speaker_name in self.speakers:
                # df_intervals=pd.read_csv(self.config.voca_csv_file_path)
                df_intervals = None
                df_intervals = df_intervals[df_intervals['speaker'] == speaker_name]
                df_intervals = df_intervals[df_intervals['dataset'] == self.split]

                print(f'speaker {speaker_name} train interval length: {len(df_intervals)}')
                for iter_index, (_, interval) in tqdm(
                    (enumerate(df_intervals.iterrows())), desc=f'load {speaker_name}'
                ):

                    (
                        interval_index,
                        interval_speaker,
                        interval_video_fn,
                        interval_id,

                        start_time,
                        end_time,
                        duration_time,
                        start_time_10,
                        over_flow_flag,
                        short_dur_flag,

                        big_video_dir,
                        small_video_dir_name,
                        speaker_video_path,

                        voca_basename,
                        json_basename,
                        wav_basename,
                        voca_top_clip_path,
                        voca_json_clip_path,
                        voca_wav_clip_path,

                        audio_output_fn,
                        image_output_path,
                        pifpaf_output_path,
                        mp_output_path,
                        op_output_path,
                        deca_output_path,
                        pixie_output_path,
                        cam_output_path,
                        ours_output_path,
                        merge_output_path,
                        multi_output_path,
                        gt_output_path,
                        ours_images_path,
                        pkl_fil_path,
                    ) = csv_parse(interval)

                    if not os.path.exists(pkl_fil_path) or not os.path.exists(audio_output_fn):
                        continue

                    key = f'{interval_video_fn}/{small_video_dir_name}'
                    self.dataset[key] = dataset(
                        data_root=pkl_fil_path,
                        speaker=speaker_name,
                        audio_fn=audio_output_fn,
                        audio_sr=audio_sr,
                        fps=num_frames,
                        feat_method=feat_method,
                        audio_feat_dim=aud_feat_dim,
                        train=(self.split == 'train'),
                        load_all=True,
                        split_trans_zero=self.split_trans_zero,
                        limbscaling=self.limbscaling,
                        num_frames=self.num_frames,
                        num_pre_frames=self.num_pre_frames,
                        num_generate_length=self.num_generate_length,
                        audio_feat_win_size=aud_feat_win_size,
                        context_info=context_info,
                        convert_to_6d=convert_to_6d,
                        expression=expression,
                        config=self.config
                    )
                    self.complete_data.append(self.dataset[key].complete_data)
        ###################### load with a csv file

        ###################### origin load method
        elif load_mode == 'json':

            # if self.split == 'train':
            #     import pickle
            #     f = open('store.pkl', 'rb+')
            #     self.dataset=pickle.load(f)
            #     f.close()
            #     for key in self.dataset:
            #         self.complete_data.append(self.dataset[key].complete_data)
            # else:https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav
            # if config.Model.model_type == 'face':
            am = Wav2Vec2Processor.from_pretrained("vitouphy/wav2vec2-xls-r-300m-phoneme")
            am_sr = 16000
            # else:
            #     am, am_sr = None, None
            for speaker_name in self.speakers:
                speaker_root = os.path.join(self.data_root, speaker_name)

                videos = [v for v in os.listdir(speaker_root)]
                print(videos)

                haode = huaide = 0

                for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
                    source_vid = vid
                    # vid_pth=os.path.join(speaker_root, source_vid, 'images/half', self.split)
                    vid_pth = os.path.join(speaker_root, source_vid, self.split)
                    if smplx == 'pose':
                        seqs = [s for s in os.listdir(vid_pth) if (s.startswith('clip'))]
                    else:
                        try:
                            seqs = [s for s in os.listdir(vid_pth)]
                        except:
                            continue

                    for s in seqs:
                        seq_root = os.path.join(vid_pth, s)
                        key = seq_root  # correspond to clip******
                        audio_fname = os.path.join(speaker_root, source_vid, self.split, s, '%s.wav' % (s))
                        motion_fname = os.path.join(speaker_root, source_vid, self.split, s, '%s.pkl' % (s))
                        if not os.path.isfile(audio_fname) or not os.path.isfile(motion_fname):
                            huaide = huaide + 1
                            continue

                        self.dataset[key] = dataset(
                            data_root=seq_root,
                            speaker=speaker_name,
                            motion_fn=motion_fname,
                            audio_fn=audio_fname,
                            audio_sr=audio_sr,
                            fps=num_frames,
                            feat_method=feat_method,
                            audio_feat_dim=aud_feat_dim,
                            train=(self.split == 'train'),
                            load_all=True,
                            split_trans_zero=self.split_trans_zero,
                            limbscaling=self.limbscaling,
                            num_frames=self.num_frames,
                            num_pre_frames=self.num_pre_frames,
                            num_generate_length=self.num_generate_length,
                            audio_feat_win_size=aud_feat_win_size,
                            context_info=context_info,
                            convert_to_6d=convert_to_6d,
                            expression=expression,
                            config=self.config,
                            am=am,
                            am_sr=am_sr,
                            whole_video=config.Data.whole_video
                        )
                        self.complete_data.append(self.dataset[key].complete_data)
                        haode = haode + 1
                print("huaide:{}, haode:{}".format(huaide, haode))
            import pickle

            f = open(self.split + config.Data.pklname, 'wb')
            pickle.dump(self.dataset, f)
            f.close()
        ###################### origin load method

        self.complete_data = np.concatenate(self.complete_data, axis=0)

        # assert self.complete_data.shape[-1] == (12+21+21)*2
        self.normalize_stats = {}

        self.data_mean = None
        self.data_std = None

    def get_dataset(self):
        self.normalize_stats['mean'] = self.data_mean
        self.normalize_stats['std'] = self.data_std

        for key in list(self.dataset.keys()):
            if self.dataset[key].complete_data.shape[0] < self.num_generate_length:
                continue
            self.dataset[key].num_generate_length = self.num_generate_length
            self.dataset[key].get_dataset(self.normalization, self.normalize_stats, self.split)
            self.all_dataset_list.append(self.dataset[key].all_dataset)

        if self.split_trans_zero:
            self.trans_dataset = data.ConcatDataset(self.trans_dataset_list)
            self.zero_dataset = data.ConcatDataset(self.zero_dataset_list)
        else:
            self.all_dataset = data.ConcatDataset(self.all_dataset_list)
data_utils/dataset_preprocess.py ADDED
@@ -0,0 +1,170 @@
import os
import pickle
from tqdm import tqdm
import shutil
import torch
import numpy as np
import librosa
import random

speakers = ['seth', 'conan', 'oliver', 'chemistry']
data_root = "../ExpressiveWholeBodyDatasetv1.0/"
split = 'train'


def split_list(full_list, shuffle=False, ratio=0.2):
    n_total = len(full_list)
    offset_0 = int(n_total * ratio)
    offset_1 = int(n_total * ratio * 2)
    if n_total == 0 or offset_1 < 1:
        return [], [], full_list  # keep a consistent 3-tuple even for empty/tiny inputs
    if shuffle:
        random.shuffle(full_list)
    sublist_0 = full_list[:offset_0]
    sublist_1 = full_list[offset_0:offset_1]
    sublist_2 = full_list[offset_1:]
    return sublist_0, sublist_1, sublist_2


def moveto(list, file):
    for f in list:
        before, after = '/'.join(f.split('/')[:-1]), f.split('/')[-1]
        new_path = os.path.join(before, file)
        new_path = os.path.join(new_path, after)
        # os.makedirs(new_path)
        # os.path.isdir(new_path)
        # shutil.move(f, new_path)

        # copy to the new split directory
        shutil.copytree(f, new_path)
        # delete the original files under train
        shutil.rmtree(f)
    return None


def read_pkl(data):
    betas = np.array(data['betas'])

    jaw_pose = np.array(data['jaw_pose'])
    leye_pose = np.array(data['leye_pose'])
    reye_pose = np.array(data['reye_pose'])
    global_orient = np.array(data['global_orient']).squeeze()
    body_pose = np.array(data['body_pose_axis'])
    left_hand_pose = np.array(data['left_hand_pose'])
    right_hand_pose = np.array(data['right_hand_pose'])

    full_body = np.concatenate(
        (jaw_pose, leye_pose, reye_pose, global_orient, body_pose, left_hand_pose, right_hand_pose), axis=1)

    expression = np.array(data['expression'])
    full_body = np.concatenate((full_body, expression), axis=1)

    if (full_body.shape[0] < 90) or (torch.isnan(torch.from_numpy(full_body)).sum() > 0):
        return 1
    else:
        return 0


for speaker_name in speakers:
    speaker_root = os.path.join(data_root, speaker_name)

    videos = [v for v in os.listdir(speaker_root)]
    print(videos)

    haode = huaide = 0
    total_seqs = []

    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
        # for vid in videos:
        source_vid = vid
        vid_pth = os.path.join(speaker_root, source_vid)
        # vid_pth = os.path.join(speaker_root, source_vid, 'images/half', split)
        t = os.path.join(speaker_root, source_vid, 'test')
        v = os.path.join(speaker_root, source_vid, 'val')

        # if os.path.exists(t):
        #     shutil.rmtree(t)
        # if os.path.exists(v):
        #     shutil.rmtree(v)
        try:
            seqs = [s for s in os.listdir(vid_pth)]
        except:
            continue
        # if len(seqs) == 0:
        #     shutil.rmtree(os.path.join(speaker_root, source_vid))
        #     None
        for s in seqs:
            quality = 0
            total_seqs.append(os.path.join(vid_pth, s))
            seq_root = os.path.join(vid_pth, s)
            key = seq_root  # correspond to clip******
            audio_fname = os.path.join(speaker_root, source_vid, s, '%s.wav' % (s))

            # delete the data without audio or whose audio file could not be read
            if os.path.isfile(audio_fname):
                try:
                    audio = librosa.load(audio_fname)
                except:
                    # print(key)
                    shutil.rmtree(key)
                    huaide = huaide + 1
                    continue
            else:
                huaide = huaide + 1
                # print(key)
                shutil.rmtree(key)
                continue

            # check motion file
            motion_fname = os.path.join(speaker_root, source_vid, s, '%s.pkl' % (s))
            try:
                f = open(motion_fname, 'rb+')
            except:
                shutil.rmtree(key)
                huaide = huaide + 1
                continue

            data = pickle.load(f)
            w = read_pkl(data)
            f.close()
            quality = quality + w

            if w == 1:
                shutil.rmtree(key)
                # print(key)
                huaide = huaide + 1
                continue

            haode = haode + 1

    print("huaide:{}, haode:{}, total_seqs:{}".format(huaide, haode, total_seqs.__len__()))

for speaker_name in speakers:
    speaker_root = os.path.join(data_root, speaker_name)

    videos = [v for v in os.listdir(speaker_root)]
    print(videos)

    haode = huaide = 0
    total_seqs = []

    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
        # for vid in videos:
        source_vid = vid
        vid_pth = os.path.join(speaker_root, source_vid)
        try:
            seqs = [s for s in os.listdir(vid_pth)]
        except:
            continue
        for s in seqs:
            quality = 0
            total_seqs.append(os.path.join(vid_pth, s))
    print("total_seqs:{}".format(total_seqs.__len__()))
    # split the dataset
    test_list, val_list, train_list = split_list(total_seqs, True, 0.1)
    print(len(test_list), len(val_list), len(train_list))
    moveto(train_list, 'train')
    moveto(test_list, 'test')
    moveto(val_list, 'val')
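As a concrete illustration of the split performed above (ratio=0.1, i.e. roughly 10% test, 10% val, 80% train), a minimal sketch with shuffling disabled so the result is deterministic:

    seqs = ['clip%d' % i for i in range(10)]
    test, val, train = split_list(seqs, shuffle=False, ratio=0.1)
    # offset_0 = int(10 * 0.1) = 1, offset_1 = int(10 * 0.2) = 2
    # test  -> ['clip0']
    # val   -> ['clip1']
    # train -> ['clip2', ..., 'clip9']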
data_utils/get_j.py ADDED
@@ -0,0 +1,51 @@
import torch


def to3d(poses, config):
    if config.Data.pose.convert_to_6d:
        if config.Data.pose.expression:
            poses_exp = poses[:, -100:]
            poses = poses[:, :-100]

        poses = poses.reshape(poses.shape[0], -1, 5)
        sin, cos = poses[:, :, 3], poses[:, :, 4]
        pose_angle = torch.atan2(sin, cos)
        poses = (poses[:, :, :3] * pose_angle.unsqueeze(dim=-1)).reshape(poses.shape[0], -1)

        if config.Data.pose.expression:
            poses = torch.cat([poses, poses_exp], dim=-1)
    return poses


def get_joint(smplx_model, betas, pred):
    joint = smplx_model(betas=betas.repeat(pred.shape[0], 1),
                        expression=pred[:, 165:265],
                        jaw_pose=pred[:, 0:3],
                        leye_pose=pred[:, 3:6],
                        reye_pose=pred[:, 6:9],
                        global_orient=pred[:, 9:12],
                        body_pose=pred[:, 12:75],
                        left_hand_pose=pred[:, 75:120],
                        right_hand_pose=pred[:, 120:165],
                        return_verts=True)['joints']
    return joint


def get_joints(smplx_model, betas, pred):
    if len(pred.shape) == 3:
        B = pred.shape[0]
        x = 4 if B >= 4 else B
        T = pred.shape[1]
        pred = pred.reshape(-1, 265)
        smplx_model.batch_size = L = T * x

        times = pred.shape[0] // smplx_model.batch_size
        joints = []
        for i in range(times):
            joints.append(get_joint(smplx_model, betas, pred[i*L:(i+1)*L]))
        joints = torch.cat(joints, dim=0)
        joints = joints.reshape(B, T, -1, 3)
    else:
        smplx_model.batch_size = pred.shape[0]
        joints = get_joint(smplx_model, betas, pred)
    return joints
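get_joints above regresses SMPL-X joints for a whole sequence by pushing fixed-size batches through the model; a minimal usage sketch, assuming an SMPL-X model built with the same key options as in app.py (300 betas, 100 expression coefficients, SMPLX_NEUTRAL.npz available under ./visualise/smplx/) and pred shaped [T, 265] as produced there:

    import torch
    import smplx as smpl
    from data_utils.get_j import get_joints

    # SMPL-X model with the same key options as in app.py (assumes the .npz is downloaded)
    smplx_model = smpl.create(model_path='./visualise/', model_type='smplx',
                              num_betas=300, num_expression_coeffs=100,
                              use_pca=False, flat_hand_mean=False,
                              create_transl=False, dtype=torch.float64)
    betas = torch.zeros([1, 300], dtype=torch.float64)
    pred = torch.zeros([88, 265], dtype=torch.float64)   # 88 frames: 165 pose dims + 100 expression dims
    joints = get_joints(smplx_model, betas, pred)
    print(joints.shape)                                  # roughly (88, num_joints, 3)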
data_utils/hand_component.json ADDED
The diff for this file is too large to render.
 
data_utils/lower_body.py ADDED
@@ -0,0 +1,143 @@
import numpy as np
import torch

lower_pose = torch.tensor(
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0747, -0.0158, -0.0152, -1.1826512813568115, 0.23866955935955048,
     0.15146760642528534, -1.2604516744613647, -0.3160211145877838,
     -0.1603458970785141, 1.1654603481292725, 0.0, 0.0, 1.2521806955337524, 0.041598282754421234, -0.06312154978513718,
     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
lower_pose_stand = torch.tensor([
    8.9759e-04, 7.1074e-04, -5.9163e-06, 8.9759e-04, 7.1074e-04, -5.9163e-06,
    3.0747, -0.0158, -0.0152,
    -3.6665e-01, -8.8455e-03, 1.6113e-01, -3.6665e-01, -8.8455e-03, 1.6113e-01,
    -3.9716e-01, -4.0229e-02, -1.2637e-01,
    7.9163e-01, 6.8519e-02, -1.5091e-01, 7.9163e-01, 6.8519e-02, -1.5091e-01,
    7.8632e-01, -4.3810e-02, 1.4375e-02,
    -1.0675e-01, 1.2635e-01, 1.6711e-02, -1.0675e-01, 1.2635e-01, 1.6711e-02, ])
# lower_pose_stand = torch.tensor(
#     [6.4919e-02, 3.3018e-02, 1.7485e-02, 8.9759e-04, 7.1074e-04, -5.9163e-06,
#      3.0747, -0.0158, -0.0152,
#      -3.3633e+00, -9.3915e-02, 3.0996e-01, -3.6665e-01, -8.8455e-03, 1.6113e-01,
#      1.1654603481292725, 0.0, 0.0,
#      4.4167e-01, 6.7183e-03, -3.6379e-03, 7.9163e-01, 6.8519e-02, -1.5091e-01,
#      0.0, 0.0, 0.0,
#      2.2910e-02, -2.4797e-02, -5.5657e-03, -1.0675e-01, 1.2635e-01, 1.6711e-02,])
lower_body = [0, 1, 3, 4, 6, 7, 9, 10]
count_part = [6, 9, 12, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
              29, 30, 31, 32, 33, 34, 35, 36, 37,
              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
fix_index = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
             29,
             35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
             50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
             65, 66, 67, 68, 69, 70, 71, 72, 73, 74]
all_index = np.ones(275)
all_index[fix_index] = 0
c_index = []
i = 0
for num in all_index:
    if num == 1:
        c_index.append(i)
    i = i + 1
c_index = np.asarray(c_index)

fix_index_3d = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                21, 22, 23, 24, 25, 26,
                30, 31, 32, 33, 34, 35,
                45, 46, 47, 48, 49, 50]
all_index_3d = np.ones(165)
all_index_3d[fix_index_3d] = 0
c_index_3d = []
i = 0
for num in all_index_3d:
    if num == 1:
        c_index_3d.append(i)
    i = i + 1
c_index_3d = np.asarray(c_index_3d)

c_index_6d = []
i = 0
for num in all_index_3d:
    if num == 1:
        c_index_6d.append(2*i)
        c_index_6d.append(2 * i + 1)
    i = i + 1
c_index_6d = np.asarray(c_index_6d)


def part2full(input, stand=False):
    if stand:
        # lp = lower_pose_stand.unsqueeze(dim=0).repeat(input.shape[0], 1).to(input.device)
        lp = torch.zeros_like(lower_pose)
        lp[6:9] = torch.tensor([3.0747, -0.0158, -0.0152])
        lp = lp.unsqueeze(dim=0).repeat(input.shape[0], 1).to(input.device)
    else:
        lp = lower_pose.unsqueeze(dim=0).repeat(input.shape[0], 1).to(input.device)

    input = torch.cat([input[:, :3],
                       lp[:, :15],
                       input[:, 3:6],
                       lp[:, 15:21],
                       input[:, 6:9],
                       lp[:, 21:27],
                       input[:, 9:12],
                       lp[:, 27:],
                       input[:, 12:]]
                      , dim=1)
    return input


def pred2poses(input, gt):
    input = torch.cat([input[:, :3],
                       gt[0:1, 3:18].repeat(input.shape[0], 1),
                       input[:, 3:6],
                       gt[0:1, 21:27].repeat(input.shape[0], 1),
                       input[:, 6:9],
                       gt[0:1, 30:36].repeat(input.shape[0], 1),
                       input[:, 9:12],
                       gt[0:1, 39:45].repeat(input.shape[0], 1),
                       input[:, 12:]]
                      , dim=1)
    return input


def poses2poses(input, gt):
    input = torch.cat([input[:, :3],
                       gt[0:1, 3:18].repeat(input.shape[0], 1),
                       input[:, 18:21],
                       gt[0:1, 21:27].repeat(input.shape[0], 1),
                       input[:, 27:30],
                       gt[0:1, 30:36].repeat(input.shape[0], 1),
                       input[:, 36:39],
                       gt[0:1, 39:45].repeat(input.shape[0], 1),
                       input[:, 45:]]
                      , dim=1)
    return input


def poses2pred(input, stand=False):
    if stand:
        lp = lower_pose_stand.unsqueeze(dim=0).repeat(input.shape[0], 1).to(input.device)
        # lp = torch.zeros_like(lower_pose).unsqueeze(dim=0).repeat(input.shape[0], 1).to(input.device)
    else:
        lp = lower_pose.unsqueeze(dim=0).repeat(input.shape[0], 1).to(input.device)
    input = torch.cat([input[:, :3],
                       lp[:, :15],
                       input[:, 18:21],
                       lp[:, 15:21],
                       input[:, 27:30],
                       lp[:, 21:27],
                       input[:, 36:39],
                       lp[:, 27:],
                       input[:, 45:]]
                      , dim=1)
    return input


rearrange = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]\
    # , 22, 23, 24, 25, 40, 26, 41,
    # 27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 32, 47, 33, 48, 34, 49, 35, 50, 36, 51, 37, 52, 38, 53, 39, 54, 55,
    # 57, 56, 59, 58, 60, 63, 61, 64, 62, 65, 66, 71, 67, 72, 68, 73, 69, 74, 70, 75]

symmetry = [0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1]  # , 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
# 1, 1, 1, 1, 1, 1]
data_utils/mesh_dataset.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import sys
3
+ import os
4
+
5
+ sys.path.append(os.getcwd())
6
+
7
+ import json
8
+ from glob import glob
9
+ from data_utils.utils import *
10
+ import torch.utils.data as data
11
+ from data_utils.consts import speaker_id
12
+ from data_utils.lower_body import count_part
13
+ import random
14
+ from data_utils.rotation_conversion import axis_angle_to_matrix, matrix_to_rotation_6d
15
+
16
+ with open('data_utils/hand_component.json') as file_obj:
17
+ comp = json.load(file_obj)
18
+ left_hand_c = np.asarray(comp['left'])
19
+ right_hand_c = np.asarray(comp['right'])
20
+
21
+
22
+ def to3d(data):
23
+ left_hand_pose = np.einsum('bi,ij->bj', data[:, 75:87], left_hand_c[:12, :])
24
+ right_hand_pose = np.einsum('bi,ij->bj', data[:, 87:99], right_hand_c[:12, :])
25
+ data = np.concatenate((data[:, :75], left_hand_pose, right_hand_pose), axis=-1)
26
+ return data
27
+
28
+
29
+ class SmplxDataset():
30
+ '''
31
+ creat a dataset for every segment and concat.
32
+ '''
33
+
34
+ def __init__(self,
35
+ data_root,
36
+ speaker,
37
+ motion_fn,
38
+ audio_fn,
39
+ audio_sr,
40
+ fps,
41
+ feat_method='mel_spec',
42
+ audio_feat_dim=64,
43
+ audio_feat_win_size=None,
44
+
45
+ train=True,
46
+ load_all=False,
47
+ split_trans_zero=False,
48
+ limbscaling=False,
49
+ num_frames=25,
50
+ num_pre_frames=25,
51
+ num_generate_length=25,
52
+ context_info=False,
53
+ convert_to_6d=False,
54
+ expression=False,
55
+ config=None,
56
+ am=None,
57
+ am_sr=None,
58
+ whole_video=False
59
+ ):
60
+
61
+ self.data_root = data_root
62
+ self.speaker = speaker
63
+
64
+ self.feat_method = feat_method
65
+ self.audio_fn = audio_fn
66
+ self.audio_sr = audio_sr
67
+ self.fps = fps
68
+ self.audio_feat_dim = audio_feat_dim
69
+ self.audio_feat_win_size = audio_feat_win_size
70
+ self.context_info = context_info # for aud feat
71
+ self.convert_to_6d = convert_to_6d
72
+ self.expression = expression
73
+
74
+ self.train = train
75
+ self.load_all = load_all
76
+ self.split_trans_zero = split_trans_zero
77
+ self.limbscaling = limbscaling
78
+ self.num_frames = num_frames
79
+ self.num_pre_frames = num_pre_frames
80
+ self.num_generate_length = num_generate_length
81
+ # print('num_generate_length ', self.num_generate_length)
82
+
83
+ self.config = config
84
+ self.am_sr = am_sr
85
+ self.whole_video = whole_video
86
+ load_mode = self.config.dataset_load_mode
87
+
88
+ if load_mode == 'pickle':
89
+ raise NotImplementedError
90
+
91
+ elif load_mode == 'csv':
92
+ import pickle
93
+ with open(data_root, 'rb') as f:
94
+ u = pickle._Unpickler(f)
95
+ data = u.load()
96
+ self.data = data[0]
97
+ if self.load_all:
98
+ self._load_npz_all()
99
+
100
+ elif load_mode == 'json':
101
+ self.annotations = glob(data_root + '/*pkl')
102
+ if len(self.annotations) == 0:
103
+ raise FileNotFoundError(data_root + ' are empty')
104
+ self.annotations = sorted(self.annotations)
105
+ self.img_name_list = self.annotations
106
+
107
+ if self.load_all:
108
+ self._load_them_all(am, am_sr, motion_fn)
109
+
110
+ def _load_npz_all(self):
111
+ self.loaded_data = {}
112
+ self.complete_data = []
113
+ data = self.data
114
+ shape = data['body_pose_axis'].shape[0]
115
+ self.betas = data['betas']
116
+ self.img_name_list = []
117
+ for index in range(shape):
118
+ img_name = f'{index:6d}'
119
+ self.img_name_list.append(img_name)
120
+
121
+ jaw_pose = data['jaw_pose'][index]
122
+ leye_pose = data['leye_pose'][index]
123
+ reye_pose = data['reye_pose'][index]
124
+ global_orient = data['global_orient'][index]
125
+ body_pose = data['body_pose_axis'][index]
126
+ left_hand_pose = data['left_hand_pose'][index]
127
+ right_hand_pose = data['right_hand_pose'][index]
128
+
129
+ full_body = np.concatenate(
130
+ (jaw_pose, leye_pose, reye_pose, global_orient, body_pose, left_hand_pose, right_hand_pose))
131
+ assert full_body.shape[0] == 99
132
+ if self.convert_to_6d:
133
+ full_body = to3d(full_body)
134
+ full_body = torch.from_numpy(full_body)
135
+ full_body = matrix_to_rotation_6d(axis_angle_to_matrix(full_body))
136
+ full_body = np.asarray(full_body)
137
+ if self.expression:
138
+ expression = data['expression'][index]
139
+ full_body = np.concatenate((full_body, expression))
140
+ # full_body = np.concatenate((full_body, non_zero))
141
+ else:
142
+ full_body = to3d(full_body)
143
+ if self.expression:
144
+ expression = data['expression'][index]
145
+ full_body = np.concatenate((full_body, expression))
146
+
147
+ self.loaded_data[img_name] = full_body.reshape(-1)
148
+ self.complete_data.append(full_body.reshape(-1))
149
+
150
+ self.complete_data = np.array(self.complete_data)
151
+
152
+ if self.audio_feat_win_size is not None:
153
+ self.audio_feat = get_mfcc_old(self.audio_fn).transpose(1, 0)
154
+ # print(self.audio_feat.shape)
155
+ else:
156
+ if self.feat_method == 'mel_spec':
157
+ self.audio_feat = get_melspec(self.audio_fn, fps=self.fps, sr=self.audio_sr, n_mels=self.audio_feat_dim)
158
+ elif self.feat_method == 'mfcc':
159
+ self.audio_feat = get_mfcc(self.audio_fn,
160
+ smlpx=True,
161
+ sr=self.audio_sr,
162
+ n_mfcc=self.audio_feat_dim,
163
+ win_size=self.audio_feat_win_size
164
+ )
165
+
166
+ def _load_them_all(self, am, am_sr, motion_fn):
167
+ self.loaded_data = {}
168
+ self.complete_data = []
169
+ f = open(motion_fn, 'rb+')
170
+ data = pickle.load(f)
171
+
172
+ self.betas = np.array(data['betas'])
173
+
174
+ jaw_pose = np.array(data['jaw_pose'])
175
+ leye_pose = np.array(data['leye_pose'])
176
+ reye_pose = np.array(data['reye_pose'])
177
+ global_orient = np.array(data['global_orient']).squeeze()
178
+ body_pose = np.array(data['body_pose_axis'])
179
+ left_hand_pose = np.array(data['left_hand_pose'])
180
+ right_hand_pose = np.array(data['right_hand_pose'])
181
+
182
+ full_body = np.concatenate(
183
+ (jaw_pose, leye_pose, reye_pose, global_orient, body_pose, left_hand_pose, right_hand_pose), axis=1)
184
+ assert full_body.shape[1] == 99
185
+
186
+
187
+ if self.convert_to_6d:
188
+ full_body = to3d(full_body)
189
+ full_body = torch.from_numpy(full_body)
190
+ full_body = matrix_to_rotation_6d(axis_angle_to_matrix(full_body.reshape(-1, 55, 3))).reshape(-1, 330)
191
+ full_body = np.asarray(full_body)
192
+ if self.expression:
193
+ expression = np.array(data['expression'])
194
+ full_body = np.concatenate((full_body, expression), axis=1)
195
+
196
+ else:
197
+ full_body = to3d(full_body)
198
+ expression = np.array(data['expression'])
199
+ full_body = np.concatenate((full_body, expression), axis=1)
200
+
201
+ self.complete_data = full_body
202
+ self.complete_data = np.array(self.complete_data)
203
+
204
+ if self.audio_feat_win_size is not None:
205
+ self.audio_feat = get_mfcc_old(self.audio_fn).transpose(1, 0)
206
+ else:
207
+ # if self.feat_method == 'mel_spec':
208
+ # self.audio_feat = get_melspec(self.audio_fn, fps=self.fps, sr=self.audio_sr, n_mels=self.audio_feat_dim)
209
+ # elif self.feat_method == 'mfcc':
210
+ self.audio_feat = get_mfcc_ta(self.audio_fn,
211
+ smlpx=True,
212
+ fps=30,
213
+ sr=self.audio_sr,
214
+ n_mfcc=self.audio_feat_dim,
215
+ win_size=self.audio_feat_win_size,
216
+ type=self.feat_method,
217
+ am=am,
218
+ am_sr=am_sr,
219
+ encoder_choice=self.config.Model.encoder_choice,
220
+ )
221
+ # with open(audio_file, 'w', encoding='utf-8') as file:
222
+ # file.write(json.dumps(self.audio_feat.__array__().tolist(), indent=0, ensure_ascii=False))
223
+
224
+ def get_dataset(self, normalization=False, normalize_stats=None, split='train'):
225
+
226
+ class __Worker__(data.Dataset):
227
+ def __init__(child, index_list, normalization, normalize_stats, split='train') -> None:
228
+ super().__init__()
229
+ child.index_list = index_list
230
+ child.normalization = normalization
231
+ child.normalize_stats = normalize_stats
232
+ child.split = split
233
+
234
+ def __getitem__(child, index):
235
+ num_generate_length = self.num_generate_length
236
+ num_pre_frames = self.num_pre_frames
237
+ seq_len = num_generate_length + num_pre_frames
238
+ # print(num_generate_length)
239
+
240
+ index = child.index_list[index]
241
+ index_new = index + random.randrange(0, 5, 3)
242
+ if index_new + seq_len > self.complete_data.shape[0]:
243
+ index_new = index
244
+ index = index_new
245
+
246
+ if child.split in ['val', 'pre', 'test'] or self.whole_video:
247
+ index = 0
248
+ seq_len = self.complete_data.shape[0]
249
+ seq_data = []
250
+ assert index + seq_len <= self.complete_data.shape[0]
251
+ # print(seq_len)
252
+ seq_data = self.complete_data[index:(index + seq_len), :]
253
+ seq_data = np.array(seq_data)
254
+
255
+ '''
256
+ audio feature,
257
+ '''
258
+ if not self.context_info:
259
+ if not self.whole_video:
260
+ audio_feat = self.audio_feat[index:index + seq_len, ...]
261
+ if audio_feat.shape[0] < seq_len:
262
+ audio_feat = np.pad(audio_feat, [[0, seq_len - audio_feat.shape[0]], [0, 0]],
263
+ mode='reflect')
264
+
265
+ assert audio_feat.shape[0] == seq_len and audio_feat.shape[1] == self.audio_feat_dim
266
+ else:
267
+ audio_feat = self.audio_feat
268
+
269
+ else: # including feature and history
270
+ if self.audio_feat_win_size is None:
271
+ audio_feat = self.audio_feat[index:index + seq_len + num_pre_frames, ...]
272
+ if audio_feat.shape[0] < seq_len + num_pre_frames:
273
+ audio_feat = np.pad(audio_feat,
274
+ [[0, seq_len + self.num_frames - audio_feat.shape[0]], [0, 0]],
275
+ mode='constant')
276
+
277
+ assert audio_feat.shape[0] == self.num_frames + seq_len and audio_feat.shape[
278
+ 1] == self.audio_feat_dim
279
+
280
+ if child.normalization:
281
+ data_mean = child.normalize_stats['mean'].reshape(1, -1)
282
+ data_std = child.normalize_stats['std'].reshape(1, -1)
283
+ seq_data[:, :330] = (seq_data[:, :330] - data_mean) / data_std
284
+ if child.split in['train', 'test']:
285
+ if self.convert_to_6d:
286
+ if self.expression:
287
+ data_sample = {
288
+ 'poses': seq_data[:, :330].astype(np.float).transpose(1, 0),
289
+ 'expression': seq_data[:, 330:].astype(np.float).transpose(1, 0),
290
+ # 'nzero': seq_data[:, 375:].astype(np.float).transpose(1, 0),
291
+ 'aud_feat': audio_feat.astype(np.float).transpose(1, 0),
292
+ 'speaker': speaker_id[self.speaker],
293
+ 'betas': self.betas,
294
+ 'aud_file': self.audio_fn,
295
+ }
296
+ else:
297
+ data_sample = {
298
+ 'poses': seq_data[:, :330].astype(np.float).transpose(1, 0),
299
+ 'nzero': seq_data[:, 330:].astype(np.float).transpose(1, 0),
300
+ 'aud_feat': audio_feat.astype(np.float).transpose(1, 0),
301
+ 'speaker': speaker_id[self.speaker],
302
+ 'betas': self.betas
303
+ }
304
+ else:
305
+ if self.expression:
306
+ data_sample = {
307
+ 'poses': seq_data[:, :165].astype(np.float).transpose(1, 0),
308
+ 'expression': seq_data[:, 165:].astype(np.float).transpose(1, 0),
309
+ 'aud_feat': audio_feat.astype(np.float).transpose(1, 0),
310
+ # 'wv2_feat': wv2_feat.astype(np.float).transpose(1, 0),
311
+ 'speaker': speaker_id[self.speaker],
312
+ 'aud_file': self.audio_fn,
313
+ 'betas': self.betas
314
+ }
315
+ else:
316
+ data_sample = {
317
+ 'poses': seq_data.astype(np.float).transpose(1, 0),
318
+ 'aud_feat': audio_feat.astype(np.float).transpose(1, 0),
319
+ 'speaker': speaker_id[self.speaker],
320
+ 'betas': self.betas
321
+ }
322
+ return data_sample
323
+ else:
324
+ data_sample = {
325
+ 'poses': seq_data[:, :330].astype(np.float).transpose(1, 0),
326
+ 'expression': seq_data[:, 330:].astype(np.float).transpose(1, 0),
327
+ # 'nzero': seq_data[:, 325:].astype(np.float).transpose(1, 0),
328
+ 'aud_feat': audio_feat.astype(np.float).transpose(1, 0),
329
+ 'aud_file': self.audio_fn,
330
+ 'speaker': speaker_id[self.speaker],
331
+ 'betas': self.betas
332
+ }
333
+ return data_sample
334
+ def __len__(child):
335
+ return len(child.index_list)
336
+
337
+ if split == 'train':
338
+ index_list = list(
339
+ range(0, min(self.complete_data.shape[0], self.audio_feat.shape[0]) - self.num_generate_length - self.num_pre_frames,
340
+ 6))
341
+ elif split in ['val', 'test']:
342
+ index_list = list([0])
343
+ if self.whole_video:
344
+ index_list = list([0])
345
+ self.all_dataset = __Worker__(index_list, normalization, normalize_stats, split)
346
+
347
+ def __len__(self):
348
+ return len(self.img_name_list)
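
A toy, self-contained sketch of how the data_sample dictionaries built in __Worker__.__getitem__ above collate under PyTorch's default DataLoader. Only the dictionary keys and the 330-dim pose width come from this diff; the sequence length, expression width, audio feature width and betas size below are illustrative assumptions.

    import numpy as np
    from torch.utils.data import DataLoader, Dataset

    class ToySamples(Dataset):
        # stand-in that mimics the (channels, seq_len) layout produced by transpose(1, 0) above
        def __len__(self):
            return 8

        def __getitem__(self, i):
            return {
                'poses': np.zeros((330, 88), dtype=np.float32),       # (pose_dims, seq_len); 88 is an assumed seq_len
                'expression': np.zeros((100, 88), dtype=np.float32),  # assumed expression width
                'aud_feat': np.zeros((64, 88), dtype=np.float32),     # assumed audio_feat_dim
                'speaker': 0,
                'betas': np.zeros(300, dtype=np.float32),             # assumed betas width
            }

    batch = next(iter(DataLoader(ToySamples(), batch_size=4)))
    print(batch['poses'].shape, batch['aud_feat'].shape)  # torch.Size([4, 330, 88]) torch.Size([4, 64, 88])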
data_utils/rotation_conversion.py ADDED
@@ -0,0 +1,551 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
2
+ # Check PYTORCH3D_LICENCE before use
3
+
4
+ import functools
5
+ from typing import Optional
6
+
7
+ import torch
8
+ import torch.nn.functional as F
9
+
10
+
11
+ """
12
+ The transformation matrices returned from the functions in this file assume
13
+ the points on which the transformation will be applied are column vectors.
14
+ i.e. the R matrix is structured as
15
+
16
+ R = [
17
+ [Rxx, Rxy, Rxz],
18
+ [Ryx, Ryy, Ryz],
19
+ [Rzx, Rzy, Rzz],
20
+ ] # (3, 3)
21
+
22
+ This matrix can be applied to column vectors by post multiplication
23
+ by the points e.g.
24
+
25
+ points = [[0], [1], [2]] # (3 x 1) xyz coordinates of a point
26
+ transformed_points = R * points
27
+
28
+ To apply the same matrix to points which are row vectors, the R matrix
29
+ can be transposed and pre multiplied by the points:
30
+
31
+ e.g.
32
+ points = [[0, 1, 2]] # (1 x 3) xyz coordinates of a point
33
+ transformed_points = points * R.transpose(1, 0)
34
+ """
35
+
36
+
37
+ def quaternion_to_matrix(quaternions):
38
+ """
39
+ Convert rotations given as quaternions to rotation matrices.
40
+
41
+ Args:
42
+ quaternions: quaternions with real part first,
43
+ as tensor of shape (..., 4).
44
+
45
+ Returns:
46
+ Rotation matrices as tensor of shape (..., 3, 3).
47
+ """
48
+ r, i, j, k = torch.unbind(quaternions, -1)
49
+ two_s = 2.0 / (quaternions * quaternions).sum(-1)
50
+
51
+ o = torch.stack(
52
+ (
53
+ 1 - two_s * (j * j + k * k),
54
+ two_s * (i * j - k * r),
55
+ two_s * (i * k + j * r),
56
+ two_s * (i * j + k * r),
57
+ 1 - two_s * (i * i + k * k),
58
+ two_s * (j * k - i * r),
59
+ two_s * (i * k - j * r),
60
+ two_s * (j * k + i * r),
61
+ 1 - two_s * (i * i + j * j),
62
+ ),
63
+ -1,
64
+ )
65
+ return o.reshape(quaternions.shape[:-1] + (3, 3))
66
+
67
+
68
+ def _copysign(a, b):
69
+ """
70
+ Return a tensor where each element has the absolute value taken from the
71
+ corresponding element of a, with sign taken from the corresponding
72
+ element of b. This is like the standard copysign floating-point operation,
73
+ but is not careful about negative 0 and NaN.
74
+
75
+ Args:
76
+ a: source tensor.
77
+ b: tensor whose signs will be used, of the same shape as a.
78
+
79
+ Returns:
80
+ Tensor of the same shape as a with the signs of b.
81
+ """
82
+ signs_differ = (a < 0) != (b < 0)
83
+ return torch.where(signs_differ, -a, a)
84
+
85
+
86
+ def _sqrt_positive_part(x):
87
+ """
88
+ Returns torch.sqrt(torch.max(0, x))
89
+ but with a zero subgradient where x is 0.
90
+ """
91
+ ret = torch.zeros_like(x)
92
+ positive_mask = x > 0
93
+ ret[positive_mask] = torch.sqrt(x[positive_mask])
94
+ return ret
95
+
96
+
97
+ def matrix_to_quaternion(matrix):
98
+ """
99
+ Convert rotations given as rotation matrices to quaternions.
100
+
101
+ Args:
102
+ matrix: Rotation matrices as tensor of shape (..., 3, 3).
103
+
104
+ Returns:
105
+ quaternions with real part first, as tensor of shape (..., 4).
106
+ """
107
+ if matrix.size(-1) != 3 or matrix.size(-2) != 3:
108
+ raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.")
109
+ m00 = matrix[..., 0, 0]
110
+ m11 = matrix[..., 1, 1]
111
+ m22 = matrix[..., 2, 2]
112
+ o0 = 0.5 * _sqrt_positive_part(1 + m00 + m11 + m22)
113
+ x = 0.5 * _sqrt_positive_part(1 + m00 - m11 - m22)
114
+ y = 0.5 * _sqrt_positive_part(1 - m00 + m11 - m22)
115
+ z = 0.5 * _sqrt_positive_part(1 - m00 - m11 + m22)
116
+ o1 = _copysign(x, matrix[..., 2, 1] - matrix[..., 1, 2])
117
+ o2 = _copysign(y, matrix[..., 0, 2] - matrix[..., 2, 0])
118
+ o3 = _copysign(z, matrix[..., 1, 0] - matrix[..., 0, 1])
119
+ return torch.stack((o0, o1, o2, o3), -1)
120
+
121
+
122
+ def _axis_angle_rotation(axis: str, angle):
123
+ """
124
+ Return the rotation matrices for rotations about one of the coordinate
124
+ axes used in Euler-angle conventions, for each value of the angle given.
126
+
127
+ Args:
128
+ axis: Axis label "X" or "Y" or "Z".
129
+ angle: any shape tensor of Euler angles in radians
130
+
131
+ Returns:
132
+ Rotation matrices as tensor of shape (..., 3, 3).
133
+ """
134
+
135
+ cos = torch.cos(angle)
136
+ sin = torch.sin(angle)
137
+ one = torch.ones_like(angle)
138
+ zero = torch.zeros_like(angle)
139
+
140
+ if axis == "X":
141
+ R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos)
142
+ if axis == "Y":
143
+ R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos)
144
+ if axis == "Z":
145
+ R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one)
146
+
147
+ return torch.stack(R_flat, -1).reshape(angle.shape + (3, 3))
148
+
149
+
150
+ def euler_angles_to_matrix(euler_angles, convention: str):
151
+ """
152
+ Convert rotations given as Euler angles in radians to rotation matrices.
153
+
154
+ Args:
155
+ euler_angles: Euler angles in radians as tensor of shape (..., 3).
156
+ convention: Convention string of three uppercase letters from
157
+ {"X", "Y", and "Z"}.
158
+
159
+ Returns:
160
+ Rotation matrices as tensor of shape (..., 3, 3).
161
+ """
162
+ if euler_angles.dim() == 0 or euler_angles.shape[-1] != 3:
163
+ raise ValueError("Invalid input euler angles.")
164
+ if len(convention) != 3:
165
+ raise ValueError("Convention must have 3 letters.")
166
+ if convention[1] in (convention[0], convention[2]):
167
+ raise ValueError(f"Invalid convention {convention}.")
168
+ for letter in convention:
169
+ if letter not in ("X", "Y", "Z"):
170
+ raise ValueError(f"Invalid letter {letter} in convention string.")
171
+ matrices = map(_axis_angle_rotation, convention, torch.unbind(euler_angles, -1))
172
+ return functools.reduce(torch.matmul, matrices)
173
+
174
+
175
+ def _angle_from_tan(
176
+ axis: str, other_axis: str, data, horizontal: bool, tait_bryan: bool
177
+ ):
178
+ """
179
+ Extract the first or third Euler angle from the two members of
180
+ the matrix which are positive constant times its sine and cosine.
181
+
182
+ Args:
183
+ axis: Axis label "X" or "Y" or "Z" for the angle we are finding.
184
+ other_axis: Axis label "X" or "Y" or "Z" for the middle axis in the
185
+ convention.
186
+ data: Rotation matrices as tensor of shape (..., 3, 3).
187
+ horizontal: Whether we are looking for the angle for the third axis,
188
+ which means the relevant entries are in the same row of the
189
+ rotation matrix. If not, they are in the same column.
190
+ tait_bryan: Whether the first and third axes in the convention differ.
191
+
192
+ Returns:
193
+ Euler Angles in radians for each matrix in data as a tensor
194
+ of shape (...).
195
+ """
196
+
197
+ i1, i2 = {"X": (2, 1), "Y": (0, 2), "Z": (1, 0)}[axis]
198
+ if horizontal:
199
+ i2, i1 = i1, i2
200
+ even = (axis + other_axis) in ["XY", "YZ", "ZX"]
201
+ if horizontal == even:
202
+ return torch.atan2(data[..., i1], data[..., i2])
203
+ if tait_bryan:
204
+ return torch.atan2(-data[..., i2], data[..., i1])
205
+ return torch.atan2(data[..., i2], -data[..., i1])
206
+
207
+
208
+ def _index_from_letter(letter: str):
209
+ if letter == "X":
210
+ return 0
211
+ if letter == "Y":
212
+ return 1
213
+ if letter == "Z":
214
+ return 2
215
+
216
+
217
+ def matrix_to_euler_angles(matrix, convention: str):
218
+ """
219
+ Convert rotations given as rotation matrices to Euler angles in radians.
220
+
221
+ Args:
222
+ matrix: Rotation matrices as tensor of shape (..., 3, 3).
223
+ convention: Convention string of three uppercase letters.
224
+
225
+ Returns:
226
+ Euler angles in radians as tensor of shape (..., 3).
227
+ """
228
+ if len(convention) != 3:
229
+ raise ValueError("Convention must have 3 letters.")
230
+ if convention[1] in (convention[0], convention[2]):
231
+ raise ValueError(f"Invalid convention {convention}.")
232
+ for letter in convention:
233
+ if letter not in ("X", "Y", "Z"):
234
+ raise ValueError(f"Invalid letter {letter} in convention string.")
235
+ if matrix.size(-1) != 3 or matrix.size(-2) != 3:
236
+ raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.")
237
+ i0 = _index_from_letter(convention[0])
238
+ i2 = _index_from_letter(convention[2])
239
+ tait_bryan = i0 != i2
240
+ if tait_bryan:
241
+ central_angle = torch.asin(
242
+ matrix[..., i0, i2] * (-1.0 if i0 - i2 in [-1, 2] else 1.0)
243
+ )
244
+ else:
245
+ central_angle = torch.acos(matrix[..., i0, i0])
246
+
247
+ o = (
248
+ _angle_from_tan(
249
+ convention[0], convention[1], matrix[..., i2], False, tait_bryan
250
+ ),
251
+ central_angle,
252
+ _angle_from_tan(
253
+ convention[2], convention[1], matrix[..., i0, :], True, tait_bryan
254
+ ),
255
+ )
256
+ return torch.stack(o, -1)
257
+
258
+
259
+ def random_quaternions(
260
+ n: int, dtype: Optional[torch.dtype] = None, device=None, requires_grad=False
261
+ ):
262
+ """
263
+ Generate random quaternions representing rotations,
264
+ i.e. versors with nonnegative real part.
265
+
266
+ Args:
267
+ n: Number of quaternions in a batch to return.
268
+ dtype: Type to return.
269
+ device: Desired device of returned tensor. Default:
270
+ uses the current device for the default tensor type.
271
+ requires_grad: Whether the resulting tensor should have the gradient
272
+ flag set.
273
+
274
+ Returns:
275
+ Quaternions as tensor of shape (N, 4).
276
+ """
277
+ o = torch.randn((n, 4), dtype=dtype, device=device, requires_grad=requires_grad)
278
+ s = (o * o).sum(1)
279
+ o = o / _copysign(torch.sqrt(s), o[:, 0])[:, None]
280
+ return o
281
+
282
+
283
+ def random_rotations(
284
+ n: int, dtype: Optional[torch.dtype] = None, device=None, requires_grad=False
285
+ ):
286
+ """
287
+ Generate random rotations as 3x3 rotation matrices.
288
+
289
+ Args:
290
+ n: Number of rotation matrices in a batch to return.
291
+ dtype: Type to return.
292
+ device: Device of returned tensor. Default: if None,
293
+ uses the current device for the default tensor type.
294
+ requires_grad: Whether the resulting tensor should have the gradient
295
+ flag set.
296
+
297
+ Returns:
298
+ Rotation matrices as tensor of shape (n, 3, 3).
299
+ """
300
+ quaternions = random_quaternions(
301
+ n, dtype=dtype, device=device, requires_grad=requires_grad
302
+ )
303
+ return quaternion_to_matrix(quaternions)
304
+
305
+
306
+ def random_rotation(
307
+ dtype: Optional[torch.dtype] = None, device=None, requires_grad=False
308
+ ):
309
+ """
310
+ Generate a single random 3x3 rotation matrix.
311
+
312
+ Args:
313
+ dtype: Type to return
314
+ device: Device of returned tensor. Default: if None,
315
+ uses the current device for the default tensor type
316
+ requires_grad: Whether the resulting tensor should have the gradient
317
+ flag set
318
+
319
+ Returns:
320
+ Rotation matrix as tensor of shape (3, 3).
321
+ """
322
+ return random_rotations(1, dtype, device, requires_grad)[0]
323
+
324
+
325
+ def standardize_quaternion(quaternions):
326
+ """
327
+ Convert a unit quaternion to a standard form: one in which the real
328
+ part is non negative.
329
+
330
+ Args:
331
+ quaternions: Quaternions with real part first,
332
+ as tensor of shape (..., 4).
333
+
334
+ Returns:
335
+ Standardized quaternions as tensor of shape (..., 4).
336
+ """
337
+ return torch.where(quaternions[..., 0:1] < 0, -quaternions, quaternions)
338
+
339
+
340
+ def quaternion_raw_multiply(a, b):
341
+ """
342
+ Multiply two quaternions.
343
+ Usual torch rules for broadcasting apply.
344
+
345
+ Args:
346
+ a: Quaternions as tensor of shape (..., 4), real part first.
347
+ b: Quaternions as tensor of shape (..., 4), real part first.
348
+
349
+ Returns:
350
+ The product of a and b, a tensor of quaternions shape (..., 4).
351
+ """
352
+ aw, ax, ay, az = torch.unbind(a, -1)
353
+ bw, bx, by, bz = torch.unbind(b, -1)
354
+ ow = aw * bw - ax * bx - ay * by - az * bz
355
+ ox = aw * bx + ax * bw + ay * bz - az * by
356
+ oy = aw * by - ax * bz + ay * bw + az * bx
357
+ oz = aw * bz + ax * by - ay * bx + az * bw
358
+ return torch.stack((ow, ox, oy, oz), -1)
359
+
360
+
361
+ def quaternion_multiply(a, b):
362
+ """
363
+ Multiply two quaternions representing rotations, returning the quaternion
364
+ representing their composition, i.e. the versor with nonnegative real part.
365
+ Usual torch rules for broadcasting apply.
366
+
367
+ Args:
368
+ a: Quaternions as tensor of shape (..., 4), real part first.
369
+ b: Quaternions as tensor of shape (..., 4), real part first.
370
+
371
+ Returns:
372
+ The product of a and b, a tensor of quaternions of shape (..., 4).
373
+ """
374
+ ab = quaternion_raw_multiply(a, b)
375
+ return standardize_quaternion(ab)
376
+
377
+
378
+ def quaternion_invert(quaternion):
379
+ """
380
+ Given a quaternion representing rotation, get the quaternion representing
381
+ its inverse.
382
+
383
+ Args:
384
+ quaternion: Quaternions as tensor of shape (..., 4), with real part
385
+ first, which must be versors (unit quaternions).
386
+
387
+ Returns:
388
+ The inverse, a tensor of quaternions of shape (..., 4).
389
+ """
390
+
391
+ return quaternion * quaternion.new_tensor([1, -1, -1, -1])
392
+
393
+
394
+ def quaternion_apply(quaternion, point):
395
+ """
396
+ Apply the rotation given by a quaternion to a 3D point.
397
+ Usual torch rules for broadcasting apply.
398
+
399
+ Args:
400
+ quaternion: Tensor of quaternions, real part first, of shape (..., 4).
401
+ point: Tensor of 3D points of shape (..., 3).
402
+
403
+ Returns:
404
+ Tensor of rotated points of shape (..., 3).
405
+ """
406
+ if point.size(-1) != 3:
407
+ raise ValueError(f"Points are not in 3D, {point.shape}.")
408
+ real_parts = point.new_zeros(point.shape[:-1] + (1,))
409
+ point_as_quaternion = torch.cat((real_parts, point), -1)
410
+ out = quaternion_raw_multiply(
411
+ quaternion_raw_multiply(quaternion, point_as_quaternion),
412
+ quaternion_invert(quaternion),
413
+ )
414
+ return out[..., 1:]
415
+
416
+
417
+ def axis_angle_to_matrix(axis_angle):
418
+ """
419
+ Convert rotations given as axis/angle to rotation matrices.
420
+
421
+ Args:
422
+ axis_angle: Rotations given as a vector in axis angle form,
423
+ as a tensor of shape (..., 3), where the magnitude is
424
+ the angle turned anticlockwise in radians around the
425
+ vector's direction.
426
+
427
+ Returns:
428
+ Rotation matrices as tensor of shape (..., 3, 3).
429
+ """
430
+ return quaternion_to_matrix(axis_angle_to_quaternion(axis_angle))
431
+
432
+
433
+ def matrix_to_axis_angle(matrix):
434
+ """
435
+ Convert rotations given as rotation matrices to axis/angle.
436
+
437
+ Args:
438
+ matrix: Rotation matrices as tensor of shape (..., 3, 3).
439
+
440
+ Returns:
441
+ Rotations given as a vector in axis angle form, as a tensor
442
+ of shape (..., 3), where the magnitude is the angle
443
+ turned anticlockwise in radians around the vector's
444
+ direction.
445
+ """
446
+ return quaternion_to_axis_angle(matrix_to_quaternion(matrix))
447
+
448
+
449
+ def axis_angle_to_quaternion(axis_angle):
450
+ """
451
+ Convert rotations given as axis/angle to quaternions.
452
+
453
+ Args:
454
+ axis_angle: Rotations given as a vector in axis angle form,
455
+ as a tensor of shape (..., 3), where the magnitude is
456
+ the angle turned anticlockwise in radians around the
457
+ vector's direction.
458
+
459
+ Returns:
460
+ quaternions with real part first, as tensor of shape (..., 4).
461
+ """
462
+ angles = torch.norm(axis_angle, p=2, dim=-1, keepdim=True)
463
+ half_angles = 0.5 * angles
464
+ eps = 1e-6
465
+ small_angles = angles.abs() < eps
466
+ sin_half_angles_over_angles = torch.empty_like(angles)
467
+ sin_half_angles_over_angles[~small_angles] = (
468
+ torch.sin(half_angles[~small_angles]) / angles[~small_angles]
469
+ )
470
+ # for x small, sin(x/2) is about x/2 - (x/2)^3/6
471
+ # so sin(x/2)/x is about 1/2 - (x*x)/48
472
+ sin_half_angles_over_angles[small_angles] = (
473
+ 0.5 - (angles[small_angles] * angles[small_angles]) / 48
474
+ )
475
+ quaternions = torch.cat(
476
+ [torch.cos(half_angles), axis_angle * sin_half_angles_over_angles], dim=-1
477
+ )
478
+ return quaternions
479
+
480
+
481
+ def quaternion_to_axis_angle(quaternions):
482
+ """
483
+ Convert rotations given as quaternions to axis/angle.
484
+
485
+ Args:
486
+ quaternions: quaternions with real part first,
487
+ as tensor of shape (..., 4).
488
+
489
+ Returns:
490
+ Rotations given as a vector in axis angle form, as a tensor
491
+ of shape (..., 3), where the magnitude is the angle
492
+ turned anticlockwise in radians around the vector's
493
+ direction.
494
+ """
495
+ norms = torch.norm(quaternions[..., 1:], p=2, dim=-1, keepdim=True)
496
+ half_angles = torch.atan2(norms, quaternions[..., :1])
497
+ angles = 2 * half_angles
498
+ eps = 1e-6
499
+ small_angles = angles.abs() < eps
500
+ sin_half_angles_over_angles = torch.empty_like(angles)
501
+ sin_half_angles_over_angles[~small_angles] = (
502
+ torch.sin(half_angles[~small_angles]) / angles[~small_angles]
503
+ )
504
+ # for x small, sin(x/2) is about x/2 - (x/2)^3/6
505
+ # so sin(x/2)/x is about 1/2 - (x*x)/48
506
+ sin_half_angles_over_angles[small_angles] = (
507
+ 0.5 - (angles[small_angles] * angles[small_angles]) / 48
508
+ )
509
+ return quaternions[..., 1:] / sin_half_angles_over_angles
510
+
511
+
512
+ def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
513
+ """
514
+ Converts 6D rotation representation by Zhou et al. [1] to rotation matrix
515
+ using Gram--Schmidt orthogonalisation per Section B of [1].
516
+ Args:
517
+ d6: 6D rotation representation, of size (*, 6)
518
+
519
+ Returns:
520
+ batch of rotation matrices of size (*, 3, 3)
521
+
522
+ [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H.
523
+ On the Continuity of Rotation Representations in Neural Networks.
524
+ IEEE Conference on Computer Vision and Pattern Recognition, 2019.
525
+ Retrieved from http://arxiv.org/abs/1812.07035
526
+ """
527
+
528
+ a1, a2 = d6[..., :3], d6[..., 3:]
529
+ b1 = F.normalize(a1, dim=-1)
530
+ b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
531
+ b2 = F.normalize(b2, dim=-1)
532
+ b3 = torch.cross(b1, b2, dim=-1)
533
+ return torch.stack((b1, b2, b3), dim=-2)
534
+
535
+
536
+ def matrix_to_rotation_6d(matrix: torch.Tensor) -> torch.Tensor:
537
+ """
538
+ Converts rotation matrices to 6D rotation representation by Zhou et al. [1]
539
+ by dropping the last row. Note that 6D representation is not unique.
540
+ Args:
541
+ matrix: batch of rotation matrices of size (*, 3, 3)
542
+
543
+ Returns:
544
+ 6D rotation representation, of size (*, 6)
545
+
546
+ [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H.
547
+ On the Continuity of Rotation Representations in Neural Networks.
548
+ IEEE Conference on Computer Vision and Pattern Recognition, 2019.
549
+ Retrieved from http://arxiv.org/abs/1812.07035
550
+ """
551
+ return matrix[..., :2, :].clone().reshape(*matrix.size()[:-2], 6)
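
A small hedged sanity-check sketch for the conversion helpers defined above; the import path follows this repo's layout and the batch size and tolerances are illustrative.

    import torch
    from data_utils.rotation_conversion import (
        axis_angle_to_matrix, matrix_to_axis_angle,
        matrix_to_rotation_6d, rotation_6d_to_matrix,
    )

    aa = torch.randn(8, 3) * 0.3                  # batch of small axis-angle rotations (radians)
    R = axis_angle_to_matrix(aa)                  # (8, 3, 3)
    d6 = matrix_to_rotation_6d(R)                 # (8, 6): first two rows of each matrix
    R_back = rotation_6d_to_matrix(d6)            # (8, 3, 3), re-orthogonalised via Gram-Schmidt
    aa_back = matrix_to_axis_angle(R_back)        # (8, 3)

    print(torch.allclose(R, R_back, atol=1e-5))   # True: the 6D form keeps the full rotation
    print(torch.allclose(aa, aa_back, atol=1e-4)) # True for rotations well below pi radians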
data_utils/utils.py ADDED
@@ -0,0 +1,333 @@
1
+ import numpy as np
2
+ # import librosa  # had to do this because librosa is not supported on my server
3
+ import python_speech_features
4
+ from scipy.io import wavfile
5
+ from scipy import signal
6
+ import librosa
7
+ import torch
8
+ import torchaudio as ta
9
+ import torchaudio.functional as ta_F
10
+ import torchaudio.transforms as ta_T
11
+ # import pyloudnorm as pyln
12
+
13
+
14
+ def load_wav_old(audio_fn, sr = 16000):
15
+ sample_rate, sig = wavfile.read(audio_fn)
16
+ if sample_rate != sr:
17
+ result = int((sig.shape[0]) / sample_rate * sr)
18
+ x_resampled = signal.resample(sig, result)
19
+ x_resampled = x_resampled.astype(np.float64)
20
+ return x_resampled, sr
21
+
22
+ sig = sig / (2**15)
23
+ return sig, sample_rate
24
+
25
+
26
+ def get_mfcc(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
27
+
28
+ y, sr = librosa.load(audio_fn, sr=sr, mono=True)
29
+
30
+ if win_size is None:
31
+ hop_len=int(sr / fps)
32
+ else:
33
+ hop_len=int(sr / win_size)
34
+
35
+ n_fft=2048
36
+
37
+ C = librosa.feature.mfcc(
38
+ y = y,
39
+ sr = sr,
40
+ n_mfcc = n_mfcc,
41
+ hop_length = hop_len,
42
+ n_fft = n_fft
43
+ )
44
+
45
+ if C.shape[0] == n_mfcc:
46
+ C = C.transpose(1, 0)
47
+
48
+ return C
49
+
50
+
51
+ def get_melspec(audio_fn, eps=1e-6, fps = 25, sr=16000, n_mels=64):
52
+ raise NotImplementedError
53
+ '''
54
+ # y, sr = load_wav(audio_fn=audio_fn, sr=sr)
55
+
56
+ # hop_len = int(sr / fps)
57
+ # n_fft = 2048
58
+
59
+ # C = librosa.feature.melspectrogram(
60
+ # y = y,
61
+ # sr = sr,
62
+ # n_fft=n_fft,
63
+ # hop_length=hop_len,
64
+ # n_mels = n_mels,
65
+ # fmin=0,
66
+ # fmax=8000)
67
+
68
+
69
+ # mask = (C == 0).astype(np.float)
70
+ # C = mask * eps + (1-mask) * C
71
+
72
+ # C = np.log(C)
73
+ # #wierd error may occur here
74
+ # assert not (np.isnan(C).any()), audio_fn
75
+ # if C.shape[0] == n_mels:
76
+ # C = C.transpose(1, 0)
77
+
78
+ # return C
79
+ '''
80
+
81
+ def extract_mfcc(audio,sample_rate=16000):
82
+ mfcc = zip(*python_speech_features.mfcc(audio,sample_rate, numcep=64, nfilt=64, nfft=2048, winstep=0.04))
83
+ mfcc = np.stack([np.array(i) for i in mfcc])
84
+ return mfcc
85
+
86
+ def get_mfcc_psf(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
87
+ y, sr = load_wav_old(audio_fn, sr=sr)
88
+
89
+ if y.shape.__len__() > 1:
90
+ y = (y[:,0]+y[:,1])/2
91
+
92
+ if win_size is None:
93
+ hop_len=int(sr / fps)
94
+ else:
95
+ hop_len=int(sr/ win_size)
96
+
97
+ n_fft=2048
98
+
99
+ #hard coded for 25 fps
100
+ if not smlpx:
101
+ C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=0.04)
102
+ else:
103
+ C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01/15)
104
+ # if C.shape[0] == n_mfcc:
105
+ # C = C.transpose(1, 0)
106
+
107
+ return C
108
+
109
+
110
+ def get_mfcc_psf_min(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
111
+ y, sr = load_wav_old(audio_fn, sr=sr)
112
+
113
+ if y.shape.__len__() > 1:
114
+ y = (y[:, 0] + y[:, 1]) / 2
115
+ n_fft = 2048
116
+
117
+ slice_len = 22000 * 5
118
+ slice = y.size // slice_len
119
+
120
+ C = []
121
+
122
+ for i in range(slice):
123
+ if i != (slice - 1):
124
+ feat = python_speech_features.mfcc(y[i*slice_len:(i+1)*slice_len], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
125
+ else:
126
+ feat = python_speech_features.mfcc(y[i * slice_len:], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
127
+
128
+ C.append(feat)
129
+
130
+ return C
131
+
132
+
133
+ def audio_chunking(audio: torch.Tensor, frame_rate: int = 30, chunk_size: int = 16000):
134
+ """
135
+ :param audio: 1 x T tensor containing a 16kHz audio signal
136
+ :param frame_rate: frame rate for video (we need one audio chunk per video frame)
137
+ :param chunk_size: number of audio samples per chunk
138
+ :return: num_chunks x chunk_size tensor containing sliced audio
139
+ """
140
+ samples_per_frame = chunk_size // frame_rate
141
+ padding = (chunk_size - samples_per_frame) // 2
142
+ audio = torch.nn.functional.pad(audio.unsqueeze(0), pad=[padding, padding]).squeeze(0)
143
+ anchor_points = list(range(chunk_size//2, audio.shape[-1]-chunk_size//2, samples_per_frame))
144
+ audio = torch.cat([audio[:, i-chunk_size//2:i+chunk_size//2] for i in anchor_points], dim=0)
145
+ return audio
146
+
147
+
148
+ def get_mfcc_ta(audio_fn, eps=1e-6, fps=15, smlpx=False, sr=16000, n_mfcc=64, win_size=None, type='mfcc', am=None, am_sr=None, encoder_choice='mfcc'):
149
+ if am is None:
150
+ sr_0, audio = audio_fn
151
+ audio = torch.tensor(audio)/32767
152
+ if len(audio.shape) == 1:
153
+ audio.unsqueeze_(dim=0)
154
+ elif audio.shape[1] == 1 or audio.shape[1] == 2:
155
+ audio.transpose_(0, 1)
156
+
157
+ if sr != sr_0:
158
+ audio = ta.transforms.Resample(sr_0, sr)(audio)
159
+ if audio.shape[0] > 1:
160
+ audio = torch.mean(audio, dim=0, keepdim=True)
161
+
162
+ n_fft = 2048
163
+ if fps == 15:
164
+ hop_length = 1467
165
+ elif fps == 30:
166
+ hop_length = 734
167
+ win_length = hop_length * 2
168
+ n_mels = 256
169
+ n_mfcc = 64
170
+
171
+ if type == 'mfcc':
172
+ mfcc_transform = ta_T.MFCC(
173
+ sample_rate=sr,
174
+ n_mfcc=n_mfcc,
175
+ melkwargs={
176
+ "n_fft": n_fft,
177
+ "n_mels": n_mels,
178
+ # "win_length": win_length,
179
+ "hop_length": hop_length,
180
+ "mel_scale": "htk",
181
+ },
182
+ )
183
+ audio_ft = mfcc_transform(audio).squeeze(dim=0).transpose(0,1).numpy()
184
+ elif type == 'mel':
185
+ # audio = 0.01 * audio / torch.mean(torch.abs(audio))
186
+ mel_transform = ta_T.MelSpectrogram(
187
+ sample_rate=sr, n_fft=n_fft, win_length=None, hop_length=hop_length, n_mels=n_mels
188
+ )
189
+ audio_ft = mel_transform(audio).squeeze(0).transpose(0,1).numpy()
190
+ # audio_ft = torch.log(audio_ft.clamp(min=1e-10, max=None)).transpose(0,1).numpy()
191
+ elif type == 'mel_mul':
192
+ audio = 0.01 * audio / torch.mean(torch.abs(audio))
193
+ audio = audio_chunking(audio, frame_rate=fps, chunk_size=sr)
194
+ mel_transform = ta_T.MelSpectrogram(
195
+ sample_rate=sr, n_fft=n_fft, win_length=int(sr/20), hop_length=int(sr/100), n_mels=n_mels
196
+ )
197
+ audio_ft = mel_transform(audio).squeeze(1)
198
+ audio_ft = torch.log(audio_ft.clamp(min=1e-10, max=None)).numpy()
199
+ else:
200
+ sampling_rate, speech_array = audio_fn
201
+ speech_array = torch.tensor(speech_array) / 32767
202
+ if len(speech_array.shape) == 1:
203
+ speech_array.unsqueeze_(0)
204
+ elif speech_array.shape[1] == 1 or speech_array.shape[1] == 2:
205
+ speech_array.transpose_(0, 1)
206
+ if sr != sampling_rate:
207
+ speech_array = ta.transforms.Resample(sampling_rate, sr)(speech_array)
208
+ speech_array = torch.mean(speech_array, dim=0, keepdim=True)
209
+ speech_array = speech_array.numpy()
210
+
211
+ if encoder_choice == 'faceformer':
212
+ # audio_ft = np.squeeze(am(speech_array, sampling_rate=16000).input_values).reshape(-1, 1)
213
+ audio_ft = speech_array.reshape(-1, 1)
214
+ elif encoder_choice == 'meshtalk':
215
+ audio_ft = 0.01 * speech_array / np.mean(np.abs(speech_array))
216
+ elif encoder_choice == 'onset':
217
+ audio_ft = librosa.onset.onset_detect(y=speech_array, sr=16000, units='time').reshape(-1, 1)
218
+ else:
219
+ audio, sr_0 = ta.load(audio_fn)
220
+ if sr != sr_0:
221
+ audio = ta.transforms.Resample(sr_0, sr)(audio)
222
+ if audio.shape[0] > 1:
223
+ audio = torch.mean(audio, dim=0, keepdim=True)
224
+
225
+ n_fft = 2048
226
+ if fps == 15:
227
+ hop_length = 1467
228
+ elif fps == 30:
229
+ hop_length = 734
230
+ win_length = hop_length * 2
231
+ n_mels = 256
232
+ n_mfcc = 64
233
+
234
+ mfcc_transform = ta_T.MFCC(
235
+ sample_rate=sr,
236
+ n_mfcc=n_mfcc,
237
+ melkwargs={
238
+ "n_fft": n_fft,
239
+ "n_mels": n_mels,
240
+ # "win_length": win_length,
241
+ "hop_length": hop_length,
242
+ "mel_scale": "htk",
243
+ },
244
+ )
245
+ audio_ft = mfcc_transform(audio).squeeze(dim=0).transpose(0, 1).numpy()
246
+ return audio_ft
247
+
248
+
249
+ def get_mfcc_sepa(audio_fn, fps=15, sr=16000):
250
+ audio, sr_0 = ta.load(audio_fn)
251
+ if sr != sr_0:
252
+ audio = ta.transforms.Resample(sr_0, sr)(audio)
253
+ if audio.shape[0] > 1:
254
+ audio = torch.mean(audio, dim=0, keepdim=True)
255
+
256
+ n_fft = 2048
257
+ if fps == 15:
258
+ hop_length = 1467
259
+ elif fps == 30:
260
+ hop_length = 734
261
+ n_mels = 256
262
+ n_mfcc = 64
263
+
264
+ mfcc_transform = ta_T.MFCC(
265
+ sample_rate=sr,
266
+ n_mfcc=n_mfcc,
267
+ melkwargs={
268
+ "n_fft": n_fft,
269
+ "n_mels": n_mels,
270
+ # "win_length": win_length,
271
+ "hop_length": hop_length,
272
+ "mel_scale": "htk",
273
+ },
274
+ )
275
+ audio_ft_0 = mfcc_transform(audio[0, :sr*2]).squeeze(dim=0).transpose(0,1).numpy()
276
+ audio_ft_1 = mfcc_transform(audio[0, sr*2:]).squeeze(dim=0).transpose(0,1).numpy()
277
+ audio_ft = np.concatenate((audio_ft_0, audio_ft_1), axis=0)
278
+ return audio_ft, audio_ft_0.shape[0]
279
+
280
+
281
+ def get_mfcc_old(wav_file):
282
+ sig, sample_rate = load_wav_old(wav_file)
283
+ mfcc = extract_mfcc(sig)
284
+ return mfcc
285
+
286
+
287
+ def smooth_geom(geom, mask: torch.Tensor = None, filter_size: int = 9, sigma: float = 2.0):
288
+ """
289
+ :param geom: T x V x 3 tensor containing a temporal sequence of length T with V vertices in each frame
290
+ :param mask: V-dimensional Tensor containing a mask with vertices to be smoothed
291
+ :param filter_size: size of the Gaussian filter
292
+ :param sigma: standard deviation of the Gaussian filter
293
+ :return: T x V x 3 tensor containing smoothed geometry (i.e., smoothed in the area indicated by the mask)
294
+ """
295
+ assert filter_size % 2 == 1, f"filter size must be odd but is {filter_size}"
296
+ # Gaussian smoothing (low-pass filtering)
297
+ fltr = np.arange(-(filter_size // 2), filter_size // 2 + 1)
298
+ fltr = np.exp(-0.5 * fltr ** 2 / sigma ** 2)
299
+ fltr = torch.Tensor(fltr) / np.sum(fltr)
300
+ # apply fltr
301
+ fltr = fltr.view(1, 1, -1).to(device=geom.device)
302
+ T, V = geom.shape[1], geom.shape[2]
303
+ g = torch.nn.functional.pad(
304
+ geom.permute(2, 0, 1).view(V, 1, T),
305
+ pad=[filter_size // 2, filter_size // 2], mode='replicate'
306
+ )
307
+ g = torch.nn.functional.conv1d(g, fltr).view(V, 1, T)
308
+ smoothed = g.permute(1, 2, 0).contiguous()
309
+ # blend smoothed signal with original signal
310
+ if mask is None:
311
+ return smoothed
312
+ else:
313
+ return smoothed * mask[None, :, None] + geom * (-mask[None, :, None] + 1)
314
+
315
+ if __name__ == '__main__':
316
+ audio_fn = '../sample_audio/clip000028_tCAkv4ggPgI.wav'
317
+
318
+ C = get_mfcc_psf(audio_fn)
319
+ print(C.shape)
320
+
321
+ C_2 = get_mfcc(audio_fn)  # librosa-based MFCC (no get_mfcc_librosa is defined in this file)
322
+ print(C_2.shape)
323
+
324
+ print(C)
325
+ print(C_2)
326
+ print((C == C_2).all())
327
+ # print(y.shape, sr)
328
+ # mel_spec = get_melspec(audio_fn)
329
+ # print(mel_spec.shape)
330
+ # mfcc = get_mfcc(audio_fn, sr = 16000)
331
+ # print(mfcc.shape)
332
+ # print(mel_spec.max(), mel_spec.min())
333
+ # print(mfcc.max(), mfcc.min())
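
A hedged usage sketch for the audio helpers above: get_mfcc_ta accepts a (sample_rate, waveform) tuple when am is None (the format a Gradio audio input provides), and audio_chunking slices one fixed-size window per video frame. The dummy waveform and its duration are illustrative assumptions, not values from the repo.

    import numpy as np
    import torch
    from data_utils.utils import get_mfcc_ta, audio_chunking

    sr = 16000
    wav = (np.random.randn(sr * 2) * 3000).astype(np.int16)        # 2 s of dummy int16 audio
    feat = get_mfcc_ta((sr, wav), fps=30, sr=sr, type='mfcc')      # (num_frames, 64) MFCC matrix
    print(feat.shape)

    audio = torch.from_numpy(wav.astype(np.float32) / 32767).unsqueeze(0)  # 1 x T float tensor
    chunks = audio_chunking(audio, frame_rate=30, chunk_size=sr)           # one 1 s chunk per video frame
    print(chunks.shape)                                                    # (num_video_frames, 16000)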
demo/1st-page/1st-page-upper.mp4 ADDED
Binary file (837 kB).
 
demo/1st-page/1st-page-upper.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:159eefc300544ea95d919b49707afa466e6246135da9a986b4abbc55bbc54850
3
+ size 407168
demo/french/french.mp4 ADDED
Binary file (592 kB).
 
demo/french/french.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:289d7a2abb18efa495587a4c4b094a109bdb7d3efd779800f028708bde4d1477
3
+ size 305408
demo/rich/rich.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc50b66c7df10233191921a6a3f19c2895249997206f30e5e099cc10b90903a
3
+ size 3608757
demo/rich/rich.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03c956ed3992980fe37581019ec12350531489b12b46a55cfc4c562f7bd8ddb
3
+ size 1908128
demo/song/cut.mp4 ADDED
Binary file (655 kB).
 
demo/song/song.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8827d6daaec213bee7bd32af68a0cf8ea83d154f32d006bd7f38120e2c282045
3
+ size 3178290
demo/song/song.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157bfbde5a1b15ac812e52d8b08997be1a41fae93b3a7fe613b897d1ff5d8996
3
+ size 1707788
demo/style/chemistry.mp4 ADDED
Binary file (670 kB).
 
demo/style/chemistry.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8dc42938343bc10b149a6a74d43d5a4cef010c6f2a0c58bffee7f48b2a1e81
3
+ size 318128
demo/style/conan.mp4 ADDED
Binary file (610 kB).
 
demo/style/conan.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350ca76806d86ff7b36fbfeaec219d7c0cf515c3c23dfe6791143b82e7ec3327
3
+ size 318128
demo/style/diversity.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09fd9e6330ced1ecbf10a6e7e0a4f6ebad098eb44115a2ee35a070d02e522ec8
3
+ size 5882474
demo/style/diversity.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e4c37f510943dad934da97a8eade5ddce25165df20419e74606fb0160b4ce07
3
+ size 3816128
demo/style/face.mp4 ADDED
Binary file (687 kB).
 
demo/style/face.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b51d0d309e92449323ab481a3cc16c88d2b04f6f487eb366720a9ad7f8754f03
3
+ size 318128
demo/style/oliver.mp4 ADDED
Binary file (589 kB).
 
demo/style/oliver.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:149258f227975e1f07b449f0ab5e4c3e3e1458f97fa646360eac3f1428c52f5a
3
+ size 318128
demo/style/seth.mp4 ADDED
Binary file (558 kB).
 
demo/style/seth.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6132a40d60ee8cf954d74293ea46e75cd4b4d2001ca96fc4713abe06a34b5a3c
3
+ size 318128
demo_audio/1st-page.wav ADDED
Binary file (410 kB).
 
demo_audio/french.wav ADDED
Binary file (461 kB).
 
demo_audio/rich.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db9c793b66a64ffb11f0f673e70f9e0188bfa1ce95a391cb9af7d9c7ccf92597
3
+ size 10584078