# Training configuration: one `key: value` pair per line.
# Lines starting with `#` are disabled options kept for reference.

# Run mode
is_train: True
ddp: False
stat: ts
training_speakers: [2]

# Paths
root_path: ./
out_path: ./outputs/audio2pose/
cache_path: datasets/beat_cache/beat_smplx_en_lower/
project: mage_smplx
data_path: ./BEAT2/beat_english_v2.0.0/
e_path: weights/AESKConv_240_100.bin
test_ckpt: weights/multi.bin
data_path_1: ./EMAGE/
# torch_hub_path: datasets/hub/

# Dataset and pose representation
additional_data: False
dataset: beat_sep_lower
new_cache: False
ori_joints: beat_smplx_joints
tar_joints: beat_smplx_lower
pose_rep: smplxflame_30
pose_norm: False
pose_fps: 30

# VAE settings
vae_test_len: 64
vae_test_dim: 61
vae_test_stride: 20
vae_length: 256
vae_codebook_size: 256
vae_layer: 4
vae_grow: [1,1,2,1]
variational: False

# Input dimensions and windowing
pose_dims: 61
pose_length: 64
stride: 20
facial_dims: 100
word_index_num: 11195
word_dims: 300

# Batch size and learning rate
batch_size: 64
# The mantissa carries an explicit dot so that YAML 1.1 loaders (e.g. PyYAML)
# resolve the value as a float; a bare `3e-4` is loaded as a string there.
lr_base: 3.0e-4

# Model and trainer selection
model: motion_representation
g_name: VQVAEConvZero
# eval_model: motion_autoencoder
# e_name: HalfEmbeddingNet
trainer: aelowerfoot
decay_epochs: 780

# Disabled encoder options
# audio_f: 256
# a_pre_encoder: tcn_camn
# a_encoder: lp
# a_fix_pre: False

# freeze_wordembed: False
# word_f: 128
# t_pre_encoder: fasttext
# t_encoder: lp
# t_fix_pre: False

# motion_f: 256
# m_pre_encoder: lp
# m_encoder: lp
# m_fix_pre: False

# facial_f: 128
# f_pre_encoder: lp
# f_encoder: lp
# f_fix_pre: False

# Disabled decoder options
# m_decoder: lstm
# decode_fusion: cat
# n_layer: 2
# hidden_size: 512

# Loss weights
rec_weight: 1
rec_pos_weight: 1
rec_ver_weight: 1
# rec_fac_weight: 1
# ita_weight: 0
# iwa_weight: 0
# fusion_mode: sum
# grad_norm: 1

# Schedule
epochs: 800
test_period: 100