# Experiment configuration.
# Selects model `motion_representation` with g_name `VQVAEConvZero` and the
# `ae_total` trainer (see "model config" below). One key per line; keys that
# are commented out are kept as comments for reference.

# run / path config
is_train: True
ddp: False
stat: ts
root_path: ./
out_path: ./outputs/audio2pose/
project: s2g
data_path: ./datasets/BEAT_SMPL/beat_v2.0.0/beat_english_v2.0.0/
e_path: weights/AESKConv_240_100.bin
eval_model: motion_representation
e_name: VAESKConv
test_ckpt: ./outputs/audio2pose/custom/0112_001634_emage/last_200.bin
data_path_1: ./datasets/hub/

# vae config
vae_test_len: 32
vae_test_dim: 330
vae_test_stride: 20
vae_length: 240
vae_codebook_size: 256
vae_layer: 4
vae_grow: [1, 1, 2, 1]
variational: False

# data config
# Alternative speaker selection kept for reference:
# training_speakers: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
training_speakers: [2]
additional_data: False
cache_path: datasets/beat_cache/beat_smplx_en_emage_2_rvqvae/
dataset: mix_sep
new_cache: True
use_amass: False

# motion config
ori_joints: beat_smplx_joints
tar_joints: beat_smplx_full
pose_rep: smplxflame_30
pose_norm: False
pose_fps: 30
rot6d: True
pre_frames: 4
pose_dims: 330
pose_length: 64
stride: 20
test_length: 64
motion_f: 256
m_pre_encoder: null
m_encoder: null
m_fix_pre: False

# audio config
audio_rep: onset+amplitude
audio_sr: 16000
audio_fps: 16000
audio_norm: False
audio_f: 256
# a_pre_encoder: tcn_camn
# a_encoder: none
# a_fix_pre: False

# text config
word_rep: textgrid
word_index_num: 11195
word_dims: 300
freeze_wordembed: False
word_f: 256
t_pre_encoder: fasttext
t_encoder: null
t_fix_pre: False

# facial config
facial_rep: smplxflame_30
facial_dims: 100
facial_norm: False
facial_f: 0
f_pre_encoder: null
f_encoder: null
f_fix_pre: False

# speaker config
id_rep: onehot
speaker_f: 0

# model config
batch_size: 80  # 80
# warmup_epochs: 1
# warmup_lr: 1e-6
# Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `4e-4` as a string, not a float. The value is unchanged.
lr_base: 4.0e-4
model: motion_representation
g_name: VQVAEConvZero
trainer: ae_total
hidden_size: 768
n_layer: 1

rec_weight: 1
grad_norm: 0.99
epochs: 200
test_period: 20
# NOTE(review): the l*/c* suffixes presumably name body parts
# (lower / face / upper / hands) - confirm against the consuming code.
ll: 3
lf: 3
lu: 3
lh: 3
cl: 1
cf: 0
cu: 1
ch: 1

# Below is the VQ-VAE config, copied from QPGesture.
# Codebook configs
levels: 1
downs_t: [3]
strides_t: [2]
emb_width: 512
l_bins: 512
l_mu: 0.99
commit: 0.1
hvqvae_multipliers: [1]
width: 512
depth: 3
m_conv: 1.0
dilation_growth_rate: 3
sample_length: 80
use_bottleneck: True
joint_channel: 6
# depth: 3
# width: 128
# m_conv: 1.0
# dilation_growth_rate: 1
# dilation_cycle: None
vel: 1  # 1 -> 0
acc: 1  # 1 -> 0
vqvae_reverse_decoder_dilation: True

# Below is specific to EMAGE.
rec_pos_weight: 1.0