# EMAGE / configs / camn.yaml
# Uploaded by H-Liu1997 with huggingface_hub (revision 2d47d90, verified).
# File size: 1.8 kB.
# general config
# Run mode, file locations and the settings of the evaluation model.
# NOTE(review): booleans in this file are spelled True/False; keep that
# spelling unless the config loader is confirmed to accept true/false.
is_train: True
ddp: False
stat: ts
# All paths below are relative; presumably they are resolved against root_path
# or the process working directory — verify against the loader.
root_path: ./
out_path: ./outputs/audio2pose/
project: s2g
data_path: ./BEAT2/beat_english_v2.0.0/
# e_path / eval_model / e_name presumably select the pretrained evaluation
# network and its weights — TODO confirm.
e_path: weights/AESKConv_240_100.bin
eval_model: motion_representation
e_name: VAESKConv
test_ckpt: ./EMAGE/camn.bin
data_path_1: ./EMAGE/
# vae_test_dim carries the same value (330) as pose_dims in the motion config
# section; presumably the two must be kept in sync — verify.
vae_test_len: 64
vae_test_dim: 330
vae_test_stride: 20
vae_length: 240
vae_codebook_size: 256
# vae_grow lists 4 entries, matching vae_layer: 4 — presumably one entry per
# layer; confirm before changing either value.
vae_layer: 4
vae_grow: [1,1,2,1]
variational: False
# data config
# Only speaker 2 is enabled; the trailing comment on the next line keeps the
# full 1-30 list for reference.
training_speakers: [2] #[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
additional_data: False
# NOTE(review): with new_cache: False the cache at cache_path is presumably
# reused rather than rebuilt — confirm against the dataset loader.
cache_path: datasets/beat_cache/beat_smplx_en_camn/
dataset: beat_sep
new_cache: False
# motion config
ori_joints: beat_smplx_joints
tar_joints: beat_smplx_full
# pose_rep uses the same representation name as facial_rep (smplxflame_30).
pose_rep: smplxflame_30
pose_norm: False
pose_fps: 15
rot6d: True
pre_frames: 4
# pose_dims carries the same value (330) as vae_test_dim in the general
# section; presumably the two must be kept in sync — verify.
pose_dims: 330
# pose_length and test_length are both 32 here.
pose_length: 32
stride: 10
test_length: 32
motion_f: 256
# No motion pre-encoder / encoder is configured (both null).
m_pre_encoder: null
m_encoder: null
m_fix_pre: False
# audio config
# audio_sr and audio_fps are both 16000, consistent with the "wave16k"
# representation name.
audio_rep: wave16k
audio_sr: 16000
audio_fps: 16000
audio_norm: False
audio_f: 128
# The audio encoder keys below are disabled (commented out), so whatever
# defaults the loader defines apply — TODO confirm the defaults.
# a_pre_encoder: tcn_camn
# a_encoder: none
# a_fix_pre: False
# text config
word_rep: textgrid
# NOTE(review): word_index_num and word_dims presumably describe the
# vocabulary size and embedding width of the word table — confirm.
word_index_num: 11195
word_dims: 300
freeze_wordembed: False
word_f: 128
# A text pre-encoder (fasttext) is named, but no text encoder is configured.
t_pre_encoder: fasttext
t_encoder: null
t_fix_pre: False
# facial config
# facial_rep uses the same representation name as pose_rep (smplxflame_30).
facial_rep: smplxflame_30
facial_dims: 100
facial_norm: False
facial_f: 64
# No facial pre-encoder / encoder is configured (both null).
f_pre_encoder: null
f_encoder: null
f_fix_pre: False
# speaker config
# Speaker identity and emotion settings; the *_f keys presumably give the
# feature width of each embedding — TODO confirm.
id_rep: onehot
speaker_f: 16
emo_rep: emo
emotion_f: 8
# The sem_rep key is disabled (commented out).
# sem_rep: sem
# model config
# Batch size, learning rate, model/trainer selection and loss settings.
batch_size: 128
# warmup_epochs: 1
# warmup_lr: 1.0e-6
# Written with an explicit decimal point in the mantissa: YAML 1.1 resolvers
# (e.g. PyYAML) load a bare `3e-4` as a string, whereas `3.0e-4` is a float
# under both YAML 1.1 and YAML 1.2.
lr_base: 3.0e-4
model: camn
g_name: CaMN
d_name: ConvDiscriminator
trainer: camn
hidden_size: 512
n_layer: 4
rec_weight: 500
# NOTE(review): no_adv_epoch (999) is larger than epochs (100); presumably
# this keeps adversarial training off for the whole run — confirm against the
# trainer.
no_adv_epoch: 999
# rec_pos_weight: 1
# rec_ver_weight: 0
# rec_fac_weight: 1
# grad_norm: 1
epochs: 100
test_period: 20