Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,214 Bytes
1da48bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# ---- run mode and output locations ----
# is_train is on and ddp is off in this config.
is_train: True
ddp: False
stat: ts
root_path: ./
out_path: ./outputs/audio2pose/
project: s2g
# Primary dataset root (directory name indicates BEAT v2.0.0, English subset).
data_path: ./datasets/BEAT_SMPL/beat_v2.0.0/beat_english_v2.0.0/
# ---- evaluation model ----
# Weights file plus two names used to locate the evaluation network
# (presumably module name and class name — confirm against the loader).
e_path: weights/AESKConv_240_100.bin
eval_model: motion_representation
e_name: VAESKConv
# Checkpoint used when testing.
test_ckpt: ./ckpt/beatx2_cospeech_diffusion/last_500.bin
data_path_1: ./datasets/hub/
# ---- normalization statistics ----
# pose_norm is enabled; the four .npy files below hold mean/std for the pose
# and for the translation respectively (file names carry "330" for the pose
# stats, matching pose_dims further down in this file).
pose_norm: True
mean_pose_path: ./mean_std/beatx_2_330_mean.npy
std_pose_path: ./mean_std/beatx_2_330_std.npy
mean_trans_path: ./mean_std/beatx_2_trans_mean.npy
std_trans_path: ./mean_std/beatx_2_trans_std.npy
# ---- pretrained VQ-VAE checkpoints ----
# One checkpoint per part directory (upper, hands, lower, lower_trans),
# all taken at the same step (net_300000.pth).
vqvae_upper_path: ./ckpt/beatx2_rvqvae/RVQVAE_upper/net_300000.pth
vqvae_hands_path: ./ckpt/beatx2_rvqvae/RVQVAE_hands/net_300000.pth
vqvae_lower_path: ./ckpt/beatx2_rvqvae/RVQVAE_lower/net_300000.pth
vqvae_lower_trans_path: ./ckpt/beatx2_rvqvae/RVQVAE_lower_trans/net_300000.pth
use_trans: True
decay_epoch: 500
# ---- VQ-VAE / VAE settings ----
# vqvae_type "rvqvae" matches the RVQVAE_* checkpoint directories above.
vqvae_squeeze_scale: 4
vqvae_type: rvqvae
vqvae_latent_scale: 5
# vae_test_dim (330) equals pose_dims and vae_test_stride (20) equals stride,
# both defined later in this file — keep them in sync if either changes
# (NOTE(review): assumed to be intentional; confirm).
vae_test_len: 32
vae_test_dim: 330
vae_test_stride: 20
# vae_length (240) matches the "240" in the e_path file name above.
vae_length: 240
vae_codebook_size: 256
# vae_grow has four entries, the same count as vae_layer.
vae_layer: 4
vae_grow: [1,1,2,1]
variational: False
# data config
# Only speaker 2 is selected here; the trailing comment preserves the full
# 1-30 speaker list for switching back to all speakers.
training_speakers: [2] #[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
additional_data: False
# NOTE(review): the cache directory name ends in "_2_128", which lines up with
# training_speakers [2] and pose_length 128 in this file — presumably the cache
# is specific to those settings; confirm before changing either.
cache_path: datasets/beat_cache/beat_smplx_en_emage_2_128/
dataset: beat_sep_lower
# new_cache is off (presumably an existing cache at cache_path is reused — confirm).
new_cache: False
# motion config
ori_joints: beat_smplx_joints
tar_joints: beat_smplx_full
# pose_rep and facial_rep (below) both use "smplxflame_30"; pose_fps is 30.
pose_rep: smplxflame_30
pose_fps: 30
rot6d: True
pre_frames: 4
# pose_dims (330) equals vae_test_dim earlier in this file.
pose_dims: 330
# pose_length and test_length are both 128; stride (20) equals vae_test_stride.
pose_length: 128
stride: 20
test_length: 128
# motion_f, audio_f and word_f are all 256 in this config.
motion_f: 256
# No motion pre-encoder / encoder is configured (both null).
m_pre_encoder: null
m_encoder: null
m_fix_pre: False
# ---- audio input ----
audio_rep: onset+amplitude
# audio_sr and audio_fps are both 16000.
audio_sr: 16000
audio_fps: 16000
audio_norm: False
audio_f: 256
# ---- text input ----
word_rep: textgrid
word_index_num: 11195
word_dims: 300
freeze_wordembed: False
word_f: 256
t_pre_encoder: fasttext
t_encoder: null
t_fix_pre: False
# ---- facial input ----
facial_rep: smplxflame_30
facial_dims: 100
facial_norm: False
# facial_f is 0 (presumably disables the facial feature branch — confirm).
facial_f: 0
f_pre_encoder: null
f_encoder: null
f_fix_pre: False
# ---- speaker identity ----
id_rep: onehot
# speaker_f is 0 (presumably disables the speaker feature — confirm).
speaker_f: 0
# ---- optimisation ----
batch_size: 40
# Written with an explicit decimal point on purpose: YAML 1.1 loaders
# (e.g. PyYAML) resolve a bare `5e-5` to the *string* '5e-5' rather than a
# float, because the 1.1 float pattern requires a '.'. `5.0e-5` is the same
# value and parses as a float under both YAML 1.1 and YAML 1.2.
lr_base: 5.0e-5
# ---- model / trainer selection ----
# "diffusion_rvqvae" goes with vqvae_type "rvqvae" set earlier in this file.
model: denoiser
g_name: MDM
trainer: diffusion_rvqvae
hidden_size: 768
n_layer: 1
rec_weight: 1
# NOTE(review): presumably the gradient-clipping threshold — confirm in the trainer.
grad_norm: 0.99
# ---- schedule ----
# epochs is 2000 and test_period is 20
# (presumably evaluation runs every test_period epochs — confirm).
epochs: 2000
test_period: 20
# ---- per-part coefficients ----
# NOTE(review): the suffixes l/f/u/h presumably stand for lower / face /
# upper / hands, matching the per-part VQ-VAE checkpoints; the meaning of the
# "l*" vs "c*" prefixes is not visible from this file — confirm in the trainer.
# All four l* values are 3; of the c* values only cf is 0.
ll: 3
lf: 3
lu: 3
lh: 3
cl: 1
cf: 0
cu: 1
ch: 1
|