zejunyang committed on
Commit
558ddd8
1 Parent(s): 8b870cd
configs/inference/head_pose_temp/pose_temp.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed399e8a6dc5faab4eb98a676f211dbc108bddad5ec27164540cf25ad6d96818
3
+ size 8048
configs/inference/inference_audio.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a2m_model:
2
+ out_dim: 1404
3
+ latent_dim: 512
4
+ model_path: ./pretrained_model/wav2vec2-base-960h
5
+ only_last_fetures: True
6
+ from_pretrained: True
7
+
8
+ a2p_model:
9
+ out_dim: 6
10
+ latent_dim: 512
11
+ model_path: ./pretrained_model/wav2vec2-base-960h
12
+ only_last_fetures: True
13
+ from_pretrained: True
14
+
15
+ pretrained_model:
16
+ a2m_ckpt: ./pretrained_model/audio2mesh.pt
17
+ a2p_ckpt: ./pretrained_model/audio2pose.pt
configs/inference/inference_v2.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ unet_additional_kwargs:
2
+ use_inflated_groupnorm: true
3
+ unet_use_cross_frame_attention: false
4
+ unet_use_temporal_attention: false
5
+ use_motion_module: true
6
+ motion_module_resolutions:
7
+ - 1
8
+ - 2
9
+ - 4
10
+ - 8
11
+ motion_module_mid_block: true
12
+ motion_module_decoder_only: false
13
+ motion_module_type: Vanilla
14
+ motion_module_kwargs:
15
+ num_attention_heads: 8
16
+ num_transformer_block: 1
17
+ attention_block_types:
18
+ - Temporal_Self
19
+ - Temporal_Self
20
+ temporal_position_encoding: true
21
+ temporal_position_encoding_max_len: 32
22
+ temporal_attention_dim_div: 1
23
+
24
+ noise_scheduler_kwargs:
25
+ beta_start: 0.00085
26
+ beta_end: 0.012
27
+ beta_schedule: "linear"
28
+ clip_sample: false
29
+ steps_offset: 1
30
+ ### Zero-SNR params
31
+ prediction_type: "v_prediction"
32
+ rescale_betas_zero_snr: True
33
+ timestep_spacing: "trailing"
34
+
35
+ sampler: DDIM
configs/prompts/animation_audio.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pretrained_base_model_path: './pretrained_model/stable-diffusion-v1-5'
2
+ pretrained_vae_path: './pretrained_model/sd-vae-ft-mse'
3
+ image_encoder_path: './pretrained_model/image_encoder'
4
+
5
+ denoising_unet_path: "./pretrained_model/denoising_unet.pth"
6
+ reference_unet_path: "./pretrained_model/reference_unet.pth"
7
+ pose_guider_path: "./pretrained_model/pose_guider.pth"
8
+ motion_module_path: "./pretrained_model/motion_module.pth"
9
+
10
+ audio_inference_config: "./configs/inference/inference_audio.yaml"
11
+ inference_config: "./configs/inference/inference_v2.yaml"
12
+ weight_dtype: 'fp16'
13
+
14
+ pose_temp: "./configs/inference/head_pose_temp/pose_temp.npy"
configs/prompts/animation_facereenac.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pretrained_base_model_path: './pretrained_model/stable-diffusion-v1-5'
2
+ pretrained_vae_path: './pretrained_model/sd-vae-ft-mse'
3
+ image_encoder_path: './pretrained_model/image_encoder'
4
+
5
+ denoising_unet_path: "./pretrained_model/denoising_unet.pth"
6
+ reference_unet_path: "./pretrained_model/reference_unet.pth"
7
+ pose_guider_path: "./pretrained_model/pose_guider.pth"
8
+ motion_module_path: "./pretrained_model/motion_module.pth"
9
+
10
+ inference_config: "./configs/inference/inference_v2.yaml"
11
+ weight_dtype: 'fp16'