```yaml
unet_additional_kwargs:
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  motion_module_resolutions:
  - 1
  - 2
  - 4
  - 8
  motion_module_mid_block: false
  motion_module_decoder_only: false
  motion_module_type: Vanilla
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types:
    - Temporal_Self
    - Temporal_Self
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 24
    temporal_attention_dim_div: 1
noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
```