# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable as a YAML block mapping. The nesting below is reconstructed
# from key order (scalar keys following a value-less `*_kwargs:` key are
# treated as its children) -- confirm against the code that loads this file.

# Keyword arguments grouped under the UNet section.
unet_additional_kwargs:
  use_inflated_groupnorm: true
  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  motion_module_mid_block: false
  motion_module_type: Vanilla

  # Keyword arguments grouped under the motion-module section.
  # Presumably consumed by the module type named in `motion_module_type`
  # above -- TODO confirm.
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1
    zero_initialize: true

# Keyword arguments grouped under the noise-scheduler section.
# Assumed to be a top-level sibling of `unet_additional_kwargs` -- TODO confirm.
noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: false