unet_additional_kwargs: use_pixel_encoder: false use_img_encoder: false use_inflated_groupnorm: true unet_use_cross_frame_attention: false unet_use_temporal_attention: false use_motion_module: true use_motion_resnet: false motion_module_resolutions: - 1 - 2 - 4 - 8 motion_module_mid_block: true motion_module_decoder_only: false motion_module_type: Vanilla motion_module_kwargs: num_attention_heads: 8 num_transformer_block: 1 attention_block_types: - Temporal_Self - Temporal_Self temporal_position_encoding: true temporal_attention_dim_div: 1 noise_scheduler_kwargs: beta_start: 0.00085 beta_end: 0.012 beta_schedule: "linear" original_inference_steps: 100 steps_offset: 1