---
# Training configuration.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not loadable as a YAML mapping. The block structure below is
# reconstructed: `train_data` and `validation_data` are treated as nested
# mappings (each carries its own `width`/`height`, which would otherwise be
# duplicate keys), and every other key is treated as top-level. Confirm the
# boundaries of the two nested mappings against the consuming script.

# Model and output locations (absolute, machine-specific paths).
pretrained_model_path: /home/ubuntu/models/model_scope_diffusers/
output_dir: /home/ubuntu/outputs

# Settings grouped under `train_data`.
# NOTE(review): several path values below are still `path/to/...`
# placeholders; presumably only the sources listed in `dataset_types` are
# read -- verify against the loader.
train_data:
  width: 1024
  height: 576
  use_bucketing: true
  sample_start_idx: 1
  fps: 24
  frame_step: 1
  n_sample_frames: 24
  single_video_path: path/to/single/video.mp4
  single_video_prompt: ''
  fallback_prompt: ''
  path: path/to/folder/of/videos/
  json_path: /home/ubuntu/all.json
  image_dir: path/to/image/directory
  single_img_prompt: ''

# Settings grouped under `validation_data`.
validation_data:
  prompt: ''
  sample_preview: false
  num_frames: 16
  width: 384
  height: 384
  num_inference_steps: 25
  guidance_scale: 9

dataset_types:
  - json

validation_steps: 500
extra_unet_params: null
extra_text_encoder_params: null

# Batch size, step count, learning-rate and Adam settings.
train_batch_size: 1
max_train_steps: 50000
learning_rate: 5.0e-06
scale_lr: false
lr_scheduler: constant
lr_warmup_steps: 0
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
gradient_accumulation_steps: 1

# Checkpointing and resume.
checkpointing_steps: 5000
resume_from_checkpoint: null

# Precision, optimizer backend and attention implementation flags.
mixed_precision: fp16
use_8bit_adam: false
enable_xformers_memory_efficient_attention: false
enable_torch_2_attn: true

seed: 64
extend_dataset: false
cached_latent_dir: null

# LoRA-related settings.
use_unet_lora: false
unet_lora_modules:
  - ResnetBlock2D
text_encoder_lora_modules:
  - CLIPEncoderLayer
lora_rank: 16
lora_path: ''

kwargs: {}
cache_latents: false
gradient_checkpointing: true

# NOTE(review): `offset_noise_strength` is set while `use_offset_noise` (near
# the end of this file) is false -- presumably the strength is ignored in
# that case; confirm.
offset_noise_strength: 0.1
text_encoder_gradient_checkpointing: false
train_text_encoder: false

trainable_modules:
  - attn1
  - attn2
  - temp_conv
trainable_text_modules:
  - all

use_offset_noise: false
use_text_lora: false