---
# Run configuration for the "man-skiing" video: model/output paths, the
# training clip and its prompt, validation prompts, and optimization settings.
# NOTE(review): the nesting below was reconstructed from key names (width and
# height occur once per data section); confirm against the consuming script.

# Paths to the pretrained model and to the directory for this run's outputs.
pretrained_model_path: "./checkpoints/stable-diffusion-v1-4"
output_dir: "./outputs/man-skiing"

# Source video and the text prompt paired with it.
train_data:
  video_path: "data/man-skiing.mp4"
  prompt: "a man is skiing"
  n_sample_frames: 24
  width: 512
  height: 512
  sample_start_idx: 0
  sample_frame_rate: 2

# Prompts and sampling settings used for validation.
validation_data:
  prompts:
    - "mickey mouse is skiing on the snow"
    - "spider man is skiing on the beach, cartoon style"
    - "wonder woman, wearing a cowboy hat, is skiing"
    - "a man, wearing pink clothes, is skiing at sunset"
  # Same value as train_data.n_sample_frames.
  video_length: 24
  width: 512
  height: 512
  num_inference_steps: 50
  guidance_scale: 12.5
  use_inv_latent: true
  num_inv_steps: 50

# Optimization schedule.
# Written with an explicit decimal point so YAML 1.1 loaders resolve it as a
# float rather than a string.
learning_rate: 3.0e-5
train_batch_size: 1
max_train_steps: 500
# NOTE(review): checkpointing_steps (1000) is larger than max_train_steps
# (500). If checkpoints are written every checkpointing_steps steps, none will
# be written during this run -- confirm this is intended.
checkpointing_steps: 1000
validation_steps: 100
# Presumably name fragments selecting which modules are trained; verify
# against the training script.
trainable_modules:
  - "attn1.to_q"
  - "attn2.to_q"
  - "attn_temp"

# Reproducibility and memory/precision switches.
seed: 33
mixed_precision: "fp16"
use_8bit_adam: false
gradient_checkpointing: true
enable_xformers_memory_efficient_attention: true