---
# Training configuration for the image fine-tuning stage.
#
# NOTE(review): this file was restored from a single collapsed line; the
# nesting below is reconstructed from the key names. Confirm against the
# training script that each key sits at the level it is read from.

# Selects the image (rather than video) fine-tuning mode.
image_finetune: true

# Directory for run outputs, and the base model to start from.
output_dir: "outputs"
pretrained_model_path: "models/StableDiffusion/stable-diffusion-v1-5"

# Presumably forwarded as keyword arguments to the noise scheduler
# constructor -- verify against the consumer.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "scaled_linear"
  steps_offset: 1
  clip_sample: false

# Training dataset location.
# NOTE(review): these are absolute, machine-specific paths; adjust them for
# the local environment before running.
train_data:
  csv_path: "/mnt/petrelfs/guoyuwei/projects/datasets/webvid/results_2M_val.csv"
  video_folder: "/mnt/petrelfs/guoyuwei/projects/datasets/webvid/2M_val"
  sample_size: 256

# Prompts and sampling settings used for validation.
# NOTE(review): num_inference_steps and guidance_scale are assumed to be
# children of validation_data, not top-level keys -- confirm.
validation_data:
  prompts:
    - "Snow rocky mountains peaks canyon. Snow blanketed rocky mountains surround and shadow deep canyons."
    - "A drone view of celebration with Christma tree and fireworks, starry sky - background."
    - "Robot dancing in times square."
    - "Pacific coast, carmel by the sea ocean and waves."
  num_inference_steps: 25
  guidance_scale: 8.0

# Module name patterns to train; presumably "." matches every module --
# TODO confirm how the consumer matches these patterns.
trainable_modules:
  - "."

# Empty string: no UNet checkpoint path is given.
unet_checkpoint_path: ""

learning_rate: 1.0e-5
train_batch_size: 50

# Training length and checkpoint cadence.
# NOTE(review): -1 presumably disables the epoch-based limits so that the
# step-based values apply -- confirm.
max_train_epoch: -1
max_train_steps: 100
checkpointing_epochs: -1
checkpointing_steps: 60

# validation_steps (5000) is larger than max_train_steps (100), so a periodic
# trigger at that interval would not be reached within this run; presumably
# validation_steps_tuple lists the explicit steps that do trigger it --
# confirm.
validation_steps: 5000
validation_steps_tuple: [2, 50]

global_seed: 42
mixed_precision_training: true
enable_xformers_memory_efficient_attention: true

is_debug: false