---
# Image fine-tuning run configuration.
# NOTE(review): the nesting below is reconstructed from the key grouping
# (a key with an empty value followed by its members) -- confirm against the
# training script that consumes this file.

image_finetune: true

output_dir: "outputs"
pretrained_model_path: "models/StableDiffusion/stable-diffusion-v1-5"

# Keyword arguments grouped for the noise scheduler.
# NOTE(review): presumably forwarded unchanged to the scheduler constructor --
# verify the accepted names against the scheduler class in use.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "scaled_linear"
  steps_offset: 1
  clip_sample: false

# Training dataset location and sample size.
# NOTE(review): both paths are absolute and machine-specific; they must be
# changed to point at a local copy of the dataset before running elsewhere.
train_data:
  csv_path: "/mnt/petrelfs/guoyuwei/projects/datasets/webvid/results_2M_val.csv"
  video_folder: "/mnt/petrelfs/guoyuwei/projects/datasets/webvid/2M_val"
  sample_size: 256

# Prompts and sampling settings used for validation.
validation_data:
  prompts:
    - "Snow rocky mountains peaks canyon. Snow blanketed rocky mountains surround and shadow deep canyons."
    # NOTE(review): "Christma" looks like a typo for "Christmas"; the string
    # is left untouched so validation outputs stay comparable to earlier runs.
    - "A drone view of celebration with Christma tree and fireworks, starry sky - background."
    - "Robot dancing in times square."
    - "Pacific coast, carmel by the sea ocean and waves."
  num_inference_steps: 25
  guidance_scale: 8.0

# NOTE(review): "." presumably acts as a match-everything module-name pattern
# -- confirm how the trainer interprets these entries.
trainable_modules:
  - "."

# Empty string: no UNet checkpoint path is given.
unet_checkpoint_path: ""

# Written with an explicit fraction and signed exponent so YAML 1.1 loaders
# resolve it as a float rather than a string.
learning_rate: 1.0e-5
train_batch_size: 50

# NOTE(review): -1 presumably disables the epoch-based limits so that the
# step-based values below apply -- confirm in the training loop.
max_train_epoch: -1
max_train_steps: 100
checkpointing_epochs: -1
checkpointing_steps: 60

# NOTE(review): validation_steps (5000) exceeds max_train_steps (100), so only
# the steps listed in validation_steps_tuple can fall inside this run --
# confirm that this is intended.
validation_steps: 5000
validation_steps_tuple: [2, 50]

global_seed: 42
mixed_precision_training: true
enable_xformers_memory_efficient_attention: true

is_debug: false