---
# Fine-tuning run configuration.
# NOTE(review): the nesting under train_data / validation_data was rebuilt from
# a flattened copy of this file (width/height appeared twice at one level);
# confirm the grouping against the script that consumes this config.

pretrained_model_path: CompVis/stable-diffusion-v1-4
output_dir: /content/Tune-A-Video/outputs/city

# Source clip and the text prompt paired with it for training.
train_data:
  video_path: /content/Tune-A-Video/data/city.mp4
  # Folded scalar: the two lines below are joined into one single-line string.
  prompt: >-
    A 360 degree view video of a street with Skies, Walls, Roads, Poles,
    Buildings, Riders, Sidewalks, People, Traffic signs, Bicycles and Cars
  n_sample_frames: 24
  width: 512
  height: 512
  sample_start_idx: 0
  sample_frame_rate: 1

# Settings used when sampling validation output.
validation_data:
  prompts:
    # Same text as train_data.prompt.
    - >-
      A 360 degree view video of a street with Skies, Walls, Roads, Poles,
      Buildings, Riders, Sidewalks, People, Traffic signs, Bicycles and Cars
  # Same value as train_data.n_sample_frames.
  video_length: 24
  width: 512
  height: 512
  num_inference_steps: 50
  guidance_scale: 12.5
  use_inv_latent: true
  num_inv_steps: 50

# NOTE(review): validation_steps: 1 together with max_train_steps: 5 and
# checkpointing_steps: 5 looks like a short smoke-test run — confirm these
# values before launching a full training job.
validation_steps: 1

trainable_modules:
  - attn1.to_q
  - attn2.to_q
  - attn_temp

train_batch_size: 1
max_train_steps: 5

# Optimizer and learning-rate schedule.
learning_rate: 3.0e-05
scale_lr: false
lr_scheduler: constant
lr_warmup_steps: 0
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0

gradient_accumulation_steps: 1
gradient_checkpointing: true

checkpointing_steps: 5
resume_from_checkpoint: null

mixed_precision: fp16
use_8bit_adam: false
enable_xformers_memory_efficient_attention: true

seed: 33