# Training configuration.
#
# NOTE(review): this file was recovered from a copy in which every line ended
# with a stray " | |" and all indentation had been stripped. The residue has
# been removed and the nesting rebuilt as follows:
#   * `train_data` and `validation_data` had no value of their own and were
#     followed by runs of keys that include duplicate `width` / `height`
#     entries, so those runs are nested under them.
#   * every key after the `dataset_types` list is assumed to be top-level.
# Confirm the layout against the schema of the script that loads this file.

# Filesystem locations (absolute paths, machine-specific).
pretrained_model_path: /home/ubuntu/models/model_scope_diffusers/
output_dir: /home/ubuntu/outputs

# Settings for the training dataset.
train_data:
  width: 1024
  height: 576
  use_bucketing: true
  sample_start_idx: 1
  fps: 24
  frame_step: 1
  n_sample_frames: 20
  json_path: /home/ubuntu/Video-BLIP2-Preprocessor/train_data/my_videos.json

# Settings for validation sampling.
validation_data:
  prompt: duck
  sample_preview: false
  num_frames: 16
  width: 1024
  height: 576
  num_inference_steps: 25
  guidance_scale: 9

dataset_types:
  - json

validation_steps: 500
extra_unet_params: null
extra_text_encoder_params: null

# Schedule, learning rate and optimizer values.
train_batch_size: 1
max_train_steps: 50000
learning_rate: 5.0e-06
scale_lr: false
lr_scheduler: constant
lr_warmup_steps: 0
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
gradient_accumulation_steps: 1

# Checkpointing and resume.
checkpointing_steps: 5000
resume_from_checkpoint: null

# Precision, attention backend and other runtime switches.
mixed_precision: fp16
use_8bit_adam: false
enable_xformers_memory_efficient_attention: false
enable_torch_2_attn: true
seed: 64
extend_dataset: false
cached_latent_dir: null

# LoRA options (`use_unet_lora` here and `use_text_lora` below are both false).
use_unet_lora: false
unet_lora_modules:
  - ResnetBlock2D
text_encoder_lora_modules:
  - CLIPEncoderLayer
lora_rank: 16
lora_path: ''
kwargs: {}

cache_latents: false
gradient_checkpointing: true
# NOTE(review): `use_offset_noise` below is false; presumably this strength is
# ignored while it stays disabled. Verify against the consumer.
offset_noise_strength: 0.1
text_encoder_gradient_checkpointing: false
train_text_encoder: true

# Module name lists selecting what is trained.
trainable_modules:
  - attn1
  - attn2
  - temp_conv
trainable_text_modules:
  - all

use_offset_noise: false
use_text_lora: false