tracker_project_name: memo
output_dir: outputs
resume_from_checkpoint: null
model_name_or_path: memoavatar/memo
vae: stabilityai/sd-vae-ft-mse
gradient_checkpointing: true
gradient_accumulation_steps: 1
train_batch_size: 2
max_train_steps: 3500
num_train_epochs: -1
enable_xformers_memory_efficient_attention: true
checkpoints_total_limit: 3
robust_training: true
learning_rate: 1e-5
max_grad_norm: 1.0
scale_lr: false
lr_scheduler: constant
lr_warmup_steps: 0
seed: 42
mixed_precision: bf16
use_8bit_adam: false
allow_tf32: true
use_ema: false
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1e-08
dataloader_num_workers: 16
prefetch_factor: 4
checkpointing_steps: 5000
data:
  width: 512
  height: 512
  num_past_frames: 16
  dynamic_past_frames: false
  n_sample_frames: 16
  audio_margin: 2
  metadata_paths:
    - assets/embedding/metadata.jsonl
weighting_scheme: logit_normal
logit_mean: 0.0
logit_std: 1.0
mode_scale: 1.29
noise_scheduler_kwargs:
  num_train_timesteps: 1000
train_reference_net: true
train_diffusion_net: true
train_image_proj: true
train_audio_proj: true
trainable_modules:
  - to_q
  - to_k
  - to_v
uncond_img_ratio: 0.05
uncond_audio_ratio: 0.05
start_ratio: 0.05