# data settings
# NOTE(review): the comments in this section are inferred from key names
# only; confirm against the consuming config schema.
data:
  # Dataset class, transform preset and resolution.
  type: SanaWebDatasetMS
  transform: default_train
  image_size: 512
  # Empty list: no data directories are configured in this file.
  data_dir: []
  sort_dataset: false
  hq_only: false
  valid_num: 0
  # `prompt` is the only caption entry listed, with value 1.
  caption_proportion: {prompt: 1}
  external_caption_suffixes: []
  external_clipscore_suffixes: []
  # Presumably a CLIP-score filter and its temperature — TODO confirm.
  clip_thr: 0.0
  clip_thr_temperature: 1.0
  # Both feature-loading flags are off.
  load_text_feat: false
  load_vae_feat: false
# model settings
model:
  model: SanaMS_600M_P1_D28
  image_size: 512
  mixed_precision: fp16  # one of: fp16, fp32, bf16
  fp32_attention: true
  # Unset. Written as an explicit null rather than a bare `key:` so the
  # empty value cannot be mistaken for an accidentally deleted one.
  load_from: null
  resume_from:
    # Unset (explicit null), same reasoning as load_from.
    checkpoint: null
    load_ema: false
    resume_lr_scheduler: true
    resume_optimizer: true
  # NOTE(review): this names 1024 while image_size above is 512 — confirm
  # that the combination is intended.
  aspect_ratio_type: ASPECT_RATIO_1024
  multi_scale: true
  pe_interpolation: 1.0
  micro_condition: false
  attn_type: linear  # one of: flash, linear, vanilla, triton_linear
  cross_norm: false
  autocast_linear_attn: false
  ffn_type: glumbconv
  # Three entries; the last one is null. It is spelled out explicitly so the
  # empty slot is not read as a truncated list.
  mlp_acts:
    - silu
    - silu
    - null
  mlp_ratio: 2.5
  use_pe: false
  qk_norm: false
  class_dropout_prob: 0.0
  linear_head_dim: 32
  # CFG & PAG settings
  cfg_scale: 4
  guidance_type: classifier-free
  pag_applied_layers: [14]
# text encoder settings
# NOTE(review): comments here are inferred from key names; confirm against
# the consuming config schema.
text_encoder:
  # Encoder identity and sizing.
  text_encoder_name: gemma-2-2b-it
  model_max_length: 300
  caption_channels: 2304
  # y_norm is off; its scale factor is left at 1.0.
  y_norm_scale_factor: 1.0
  y_norm: false
  # Empty list: no chi_prompt entries are defined in this file.
  chi_prompt: []
# VAE settings
vae:
  # Which autoencoder to use and where its pretrained weights are named.
  vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
  vae_type: dc-ae
  # NOTE(review): presumably these two mirror the "f32c32" part of the
  # pretrained name above — confirm they must be kept in sync.
  vae_downsample_rate: 32
  vae_latent_dim: 32
  scale_factor: 0.41407
  sample_posterior: true
# Scheduler settings
scheduler:
  train_sampling_steps: 1000
  # Canonical lowercase boolean, matching every other flag in this file.
  predict_v: true
  noise_schedule: linear_flow
  pred_sigma: false
  flow_shift: 1.0
  # Timestep weighting and its two logit_* parameters.
  # NOTE(review): presumably logit_mean/logit_std only apply when
  # weighting_scheme is logit_normal — confirm.
  weighting_scheme: logit_normal
  logit_mean: 0.0
  logit_std: 1.0
  vis_sampler: flow_dpm-solver
# training settings
# NOTE(review): group headings below are inferred from key names; confirm
# against the consuming config schema.
train:
  # Run basics.
  num_workers: 4
  seed: 43
  train_batch_size: 32
  num_epochs: 100
  # Gradient handling.
  gradient_accumulation_steps: 1
  grad_checkpointing: false
  gradient_clip: 1.0
  gc_step: 1
  # optimizer settings
  optimizer:
    type: AdamW
    lr: 0.0001
    weight_decay: 0.03
    eps: 1.0e-10
  lr_schedule: constant
  lr_schedule_args: {num_warmup_steps: 500}
  auto_lr: {rule: sqrt}
  ema_rate: 0.9999
  eval_batch_size: 16
  use_fsdp: false
  use_flash_attn: false
  eval_sampling_steps: 250
  lora_rank: 4
  log_interval: 50
  # Quoted on purpose: this is the string "null", not a YAML null.
  mask_type: "null"
  mask_loss_coef: 0.0
  load_mask_index: false
  snr_loss: false
  real_prompt_ratio: 1.0
  debug_nan: false
  # checkpoint settings
  save_image_epochs: 1
  save_model_epochs: 1
  save_model_steps: 1000000
  # visualization settings
  visualize: false
  null_embed_root: output/pretrained_models/
  valid_prompt_embed_root: output/tmp_embed/
  # Free-text prompts are quoted so punctuation inside them can never be
  # read as YAML syntax.
  validation_prompts:
    - 'dog'
    - 'portrait photo of a girl, photograph, highly detailed face, depth of field'
    - 'Self-portrait oil painting, a beautiful cyborg with golden hair, 8k'
    - 'Astronaut in a jungle, cold color palette, muted colors, detailed, 8k'
    - 'A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece'
  local_save_vis: false
  deterministic_validation: true
  # Online metric evaluation is off; the step and directory are still set.
  online_metric: false
  eval_metric_step: 5000
  online_metric_dir: metric_helper
  # work dir settings
  work_dir: /cache/exps/
  skip_step: 0
  # LCM settings
  loss_type: huber
  huber_c: 0.001
  num_ddim_timesteps: 50
  w_min: 3.0
  w_max: 15.0
  ema_decay: 0.95