model:
  # Top-level switches and sizes for the model.
  # NOTE(review): what each switch controls is defined by the consuming code,
  # which is not visible in this file.
  use_celeb: true
  use_svd: true
  rm_repeats: true
  # Keep consistent with meta_inner_dim; must be <= n_samples - 1
  # (currently 512 <= 513 - 1).
  n_components: 512
  use_sample_reduce: false
  n_samples: 513
  use_flatten: false
  # Keep equal to personalization_config.params.num_embeds_per_token.
  num_embeds_per_token: 2
  # Dotted path; presumably the class the loader instantiates - TODO confirm.
  target: models.embedding_manager.EmbeddingManagerId
  params:
    # Parameters presumably forwarded to the class named by `target`
    # - TODO confirm against the loader.
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: caption
    image_size: 64
    channels: 4
    # Note: this differs from the setting used for the previously trained model.
    cond_stage_trainable: true
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false
    embedding_reg_weight: 0.0
    unfreeze_model: false
    model_lr: 0.0

  personalization_config:
    params:
      # Keep consistent with cond_stage_config (per the original note; that
      # section is not visible in this part of the file - TODO confirm) and
      # with model.num_embeds_per_token, which is also 2.
      num_embeds_per_token: 2
      mlp_depth: 2
      input_dim: 64
      token_dim: 1024
      # The literal string 'none', not a YAML null.
      loss_type: 'none'