# Training configuration: dataset, model, text encoder, VAE, scheduler and
# training-loop settings.
#
# NOTE(review): this file was previously collapsed onto two physical lines,
# which made the first line parse as a single comment and the second line a
# YAML syntax error. The nesting below was reconstructed from key order and
# the section comments; confirm it against the schema of the consuming loader
# (in particular `model.resume_from` and the sub-mappings under `train`).

# data settings
data:
  data_dir: []
  caption_proportion:
    prompt: 1
  external_caption_suffixes: []
  external_clipscore_suffixes: []
  clip_thr_temperature: 1.0
  clip_thr: 0.0
  sort_dataset: false
  load_text_feat: false
  load_vae_feat: false
  transform: default_train
  type: SanaWebDatasetMS
  image_size: 512
  hq_only: false
  valid_num: 0

# model settings
model:
  model: SanaMS_600M_P1_D28
  image_size: 512
  mixed_precision: fp16  # ['fp16', 'fp32', 'bf16']
  fp32_attention: true
  # Explicit null: no value was given for this key.
  load_from: null
  resume_from:
    # Explicit null: no value was given for this key.
    checkpoint: null
    load_ema: false
    resume_lr_scheduler: true
    resume_optimizer: true
  aspect_ratio_type: ASPECT_RATIO_1024
  multi_scale: true
  pe_interpolation: 1.0
  micro_condition: false
  attn_type: linear  # 'flash', 'linear', 'vanilla', 'triton_linear'
  cross_norm: false
  autocast_linear_attn: false
  ffn_type: glumbconv
  # The third entry is intentionally empty (null), as in the original list.
  mlp_acts:
    - silu
    - silu
    - null
  mlp_ratio: 2.5
  use_pe: false
  qk_norm: false
  class_dropout_prob: 0.0
  linear_head_dim: 32
  # CFG & PAG settings
  cfg_scale: 4
  guidance_type: classifier-free
  pag_applied_layers: [14]

# text encoder settings
text_encoder:
  text_encoder_name: gemma-2-2b-it
  caption_channels: 2304
  y_norm: false
  y_norm_scale_factor: 1.0
  model_max_length: 300
  chi_prompt: []

# VAE settings
vae:
  vae_type: dc-ae
  vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
  scale_factor: 0.41407
  vae_latent_dim: 32
  vae_downsample_rate: 32
  sample_posterior: true

# Scheduler settings
scheduler:
  train_sampling_steps: 1000
  predict_v: true
  noise_schedule: linear_flow
  pred_sigma: false
  flow_shift: 1.0
  weighting_scheme: logit_normal
  logit_mean: 0.0
  logit_std: 1.0
  vis_sampler: flow_dpm-solver

# training settings
train:
  num_workers: 4
  seed: 43
  train_batch_size: 32
  num_epochs: 100
  gradient_accumulation_steps: 1
  grad_checkpointing: false
  gradient_clip: 1.0
  gc_step: 1
  # optimizer settings
  optimizer:
    eps: 1.0e-10
    lr: 0.0001
    type: AdamW
    weight_decay: 0.03
  lr_schedule: constant
  lr_schedule_args:
    num_warmup_steps: 500
  auto_lr:
    rule: sqrt
  ema_rate: 0.9999
  eval_batch_size: 16
  use_fsdp: false
  use_flash_attn: false
  eval_sampling_steps: 250
  lora_rank: 4
  log_interval: 50
  # Quoted on purpose: this is the string "null", not a YAML null.
  mask_type: 'null'
  mask_loss_coef: 0.0
  load_mask_index: false
  snr_loss: false
  real_prompt_ratio: 1.0
  debug_nan: false
  # checkpoint settings
  save_image_epochs: 1
  save_model_epochs: 1
  save_model_steps: 1000000
  # visualization settings
  visualize: false
  null_embed_root: output/pretrained_models/
  valid_prompt_embed_root: output/tmp_embed/
  validation_prompts:
    - dog
    - portrait photo of a girl, photograph, highly detailed face, depth of field
    - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k
    - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k
    - A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece
  local_save_vis: false
  deterministic_validation: true
  online_metric: false
  eval_metric_step: 5000
  online_metric_dir: metric_helper
  # work dir settings
  work_dir: /cache/exps/
  skip_step: 0
  # LCM settings
  loss_type: huber
  huber_c: 0.001
  num_ddim_timesteps: 50
  w_max: 15.0
  w_min: 3.0
  ema_decay: 0.95