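# NOTE: the following three lines are hosting-page residue, not configuration;
# they are kept as comments so the file parses as YAML.
# zzc0208's picture
# Upload 265 files
# f1f9265 verified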
# data settings
# NOTE(review): the original text had no indentation; the nesting below is
# reconstructed from the section comment and key names — confirm against the
# consumer's schema.
data:
  data_dir: []
  # Bare parent key followed by a single entry, so `prompt` is nested here.
  # presumably caption source -> sampling weight; verify against the consumer
  caption_proportion:
    prompt: 1
  external_caption_suffixes: []
  external_clipscore_suffixes: []
  clip_thr_temperature: 1.0
  clip_thr: 0.0
  sort_dataset: false
  load_text_feat: false
  load_vae_feat: false
  transform: default_train
  type: SanaWebDatasetMS
  image_size: 512
  hq_only: false
  valid_num: 0
# model settings
# NOTE(review): the original text had no indentation; the nesting below is
# reconstructed from the section comments and key names — confirm against the
# consumer's schema.
model:
  model: SanaMS_600M_P1_D28
  image_size: 512
  mixed_precision: fp16  # ['fp16', 'fp32', 'bf16']
  fp32_attention: true
  # Empty values are written as explicit nulls (same parsed value as a bare key).
  load_from: null
  # The four keys below followed a bare `resume_from:`; grouped as its children.
  resume_from:
    checkpoint: null
    load_ema: false
    resume_lr_scheduler: true
    resume_optimizer: true
  # NOTE(review): image_size is 512 while this names a 1024 table — confirm intended.
  aspect_ratio_type: ASPECT_RATIO_1024
  multi_scale: true
  pe_interpolation: 1.0
  micro_condition: false
  attn_type: linear  # 'flash', 'linear', 'vanilla', 'triton_linear'
  cross_norm: false
  autocast_linear_attn: false
  ffn_type: glumbconv
  # Third entry was an empty sequence item, i.e. null; made explicit.
  mlp_acts:
    - silu
    - silu
    - null
  mlp_ratio: 2.5
  use_pe: false
  qk_norm: false
  class_dropout_prob: 0.0
  linear_head_dim: 32
  # CFG & PAG settings
  # NOTE(review): placed under `model` because no new section key precedes
  # them — confirm against the consumer's schema.
  cfg_scale: 4
  guidance_type: classifier-free
  pag_applied_layers: [14]
# text encoder settings
# NOTE(review): nesting reconstructed (the original text had no indentation);
# confirm against the consumer's schema.
text_encoder:
  text_encoder_name: gemma-2-2b-it
  caption_channels: 2304
  y_norm: false
  y_norm_scale_factor: 1.0
  model_max_length: 300
  chi_prompt: []
# VAE settings
# NOTE(review): nesting reconstructed (the original text had no indentation);
# confirm against the consumer's schema.
vae:
  vae_type: dc-ae
  vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
  scale_factor: 0.41407
  vae_latent_dim: 32
  vae_downsample_rate: 32
  sample_posterior: true
# Scheduler settings
# NOTE(review): nesting reconstructed (the original text had no indentation);
# confirm against the consumer's schema.
scheduler:
  train_sampling_steps: 1000
  # Canonical lowercase boolean (was `True`; same parsed value).
  predict_v: true
  noise_schedule: linear_flow
  pred_sigma: false
  flow_shift: 1.0
  weighting_scheme: logit_normal
  logit_mean: 0.0
  logit_std: 1.0
  vis_sampler: flow_dpm-solver
# training settings
# NOTE(review): the original text had no indentation. Everything from here to
# the end of the file is nested under `train` because no further section key
# appears; the checkpoint / visualization / work dir / LCM groups in
# particular should be confirmed against the consumer's schema.
train:
  num_workers: 4
  seed: 43
  train_batch_size: 32
  num_epochs: 100
  gradient_accumulation_steps: 1
  grad_checkpointing: false
  gradient_clip: 1.0
  gc_step: 1
  # optimizer settings
  optimizer:
    eps: 1.0e-10
    lr: 0.0001
    type: AdamW
    weight_decay: 0.03
  lr_schedule: constant
  lr_schedule_args:
    num_warmup_steps: 500
  auto_lr:
    rule: sqrt
  ema_rate: 0.9999
  eval_batch_size: 16
  use_fsdp: false
  use_flash_attn: false
  eval_sampling_steps: 250
  lora_rank: 4
  log_interval: 50
  # Quoted on purpose: this is the string 'null', not a YAML null.
  mask_type: 'null'
  mask_loss_coef: 0.0
  load_mask_index: false
  snr_loss: false
  real_prompt_ratio: 1.0
  debug_nan: false
  # checkpoint settings
  save_image_epochs: 1
  save_model_epochs: 1
  save_model_steps: 1000000
  # visualization settings
  visualize: false
  null_embed_root: output/pretrained_models/
  valid_prompt_embed_root: output/tmp_embed/
  validation_prompts:
    - dog
    - portrait photo of a girl, photograph, highly detailed face, depth of field
    - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k
    - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k
    - A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece
  local_save_vis: false
  deterministic_validation: true
  online_metric: false
  eval_metric_step: 5000
  online_metric_dir: metric_helper
  # work dir settings
  work_dir: /cache/exps/
  skip_step: 0
  # LCM settings
  loss_type: huber
  huber_c: 0.001
  num_ddim_timesteps: 50
  w_max: 15.0
  w_min: 3.0
  ema_decay: 0.95