# Viewer metadata carried over from the page this config was copied from:
# file size 3,308 bytes, revision f1f9265.
---
# data settings
# Every key in this section is a child of `data`; the 2-space indent is what
# keeps `image_size` here from colliding with `model.image_size`.
data:
  data_dir: []
  caption_proportion:
    prompt: 1
  external_caption_suffixes: []
  external_clipscore_suffixes: []
  clip_thr_temperature: 1.0
  clip_thr: 0.0
  sort_dataset: false
  load_text_feat: false
  load_vae_feat: false
  transform: default_train
  type: SanaWebDatasetMS
  image_size: 512
  hq_only: false
  valid_num: 0
# model settings
# The section key and its first child are both named `model`; the nesting is
# what distinguishes them, so the indentation here is load-bearing.
model:
  model: SanaMS_600M_P1_D28
  image_size: 512
  mixed_precision: fp16  # ['fp16', 'fp32', 'bf16']
  fp32_attention: true
  # Explicit null instead of a bare `load_from:` so the empty value is
  # visibly intentional.
  load_from: null
  # NOTE(review): the four keys below are grouped under `resume_from` based on
  # their names; the original indentation was lost - confirm against the
  # consumer's schema.
  resume_from:
    checkpoint: null
    load_ema: false
    resume_lr_scheduler: true
    resume_optimizer: true
  aspect_ratio_type: ASPECT_RATIO_1024
  multi_scale: true
  pe_interpolation: 1.0
  micro_condition: false
  attn_type: linear  # 'flash', 'linear', 'vanilla', 'triton_linear'
  cross_norm: false
  autocast_linear_attn: false
  ffn_type: glumbconv
  # Three entries; the last one is deliberately empty (null).
  mlp_acts:
    - silu
    - silu
    - null
  mlp_ratio: 2.5
  use_pe: false
  qk_norm: false
  class_dropout_prob: 0.0
  linear_head_dim: 32
  # CFG & PAG settings
  cfg_scale: 4
  guidance_type: classifier-free
  pag_applied_layers: [14]
# text encoder settings
# All keys below are children of `text_encoder`.
text_encoder:
  text_encoder_name: gemma-2-2b-it
  caption_channels: 2304
  y_norm: false
  y_norm_scale_factor: 1.0
  model_max_length: 300
  chi_prompt: []
# VAE settings
# All keys below are children of `vae`.
vae:
  vae_type: dc-ae
  vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
  scale_factor: 0.41407
  vae_latent_dim: 32
  vae_downsample_rate: 32
  sample_posterior: true
# Scheduler settings
# All keys below are children of `scheduler`.
scheduler:
  train_sampling_steps: 1000
  # Canonical lowercase boolean, matching every other flag in this file.
  predict_v: true
  noise_schedule: linear_flow
  pred_sigma: false
  flow_shift: 1.0
  weighting_scheme: logit_normal
  logit_mean: 0.0
  logit_std: 1.0
  vis_sampler: flow_dpm-solver
# training settings
# All keys below are children of `train`. The nested comments mark
# sub-groups within the section, not new top-level sections.
train:
  num_workers: 4
  seed: 43
  train_batch_size: 32
  num_epochs: 100
  gradient_accumulation_steps: 1
  grad_checkpointing: false
  gradient_clip: 1.0
  gc_step: 1
  # optimizer settings
  optimizer:
    eps: 1.0e-10
    lr: 0.0001
    type: AdamW
    weight_decay: 0.03
  lr_schedule: constant
  lr_schedule_args:
    num_warmup_steps: 500
  auto_lr:
    rule: sqrt
  ema_rate: 0.9999
  eval_batch_size: 16
  use_fsdp: false
  use_flash_attn: false
  eval_sampling_steps: 250
  lora_rank: 4
  log_interval: 50
  # Quoted on purpose: this is the string "null", not a YAML null.
  mask_type: 'null'
  mask_loss_coef: 0.0
  load_mask_index: false
  snr_loss: false
  real_prompt_ratio: 1.0
  debug_nan: false
  # checkpoint settings
  save_image_epochs: 1
  save_model_epochs: 1
  save_model_steps: 1000000
  # visualization settings
  visualize: false
  null_embed_root: output/pretrained_models/
  valid_prompt_embed_root: output/tmp_embed/
  validation_prompts:
    - dog
    - portrait photo of a girl, photograph, highly detailed face, depth of field
    - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k
    - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k
    - A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece
  local_save_vis: false
  deterministic_validation: true
  online_metric: false
  eval_metric_step: 5000
  online_metric_dir: metric_helper
  # NOTE(review): `work_dir` and the keys after it are kept under `train`
  # because no new section header precedes them; the original indentation was
  # lost - confirm against the consumer's schema.
  # work dir settings
  work_dir: /cache/exps/
  skip_step: 0
  # LCM settings
  loss_type: huber
  huber_c: 0.001
  num_ddim_timesteps: 50
  w_max: 15.0
  w_min: 3.0
  ema_decay: 0.95