# GENERATE TIME: Fri May 24 08:49:55 2024
# CMD:
# train_edlora.py -opt ortho_datasets/train_configs/ortho/0022_elsa_ortho.yml
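#
# ED-LoRA training config for the single concept "elsa": learns a two-token
# textual embedding (<elsa1> <elsa2>) together with rank-5 LoRA adapters on
# the text encoder and UNet of the nitrosocke/mo-di-diffusion base model.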
name: 0022_elsa_ortho
manual_seed: 1022
mixed_precision: fp16
gradient_accumulation_steps: 1
# dataset and data loader settings
datasets:
  train:
    name: LoraDataset
    concept_list: ortho_datasets/data_configs/elsa.json
    use_caption: true
    use_mask: true
    instance_transform:
      - { type: HumanResizeCropFinalV3, size: 512, crop_p: 0.5 }
      - { type: ToTensor }
      - { type: Normalize, mean: [ 0.5 ], std: [ 0.5 ] }
      - { type: ShuffleCaption, keep_token_num: 1 }
      - { type: EnhanceText, enhance_type: human }
    replace_mapping:
      <TOK>: <elsa1> <elsa2>
    batch_size_per_gpu: 2
    dataset_enlarge_ratio: 500
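    # The transform chain resizes/crops human images to 512 px (crop_p: 0.5
    # suggests a random crop applied half the time), normalizes pixels from
    # [0, 1] to [-1, 1], shuffles comma-separated caption chunks while keeping
    # the first keep_token_num chunks fixed, and appends human-oriented
    # enhancement text. replace_mapping then rewrites the caption placeholder,
    # e.g. "a photo of <TOK>" -> "a photo of <elsa1> <elsa2>", and
    # dataset_enlarge_ratio repeats the small concept set 500x per epoch.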
  val_vis:
    name: PromptDataset
    prompts: datasets/validation_prompts/single-concept/characters/test_girl.txt
    num_samples_per_prompt: 8
    latent_size: [ 4, 64, 64 ]
    replace_mapping:
      <TOK>: <elsa1> <elsa2>
    batch_size_per_gpu: 4
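    # latent_size [4, 64, 64] is the Stable Diffusion latent shape (4 channels,
    # 64x64 spatial), which the VAE decodes 8x to 512x512 images, matching the
    # 512 px training crops.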
models:
  pretrained_path: nitrosocke/mo-di-diffusion
  enable_edlora: true  # true means ED-LoRA, false means vanilla LoRA
  finetune_cfg:
    text_embedding:
      enable_tuning: true
      lr: !!float 1e-3
    text_encoder:
      enable_tuning: true
      lora_cfg:
        rank: 5
        alpha: 1.0
        where: CLIPAttention
      lr: !!float 1e-5
    unet:
      enable_tuning: true
      lora_cfg:
        rank: 5
        alpha: 1.0
        where: Attention
      lr: !!float 1e-4
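  # finetune_cfg tunes three components at different rates: the new token
  # embeddings (1e-3), rank-5 LoRA layers injected into the text encoder's
  # CLIPAttention modules (1e-5), and rank-5 LoRA layers injected into the
  # UNet's Attention modules (1e-4). In the usual LoRA formulation, alpha: 1.0
  # scales the low-rank update as W + (alpha / rank) * B @ A; the exact scaling
  # here should be confirmed against train_edlora.py.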
  new_concept_token: <elsa1>+<elsa2>
  initializer_token: <rand-0.013>+girl
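  # "+" separates per-token settings: <elsa1> and <elsa2> are the two new
  # tokens substituted for <TOK>, and each is paired with its initializer.
  # Following the Mix-of-Show convention, <rand-0.013> initializes the first
  # token from Gaussian noise (std 0.013) and the second token starts from the
  # embedding of an existing word.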
  noise_offset: 0.01
  attn_reg_weight: 0.01
  reg_full_identity: false
  use_mask_loss: true
  gradient_checkpoint: false
  enable_xformers: true
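  # noise_offset adds a small constant offset to the sampled training noise,
  # which is known to help very dark/bright generations. attn_reg_weight and
  # use_mask_loss follow the ED-LoRA recipe: the former weights a
  # cross-attention regularizer on the new tokens and the latter restricts the
  # diffusion loss to the foreground masks enabled by use_mask above; both
  # readings are inferred from the option names rather than from the training
  # code.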
# path
path:
  pretrain_network: ~
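  # "~" is YAML null: no prior network checkpoint is loaded, so training
  # starts from pretrained_path alone.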
# training settings
train:
  optim_g:
    type: AdamW
    lr: !!float 0.0  # unused; per-component lrs are set in models.finetune_cfg
    weight_decay: 0.01
    betas: [ 0.9, 0.999 ]  # align with taming
  # dropkv
  unet_kv_drop_rate: 0
  scheduler: linear
  emb_norm_threshold: !!float 5.5e-1
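  # emb_norm_threshold presumably caps the norm of the learned token
  # embeddings so they stay near the scale of ordinary CLIP token embeddings,
  # and unet_kv_drop_rate: 0 disables the key/value dropout ("dropkv") in the
  # UNet.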
# validation settings
val:
  val_during_save: true
  compose_visualize: true
  alpha_list: [ 0, 0.7, 1.0 ]  # 0 means only visualize embedding (without lora weight)
  sample:
    num_inference_steps: 50
    guidance_scale: 7.5
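  # At validation, 8 samples per prompt (num_samples_per_prompt above) are
  # generated with 50 denoising steps and classifier-free guidance 7.5. Each
  # alpha in alpha_list scales how much of the LoRA weight delta is merged
  # into the base model, so the composed visualization contrasts the embedding
  # alone (alpha 0) with partial (0.7) and full (1.0) ED-LoRA strength.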
# logging settings
logger:
  print_freq: 10
  save_checkpoint_freq: !!float 10000