# stable-diffusion-v1-5 / config.yaml
# (HuggingFace repo header residue preserved as comments: "Anyou's picture",
#  commit message "Upload 11 files", commit hash 4a6e43e)
# device
mode: sample  # one of: train | sample
gpu_ids: [3]  # GPU ids to use
batch_size: 1  # batch size; each item denotes one story
num_workers: 4  # number of dataloader workers
num_cpu_cores: -1  # number of cpu cores (-1 presumably means "use all" — confirm in code)
seed: 0  # random seed
ckpt_dir: /root/lihui/StoryVisualization/save_ckpt_epoch5_new  # checkpoint directory
run_name: ARLDM  # name for this run
# task
dataset: pororo  # one of: pororo | flintstones | vistsis | vistdii
task: visualization  # one of: continuation | visualization
# train
# NOTE: written as 1.0e-5 (not bare 1e-5) so YAML 1.1 loaders such as PyYAML
# resolve it as a float; the dotless exponent form loads as a string there.
init_lr: 1.0e-5  # initial learning rate
warmup_epochs: 1  # warmup epochs
max_epochs: 5  # max epochs (previously 50)
train_model_file: /root/lihui/StoryVisualization/save_ckpt_3last50/ARLDM/last.ckpt  # model file for resume, none for train from scratch
# canonical lowercase booleans (yamllint `truthy`); True/False are non-portable 1.1 forms
freeze_clip: true  # whether to freeze clip
freeze_blip: true  # whether to freeze blip
freeze_resnet: true  # whether to freeze resnet
# sample
test_model_file: /root/lihui/StoryVisualization/save_ckpt_3last50/ARLDM/last.ckpt  # model file for test
calculate_fid: true  # whether to calculate FID scores
scheduler: ddim  # one of: ddim | pndm
guidance_scale: 6  # classifier-free guidance scale
num_inference_steps: 250  # number of inference steps
sample_output_dir: /root/lihui/StoryVisualization/save_samples_128_epoch50  # output directory
# per-dataset settings: Pororo
pororo:
  hdf5_file: /root/lihui/StoryVisualization/pororo.h5  # preprocessed dataset file
  max_length: 85  # max text token length
  # character names added to the tokenizer vocabulary
  new_tokens: ["pororo", "loopy", "eddy", "harry", "poby", "tongtong", "crong", "rody", "petty"]
  clip_embedding_tokens: 49416  # CLIP vocab size after adding new tokens
  blip_embedding_tokens: 30530  # BLIP vocab size after adding new tokens
# per-dataset settings: Flintstones
flintstones:
  hdf5_file: /path/to/flintstones.h5  # preprocessed dataset file (placeholder path)
  max_length: 91  # max text token length
  # character names added to the tokenizer vocabulary
  new_tokens: ["fred", "barney", "wilma", "betty", "pebbles", "dino", "slate"]
  clip_embedding_tokens: 49412  # CLIP vocab size after adding new tokens
  blip_embedding_tokens: 30525  # BLIP vocab size after adding new tokens
# per-dataset settings: VIST (story-in-sequence captions)
vistsis:
  hdf5_file: /path/to/vist.h5  # preprocessed dataset file (placeholder path)
  max_length: 100  # max text token length
  clip_embedding_tokens: 49408  # base CLIP vocab size (no extra tokens)
  blip_embedding_tokens: 30524  # base BLIP vocab size (no extra tokens)
# per-dataset settings: VIST (description-in-isolation captions)
vistdii:
  hdf5_file: /path/to/vist.h5  # preprocessed dataset file (placeholder path)
  max_length: 65  # max text token length
  clip_embedding_tokens: 49408  # base CLIP vocab size (no extra tokens)
  blip_embedding_tokens: 30524  # base BLIP vocab size (no extra tokens)
# Hydra runtime settings
hydra:
  run:
    dir: .  # run in the current directory (no timestamped output dir)
  output_subdir: null  # do not write the .hydra/ config snapshot
# NOTE(review): the two keys below look like Hydra defaults-list overrides
# (normally written as `defaults: [override hydra/job_logging: disabled]`);
# as plain top-level keys they may be ignored by Hydra — confirm intent.
hydra/job_logging: disabled
hydra/hydra_logging: disabled