File size: 2,041 Bytes
4a6e43e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# device
mode: sample  # train sample
gpu_ids: [3]  # gpu ids
batch_size: 1  # batch size each item denotes one story
num_workers: 4  # number of workers
num_cpu_cores: -1  # number of cpu cores
seed: 0  # random seed
ckpt_dir: /root/lihui/StoryVisualization/save_ckpt_epoch5_new # checkpoint directory
run_name: ARLDM # name for this run

# task
dataset: pororo  # pororo flintstones vistsis vistdii
task: visualization  # continuation visualization

# train
init_lr: 1e-5  # initial learning rate
warmup_epochs: 1  # warmup epochs
max_epochs: 5   #50  # max epochs
train_model_file: /root/lihui/StoryVisualization/save_ckpt_3last50/ARLDM/last.ckpt # model file for resume, none for train from scratch
freeze_clip: True  #False  # whether to freeze clip
freeze_blip: True  #False  # whether to freeze blip
freeze_resnet: True  #False  # whether to freeze resnet

# sample
test_model_file: /root/lihui/StoryVisualization/save_ckpt_3last50/ARLDM/last.ckpt # model file for test
calculate_fid: True  # whether to calculate FID scores
scheduler: ddim  # ddim pndm
guidance_scale: 6  # guidance scale
num_inference_steps: 250  # number of inference steps
sample_output_dir: /root/lihui/StoryVisualization/save_samples_128_epoch50 # output directory

pororo:
  hdf5_file: /root/lihui/StoryVisualization/pororo.h5
  max_length: 85
  new_tokens: [ "pororo", "loopy", "eddy", "harry", "poby", "tongtong", "crong", "rody", "petty" ]
  clip_embedding_tokens: 49416
  blip_embedding_tokens: 30530

flintstones:
  hdf5_file: /path/to/flintstones.h5
  max_length: 91
  new_tokens: [ "fred", "barney", "wilma", "betty", "pebbles", "dino", "slate" ]
  clip_embedding_tokens: 49412
  blip_embedding_tokens: 30525

vistsis:
  hdf5_file: /path/to/vist.h5
  max_length: 100
  clip_embedding_tokens: 49408
  blip_embedding_tokens: 30524

vistdii:
  hdf5_file: /path/to/vist.h5
  max_length: 65
  clip_embedding_tokens: 49408
  blip_embedding_tokens: 30524

hydra:
  run:
    dir: .
  output_subdir: null
hydra/job_logging: disabled
hydra/hydra_logging: disabled