# Run configuration: device, task, training and sampling settings, followed by
# per-dataset settings (one top-level mapping per dataset name) and Hydra
# settings.

# device
mode: sample  # train sample
gpu_ids: [3]  # gpu ids
batch_size: 1  # batch size each item denotes one story
num_workers: 4  # number of workers
num_cpu_cores: -1  # number of cpu cores
seed: 0  # random seed
# checkpoint directory
ckpt_dir: /root/lihui/StoryVisualization/save_ckpt_epoch5_new
run_name: ARLDM  # name for this run

# task
dataset: pororo  # pororo flintstones vistsis vistdii
task: visualization  # continuation visualization

# train
# initial learning rate; written with a decimal point so that YAML 1.1 loaders
# (which require one in the mantissa) also resolve it as a float, not a string
init_lr: 1.0e-5
warmup_epochs: 1  # warmup epochs
max_epochs: 5  # max epochs (alternative: 50)
# model file for resume, none for train from scratch
train_model_file: /root/lihui/StoryVisualization/save_ckpt_3last50/ARLDM/last.ckpt
freeze_clip: true  # whether to freeze clip (alternative: false)
freeze_blip: true  # whether to freeze blip (alternative: false)
freeze_resnet: true  # whether to freeze resnet (alternative: false)

# sample
# model file for test
test_model_file: /root/lihui/StoryVisualization/save_ckpt_3last50/ARLDM/last.ckpt
calculate_fid: true  # whether to calculate FID scores
scheduler: ddim  # ddim pndm
guidance_scale: 6  # guidance scale
num_inference_steps: 250  # number of inference steps
# output directory
sample_output_dir: /root/lihui/StoryVisualization/save_samples_128_epoch50

# Per-dataset settings. The top-level key names match the values listed for
# `dataset` above.
# NOTE(review): the *_embedding_tokens values are presumably tokenizer
# vocabulary sizes after adding new_tokens - TODO confirm against the model code.
pororo:
  hdf5_file: /root/lihui/StoryVisualization/pororo.h5
  max_length: 85
  new_tokens:
    - pororo
    - loopy
    - eddy
    - harry
    - poby
    - tongtong
    - crong
    - rody
    - petty
  clip_embedding_tokens: 49416
  blip_embedding_tokens: 30530

flintstones:
  hdf5_file: /path/to/flintstones.h5
  max_length: 91
  new_tokens:
    - fred
    - barney
    - wilma
    - betty
    - pebbles
    - dino
    - slate
  clip_embedding_tokens: 49412
  blip_embedding_tokens: 30525

vistsis:
  hdf5_file: /path/to/vist.h5
  max_length: 100
  clip_embedding_tokens: 49408
  blip_embedding_tokens: 30524

vistdii:
  hdf5_file: /path/to/vist.h5
  max_length: 65
  clip_embedding_tokens: 49408
  blip_embedding_tokens: 30524

# Hydra settings: run in the current directory and do not create an output
# subdirectory.
hydra:
  run:
    dir: .
  output_subdir: null

# NOTE(review): as plain top-level keys these two entries are ordinary config
# values; Hydra normally selects logging configs through the defaults list
# (e.g. `override hydra/job_logging: disabled`). Confirm that they have the
# intended effect with the Hydra version in use.
hydra/job_logging: disabled
hydra/hydra_logging: disabled