libra-11b-chat / vision_tokenizer_config.yaml
yifanxu
model version 1.0
c6b22dd
raw
history blame contribute delete
671 Bytes
# Vision tokenizer settings.
# NOTE(review): the nesting below is reconstructed. `params:` and `ddconfig:`
# carry no inline value, so the keys that follow each of them are taken to be
# their children (everything after `ddconfig:` is placed under it). Confirm
# against the code that loads this file.
freeze: true
# 24*24 (resolution) + 2 (<img> and <\img>); corresponding to
# model_config.max_vision_token_length, dataset_config.image_size
max_vision_token_length: 578
params:
  embed_dim: 1024  # debug
  ckpt_path: vqgan.ckpt
  codebook_size: 512
  num_codebook: 2
  ddconfig:
    # only_auto_encoder: True
    encoder_name: openai-clip-vit-large-patch14-336
    select_layer: [2, 10, 18, 22]
    double_z: false
    z_channels: 1024
    resolution: 336  # 336
    in_channels: 3
    out_ch: 3
    ch: 128
    ch_mult: [1, 1, 2, 4, 8]  # num_down = len(ch_mult)-1
    num_res_blocks: 2
    # Same value as initial_resolution below and the 24*24 term of
    # max_vision_token_length; presumably 336 / 14 per encoder_name — confirm.
    attn_resolutions: [24]
    dropout: 0.0
    initial_resolution: 24
    num_attn_head: 8