{
  "model_cfg": {
    "embed_dim": 1152,
    "init_logit_bias": -10,
    "custom_text": true,
    "vision_cfg": {
      "image_size": 384,
      "timm_model_name": "vit_so400m_patch14_siglip_384",
      "timm_model_pretrained": false,
      "timm_pool": "map",
      "timm_proj": "none"
    },
    "text_cfg": {
      "context_length": 64,
      "vocab_size": 32000,
      "hf_tokenizer_name": "timm/ViT-B-16-SigLIP",
      "tokenizer_kwargs": {
        "clean": "canonicalize"
      },
      "width": 1152,
      "heads": 16,
      "layers": 27,
      "mlp_ratio": 3.7362,
      "no_causal_mask": true,
      "proj_bias": true,
      "pool_type": "last",
      "norm_kwargs": {
        "eps": 1e-06
      }
    }
  },
  "preprocess_cfg": {
    "mean": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "std": [
      0.26862954,
      0.26130258,
      0.27577711
    ],
    "interpolation": "bicubic",
    "resize_mode": "shortest"
  }
}