{
  "model_cfg": {
    "embed_dim": 512,
    "vision_cfg": {
      "timm_model_name": "vit_base_mci_224",
      "timm_model_pretrained": false,
      "timm_pool": "token",
      "timm_proj": null,
      "timm_drop": 0.0,
      "timm_drop_path": 0.0,
      "image_size": 224
    },
    "text_cfg": {
      "context_length": 77,
      "vocab_size": 49408,
      "width": 512,
      "heads": 8,
      "layers": 12,
      "no_causal_mask": false
    },
    "custom_text": true
  },
  "preprocess_cfg": {
    "mean": [0.0, 0.0, 0.0],
    "std": [1.0, 1.0, 1.0],
    "interpolation": "bilinear",
    "resize_mode": "shortest"
  }
}