{
  "embed_dim": 512,
  "vision_cfg": {
    "image_size": 256,
    "layers": 12,
    "width": 768,
    "patch_size": 32,
    "global_average_pool": true
  },
  "text_cfg": {
    "context_length": 77,
    "vocab_size": 49408,
    "width": 512,
    "heads": 8,
    "layers": 12
  }
}