{ "embed_dim": 512, "image_resolution": 224, "vision_layers": 12, "vision_width": 768, "vision_patch_size": 32, "context_length": 77, "vocab_size": 49408, "transformer_width": 512, "transformer_heads": 8, "transformer_layers": 12, "mean": [0.48145466, 0.4578275, 0.40821073], "std": [0.26862954, 0.26130258, 0.27577711] }