{
  "architectures": [
    "CLIPModel"
  ],
  "initializer_factor": 1.0,
  "logit_scale_init_value": 2.6592,
  "model_type": "clip",
  "projection_dim": 512,
  "text_config": {
    "heads": 8,
    "layers": 12,
    "model_type": "clip_text_model"
  },
  "torch_dtype": "float32",
  "transformers_version": "4.34.0",
  "vision_config": {
    "model_type": "clip_vision_model",
    "patch_size": 16
  }
}