{
  "architectures": [
    "CLIPModel"
  ],
  "initializer_factor": 1.0,
  "logit_scale_init_value": 2.6592,
  "model_type": "clip",
  "projection_dim": 512,
  "text_config": {
    "hidden_act": "gelu",
    "max_position_embeddings": 256,
    "model_type": "clip_text_model"
  },
  "torch_dtype": "float32",
  "transformers_version": "4.35.2",
  "vision_config": {
    "hidden_act": "gelu",
    "image_size": 512,
    "model_type": "clip_vision_model",
    "patch_size": 16
  }
}