{ "architectures": [ "Blip3ModelForConditionalGeneration" ], "auto_map": { "AutoConfig": "configuration_blip_3.Blip3Config", "AutoModelForVision2Seq": "modeling_blip_3.Blip3ModelForConditionalGeneration" }, "model_type": "blip_3", "text_config": { "initial_tokenizer_len": 32012, "model_type": "phi3", "sliding_window": 2047, "torch_dtype": "bfloat16" }, "torch_dtype": "float32", "transformers_version": "4.41.0.dev0", "vision_encoder_config": { "anyres_patch_sampling": true, "image_aspect_ratio": "anyres", "model_type": "blip_3_vision_encoder" }, "vision_tokenizer_config": { "model_type": "blip_3_vision_tokenizer" } }