{ "_name_or_path": "", "architectures": [ "MultiModalLLM_PT" ], "auto_map": { "AutoConfig": "model_config.VideoChatEConfig", "AutoModel": "modeling_videochate.MultiModalLLM_PT" }, "model_config": { "bridge": { "extra_num_query_token": 64, "name": "qformer", "num_query_token": 32, "qformer_attention_probs_dropout_prob": 0.1, "qformer_drop_path_rate": 0.2, "qformer_hidden_dropout_prob": 0.1 }, "freeze_bridge": false, "freeze_llm": false, "freeze_vision_encoder": false, "llm": { "lora_alpha": 32, "lora_dropout": 0.1, "lora_r": 16, "name": "mistral_7b", "pretrained_llm_path": "mistralai/Mistral-7B-Instruct-v0.3", "use_lora": true, "hidden_size": 4096 }, "loss": { "use_vision_regression_loss": false }, "pretrained_paths": {}, "vision_encoder": { "name":"vit_l14", "img_size":224, "patch_size":16, "d_model":1024, "encoder_embed_dim":1024, "encoder_depth":24, "encoder_num_heads":16, "drop_path_rate": 0.0, "num_frames":16, "tubelet_size":1, "use_checkpoint":false, "checkpoint_num":0, "return_index":-2, "vit_add_ln":true, "pretrained": null } }, "torch_dtype": "float32", "transformers_version": "4.38.0", "use_flash_attention": true, "use_cache": true, "build_decoder":true, "hidden_size": 4096 }