ChatRex-7B / config.json
{
  "architectures": [
    "ChatRexAuxForConditionalGeneration"
  ],
  "auto_map": {
    "AutoConfig": "modeling_chatrex.ChatRexAuxConfig",
    "AutoModelForCausalLM": "modeling_chatrex.ChatRexAuxForConditionalGeneration"
  },
  "ignore_index": -100,
  "image_token_index": 32000,
  "model_type": "chatrex",
  "projector_depth": 2,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "huggingface_checkpoints/lmsys/vicuna-7b-v1.5",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "pad_token_id": 0,
    "rms_norm_eps": 1e-05,
    "torch_dtype": "bfloat16",
    "vocab_size": 32104
  },
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "vision_aux_config": {
    "optimize_vision_tower_aux": false,
    "type": "OpenCLIPVisionTower",
    "use_last_feat": true,
    "vision_tower": "openclip-convnext-large-d-320-laion2B-s29B-b131K-ft-soup"
  },
  "vision_config": {
    "_name_or_path": "huggingface_checkpoints/openai/clip-vit-large-patch14-336",
    "dropout": 0.0,
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "visual_prompt_encoder_config": {
    "add_pos_embedding": true,
    "channel_per_level": [
      192,
      384,
      768,
      1536
    ],
    "output_size": 7,
    "pos_embedding_dim": 2880,
    "spatail_scale": 0.25,
    "type": "MultiLevelROIVisualPrompt",
    "with_additional_projection": false
  },
  "visual_prompt_hidden_size": 2880
}
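
Because "auto_map" routes AutoConfig and AutoModelForCausalLM to the custom classes in modeling_chatrex.py, the checkpoint must be loaded with trust_remote_code=True. Below is a minimal loading sketch; the Hub id "IDEA-Research/ChatRex-7B" is an illustrative assumption, substitute the actual repository path of this checkpoint.

from transformers import AutoConfig, AutoModelForCausalLM

# Hypothetical Hub id; replace with the real path of this checkpoint.
model_id = "IDEA-Research/ChatRex-7B"

# "auto_map" resolves AutoConfig to ChatRexAuxConfig and AutoModelForCausalLM
# to ChatRexAuxForConditionalGeneration, both defined in modeling_chatrex.py
# inside the repo, so trust_remote_code=True is required for either call.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    torch_dtype="auto",  # "auto" defers to the dtype stored with the checkpoint
    trust_remote_code=True,
)

Since trust_remote_code downloads and executes code from the repository, it is good practice to pin a specific revision (for example the commit hash of this file) via the revision argument of from_pretrained.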