Tongjilibo committed
Commit c93cbe4 · 1 Parent(s): ae6679e

Add qwenvl

{BELLE-LLaMA-7B-2M-enc → BelleGroup/BELLE-LLaMA-7B-2M-enc}/bert4torch_config.json RENAMED
File without changes
Qwen/Qwen2-VL-2B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,59 @@
+{
+  "model_type": "qwen2_vl",
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "image_token_id": 151655,
+  "video_token_id": 151656,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "layer_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "_attn_implementation": "sdpa",
+  "use_sliding_window": false,
+  "skip_init": true,
+  "segment_vocab_size": 0,
+  "rope_rank": "updown",
+  "convert_lm_logits_dtype": "float32",
+  "generation_config": {
+    "tokenizer_config": {"skip_special_tokens": true},
+    "eos_token_id": [151643, 151645],
+    "max_length": 32768
+  },
+
+  "vision_start_token_id": 151652,
+  "vision_end_token_id": 151653,
+  "vision_token_id": 151654,
+  "vision_config": {
+    "depth": 32,
+    "embed_dim": 1280,
+    "mlp_ratio": 4,
+    "num_heads": 16,
+    "in_chans": 3,
+    "hidden_size": 1536,
+    "patch_size": 14,
+    "spatial_merge_size": 2,
+    "spatial_patch_size": 14,
+    "temporal_patch_size": 2,
+    "_attn_implementation_internal": null
+  },
+  "rope_scaling": {
+    "type": "mrope",
+    "mrope_section": [
+      16,
+      24,
+      24
+    ]
+  },
+  "vocab_size": 151936
+}
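For context (editor's note, not part of the commit): bert4torch consumes a bert4torch_config.json like the one above through its standard build_transformer_model entry point. Below is a minimal loading sketch; the checkpoint path is a placeholder, and the exact multimodal call pattern for Qwen2-VL may differ between bert4torch versions:

```python
# Minimal sketch (paths below are placeholders; build_transformer_model is
# bert4torch's standard model-building entry point).
from bert4torch.models import build_transformer_model

config_path = "Qwen/Qwen2-VL-2B-Instruct/bert4torch_config.json"  # file added in this commit
checkpoint_path = "/path/to/Qwen2-VL-2B-Instruct"  # hypothetical local checkpoint directory

model = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path)
model.eval()
```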
Qwen/Qwen2-VL-7B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,59 @@
+{
+  "model": "qwen2_vl",
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "image_token_id": 151655,
+  "video_token_id": 151656,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "layer_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "_attn_implementation": "sdpa",
+  "use_sliding_window": false,
+  "skip_init": true,
+  "segment_vocab_size": 0,
+  "rope_rank": "updown",
+  "convert_lm_logits_dtype": "float32",
+  "generation_config": {
+    "tokenizer_config": {"skip_special_tokens": true},
+    "eos_token_id": [151643, 151645],
+    "max_length": 32768
+  },
+
+  "vision_start_token_id": 151652,
+  "vision_end_token_id": 151653,
+  "vision_token_id": 151654,
+  "vision_config": {
+    "depth": 32,
+    "embed_dim": 1280,
+    "mlp_ratio": 4,
+    "num_heads": 16,
+    "in_chans": 3,
+    "hidden_size": 3584,
+    "patch_size": 14,
+    "spatial_merge_size": 2,
+    "spatial_patch_size": 14,
+    "temporal_patch_size": 2,
+    "_attn_implementation_internal": null
+  },
+  "rope_scaling": {
+    "type": "mrope",
+    "mrope_section": [
+      16,
+      24,
+      24
+    ]
+  },
+  "vocab_size": 152064
+}
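Both new configs use multimodal RoPE: "rope_scaling.type" is "mrope", and "mrope_section" [16, 24, 24] partitions the rotary frequencies across the temporal, height, and width position ids. As a quick sanity check (editor's sketch, not part of the commit), the sections sum to half the attention head dimension in both the 2B and 7B configs:

```python
# head_dim = hidden_size / num_attention_heads; mrope_section sums to head_dim // 2.
for name, hidden_size, num_heads in [("2B", 1536, 12), ("7B", 3584, 28)]:
    head_dim = hidden_size // num_heads  # 128 in both configs
    assert sum([16, 24, 24]) == head_dim // 2  # 64 == 64
    print(name, "head_dim =", head_dim)
```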
{chinese_alpaca_plus_7b → hfl/chinese_alpaca_plus_7b}/bert4torch_config.json RENAMED
File without changes
{chinese_llama_plus_7b → hfl/chinese_llama_plus_7b}/bert4torch_config.json RENAMED
File without changes
openbmb/MiniCPM-Llama3-V-2_5/bert4torch_config.json CHANGED
@@ -39,7 +39,6 @@
   "vision_config": {
     "output_attentions": false,
     "output_hidden_states": false,
-    "use_return_dict": true,
     "attention_dropout": 0.0,
     "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 1152,