ramu0e committed on
Commit
2db96cc
·
verified ·
1 Parent(s): 89c4322

Training in progress, step 20

Browse files
config.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepseek-ai/deepseek-vl2-tiny",
3
+ "architectures": [
4
+ "DeepseekVLV2ForCausalLM"
5
+ ],
6
+ "candidate_resolutions": [
7
+ [
8
+ 384,
9
+ 384
10
+ ],
11
+ [
12
+ 384,
13
+ 768
14
+ ],
15
+ [
16
+ 768,
17
+ 384
18
+ ],
19
+ [
20
+ 384,
21
+ 1152
22
+ ],
23
+ [
24
+ 1152,
25
+ 384
26
+ ],
27
+ [
28
+ 384,
29
+ 1536
30
+ ],
31
+ [
32
+ 1536,
33
+ 384
34
+ ],
35
+ [
36
+ 768,
37
+ 768
38
+ ],
39
+ [
40
+ 384,
41
+ 1920
42
+ ],
43
+ [
44
+ 1920,
45
+ 384
46
+ ],
47
+ [
48
+ 384,
49
+ 2304
50
+ ],
51
+ [
52
+ 2304,
53
+ 384
54
+ ],
55
+ [
56
+ 768,
57
+ 1152
58
+ ],
59
+ [
60
+ 1152,
61
+ 768
62
+ ],
63
+ [
64
+ 384,
65
+ 2688
66
+ ],
67
+ [
68
+ 2688,
69
+ 384
70
+ ],
71
+ [
72
+ 384,
73
+ 3072
74
+ ],
75
+ [
76
+ 3072,
77
+ 384
78
+ ],
79
+ [
80
+ 768,
81
+ 1536
82
+ ],
83
+ [
84
+ 1536,
85
+ 768
86
+ ],
87
+ [
88
+ 384,
89
+ 3456
90
+ ],
91
+ [
92
+ 3456,
93
+ 384
94
+ ],
95
+ [
96
+ 1152,
97
+ 1152
98
+ ]
99
+ ],
100
+ "global_view_pos": "head",
101
+ "language_config": {
102
+ "_attn_implementation_autoset": true,
103
+ "architectures": [
104
+ "DeepseekV2ForCausalLM"
105
+ ],
106
+ "auto_map": {
107
+ "AutoConfig": "configuration_deepseek.DeepseekV2Config",
108
+ "AutoModel": "modeling_deepseek.DeepseekV2Model",
109
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
110
+ },
111
+ "bos_token_id": 0,
112
+ "eos_token_id": 1,
113
+ "first_k_dense_replace": 1,
114
+ "hidden_size": 1280,
115
+ "intermediate_size": 6848,
116
+ "kv_lora_rank": null,
117
+ "lm_head": true,
118
+ "max_position_embeddings": 4096,
119
+ "model_type": "deepseek_v2",
120
+ "moe_intermediate_size": 896,
121
+ "n_group": 1,
122
+ "n_routed_experts": 64,
123
+ "n_shared_experts": 2,
124
+ "num_attention_heads": 10,
125
+ "num_experts_per_tok": 6,
126
+ "num_hidden_layers": 12,
127
+ "num_key_value_heads": 10,
128
+ "q_lora_rank": null,
129
+ "qk_nope_head_dim": 0,
130
+ "qk_rope_head_dim": 0,
131
+ "rm_head": false,
132
+ "topk_group": 1,
133
+ "topk_method": "greedy",
134
+ "torch_dtype": "bfloat16",
135
+ "use_mla": false,
136
+ "v_head_dim": 0,
137
+ "vocab_size": 129280
138
+ },
139
+ "model_type": "deepseek_vl_v2",
140
+ "projector_config": {
141
+ "model_type": "mlp_projector",
142
+ "n_embed": 1280
143
+ },
144
+ "tile_tag": "2D",
145
+ "torch_dtype": "bfloat16",
146
+ "transformers_version": "4.47.0",
147
+ "vision_config": {
148
+ "layers": 27,
149
+ "mlp_ratio": 3.7362,
150
+ "model_name": "siglip_so400m_patch14_384",
151
+ "model_type": "vision",
152
+ "patch_size": 14,
153
+ "width": 1152
154
+ }
155
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dffe2419820d047290d8198c122d1742190bca5423bcb5e284c8cd35369c26f
3
+ size 4998791376
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:265a427b804a1a362ac68816ef5de51812a402cfca763814ac2d59a697d6fc27
3
+ size 1742542000
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|User|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|Assistant|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": {
19
+ "content": "<|begin▁of▁sentence|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "eos_token": {
26
+ "content": "<|end▁of▁sentence|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ },
32
+ "pad_token": {
33
+ "content": "<|▁pad▁|>",
34
+ "lstrip": false,
35
+ "normalized": false,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ }
39
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2cf0e3331c5f0423ce96f9e49fe6108503de3ca67688a65518161a20f0b12ef
3
+ size 5179