zwd973-deepseek committed on
Commit
7c0fdaa
1 Parent(s): 487c5e7

initial commit

Browse files
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekConfig",
9
+ "AutoModel": "modeling_deepseek.DeepseekModel",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekForCausalLM"
11
+ },
12
+ "bos_token_id": 100000,
13
+ "eos_token_id": 100001,
14
+ "first_k_dense_replace": 1,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 2048,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 10944,
19
+ "max_position_embeddings": 4096,
20
+ "model_type": "deepseek",
21
+ "moe_intermediate_size": 1408,
22
+ "moe_layer_freq": 1,
23
+ "n_routed_experts": 64,
24
+ "n_shared_experts": 2,
25
+ "norm_topk_prob": false,
26
+ "num_attention_heads": 16,
27
+ "num_experts_per_tok": 6,
28
+ "num_hidden_layers": 28,
29
+ "num_key_value_heads": 16,
30
+ "pretraining_tp": 1,
31
+ "rms_norm_eps": 1e-06,
32
+ "rope_scaling": null,
33
+ "rope_theta": 10000,
34
+ "scoring_func": "softmax",
35
+ "tie_word_embeddings": false,
36
+ "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.36.0",
38
+ "use_cache": true,
39
+ "vocab_size": 102400
40
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 100000,
4
+ "eos_token_id": 100001,
5
+ "transformers_version": "4.36.2"
6
+ }
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90b014e0c17f625e69057e97a7e7854e157f5213ba6e422f1bbbb4acf73b6ef1
3
+ size 4996067952
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a35ec5893767f38413ebbeb71d161293a26af0d4bdcfe8933d646b047b15e4a5
3
+ size 4997398120
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2c65b9540872d11d0b50accc05163e8eabcf385186122d02d58aa3c2c42b1ea
3
+ size 4996079584
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fce5415ba1bbbc5fe7dcb5a8f4c8e9aba55daaed9ecef7f9a728e73eaf407b4
3
+ size 4997128672
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7da53a8b5f088c14ca9765f3ce92da06392281c5f4cc75b1a76f403af6950f2
3
+ size 4997128672
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89dc70cfd1fb49d07e0914d62693cb66875b9526103b72f37b122f4931bb1722
3
+ size 4997398984
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f9373cf2861f95f7dd5448b120c47c2b18f722931fed885690dfbdc7d236728
3
+ size 2770933048
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|begin▁of▁sentence|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|end▁of▁sentence|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": true,
22
+ "model_max_length": 16384,
23
+ "pad_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<|end▁of▁sentence|>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "sp_model_kwargs": {},
32
+ "unk_token": null,
33
+ "tokenizer_class": "LlamaTokenizerFast"
34
+ }