Upload folder using huggingface_hub
Browse files- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/args.json +6 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/config.json +49 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-aa +3 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-ab +3 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-ac +3 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.xml +0 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/ov_weights_type.md +197 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/special_tokens_map.json +6 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/tokenizer.json +0 -0
- models/neuralmagic/mpt-7b-gsm8k-pt/fp32/tokenizer_config.json +212 -0
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/args.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_id": "neuralmagic/mpt-7b-gsm8k-pt",
|
3 |
+
"run_name": "fp32",
|
4 |
+
"quant_mode": "original",
|
5 |
+
"force_run": true
|
6 |
+
}
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "neuralmagic/mpt-7b-gsm8k-pt",
|
3 |
+
"architectures": [
|
4 |
+
"MPTForCausalLM"
|
5 |
+
],
|
6 |
+
"attn_config": {
|
7 |
+
"model_type": ""
|
8 |
+
},
|
9 |
+
"auto_map": {
|
10 |
+
"AutoConfig": "neuralmagic/mpt-7b-gsm8k-pt--configuration_mpt.MPTConfig",
|
11 |
+
"AutoModelForCausalLM": "neuralmagic/mpt-7b-gsm8k-pt--modeling_mpt.MPTForCausalLM"
|
12 |
+
},
|
13 |
+
"d_model": 4096,
|
14 |
+
"emb_pdrop": 0,
|
15 |
+
"embedding_fraction": 1.0,
|
16 |
+
"expansion_ratio": 4,
|
17 |
+
"init_config": {
|
18 |
+
"emb_init_std": null,
|
19 |
+
"emb_init_uniform_lim": null,
|
20 |
+
"fan_mode": "fan_in",
|
21 |
+
"init_div_is_residual": true,
|
22 |
+
"init_gain": 0,
|
23 |
+
"init_nonlinearity": "relu",
|
24 |
+
"init_std": 0.02,
|
25 |
+
"name": "kaiming_normal_",
|
26 |
+
"verbose": 0
|
27 |
+
},
|
28 |
+
"init_device": "cpu",
|
29 |
+
"initializer_range": 0.02,
|
30 |
+
"is_decoder": true,
|
31 |
+
"layer_norm_epsilon": 1e-05,
|
32 |
+
"learned_pos_emb": true,
|
33 |
+
"logit_scale": null,
|
34 |
+
"max_seq_len": 2048,
|
35 |
+
"model_type": "mpt",
|
36 |
+
"n_heads": 32,
|
37 |
+
"n_layers": 32,
|
38 |
+
"no_bias": true,
|
39 |
+
"norm_type": "low_precision_layernorm",
|
40 |
+
"resid_pdrop": 0,
|
41 |
+
"tie_weights": false,
|
42 |
+
"tie_word_embeddings": false,
|
43 |
+
"tokenizer_name": "EleutherAI/gpt-neox-20b",
|
44 |
+
"torch_dtype": "bfloat16",
|
45 |
+
"transformers_version": "4.34.1",
|
46 |
+
"use_cache": true,
|
47 |
+
"verbose": 0,
|
48 |
+
"vocab_size": 50432
|
49 |
+
}
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-aa
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd953e68651fa7c5998daa8855d9253344e3de2c4b2ce696b7bf5b2411d783cd
|
3 |
+
size 9663676416
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-ab
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36cdaba54ad1c43273fa4d5505b31bd1ad198a208ac4275b77fd412ca6d49aff
|
3 |
+
size 9663676416
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-ac
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f872652fa4415118fbea7b0617fb1a2f08b211ecc3344d2c70dbe6f2438ab485
|
3 |
+
size 7270056209
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/ov_weights_type.md
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
| | name | shape | type | sparsity |
|
2 |
+
|----:|:------------------------------------------------|:-------------|:------------------|------------:|
|
3 |
+
| 0 | self.transformer.wte.weight | [50432,4096] | <Type: 'float32'> | 0 |
|
4 |
+
| 1 | Constant_103534 | [1,1,4096] | <Type: 'float32'> | 0 |
|
5 |
+
| 2 | self.transformer.blocks.0.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
6 |
+
| 3 | 5 | [32,1,2048] | <Type: 'float32'> | 0.000488281 |
|
7 |
+
| 4 | self.transformer.blocks.0.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
8 |
+
| 5 | Constant_103536 | [1,1,4096] | <Type: 'float32'> | 0 |
|
9 |
+
| 6 | self.transformer.blocks.0.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
10 |
+
| 7 | self.transformer.blocks.0.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
11 |
+
| 8 | Constant_103537 | [1,1,4096] | <Type: 'float32'> | 0 |
|
12 |
+
| 9 | self.transformer.blocks.1.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
13 |
+
| 10 | self.transformer.blocks.1.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
14 |
+
| 11 | Constant_103539 | [1,1,4096] | <Type: 'float32'> | 0 |
|
15 |
+
| 12 | self.transformer.blocks.1.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
16 |
+
| 13 | self.transformer.blocks.1.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
17 |
+
| 14 | Constant_103540 | [1,1,4096] | <Type: 'float32'> | 0 |
|
18 |
+
| 15 | self.transformer.blocks.2.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
19 |
+
| 16 | self.transformer.blocks.2.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
20 |
+
| 17 | Constant_103542 | [1,1,4096] | <Type: 'float32'> | 0 |
|
21 |
+
| 18 | self.transformer.blocks.2.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
22 |
+
| 19 | self.transformer.blocks.2.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
23 |
+
| 20 | Constant_103543 | [1,1,4096] | <Type: 'float32'> | 0 |
|
24 |
+
| 21 | self.transformer.blocks.3.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
25 |
+
| 22 | self.transformer.blocks.3.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
26 |
+
| 23 | Constant_103545 | [1,1,4096] | <Type: 'float32'> | 0 |
|
27 |
+
| 24 | self.transformer.blocks.3.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
28 |
+
| 25 | self.transformer.blocks.3.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
29 |
+
| 26 | Constant_103546 | [1,1,4096] | <Type: 'float32'> | 0 |
|
30 |
+
| 27 | self.transformer.blocks.4.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
31 |
+
| 28 | self.transformer.blocks.4.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
32 |
+
| 29 | Constant_103548 | [1,1,4096] | <Type: 'float32'> | 0 |
|
33 |
+
| 30 | self.transformer.blocks.4.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
34 |
+
| 31 | self.transformer.blocks.4.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
35 |
+
| 32 | Constant_103549 | [1,1,4096] | <Type: 'float32'> | 0 |
|
36 |
+
| 33 | self.transformer.blocks.5.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
37 |
+
| 34 | self.transformer.blocks.5.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
38 |
+
| 35 | Constant_103551 | [1,1,4096] | <Type: 'float32'> | 0 |
|
39 |
+
| 36 | self.transformer.blocks.5.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
40 |
+
| 37 | self.transformer.blocks.5.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
41 |
+
| 38 | Constant_103552 | [1,1,4096] | <Type: 'float32'> | 0 |
|
42 |
+
| 39 | self.transformer.blocks.6.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
43 |
+
| 40 | self.transformer.blocks.6.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
44 |
+
| 41 | Constant_103554 | [1,1,4096] | <Type: 'float32'> | 0 |
|
45 |
+
| 42 | self.transformer.blocks.6.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
46 |
+
| 43 | self.transformer.blocks.6.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
47 |
+
| 44 | Constant_103555 | [1,1,4096] | <Type: 'float32'> | 0 |
|
48 |
+
| 45 | self.transformer.blocks.7.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
49 |
+
| 46 | self.transformer.blocks.7.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
50 |
+
| 47 | Constant_103557 | [1,1,4096] | <Type: 'float32'> | 0 |
|
51 |
+
| 48 | self.transformer.blocks.7.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
52 |
+
| 49 | self.transformer.blocks.7.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
53 |
+
| 50 | Constant_103558 | [1,1,4096] | <Type: 'float32'> | 0 |
|
54 |
+
| 51 | self.transformer.blocks.8.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
55 |
+
| 52 | self.transformer.blocks.8.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
56 |
+
| 53 | Constant_103560 | [1,1,4096] | <Type: 'float32'> | 0 |
|
57 |
+
| 54 | self.transformer.blocks.8.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
58 |
+
| 55 | self.transformer.blocks.8.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
59 |
+
| 56 | Constant_103561 | [1,1,4096] | <Type: 'float32'> | 0 |
|
60 |
+
| 57 | self.transformer.blocks.9.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
61 |
+
| 58 | self.transformer.blocks.9.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
62 |
+
| 59 | Constant_103563 | [1,1,4096] | <Type: 'float32'> | 0 |
|
63 |
+
| 60 | self.transformer.blocks.9.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
64 |
+
| 61 | self.transformer.blocks.9.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
65 |
+
| 62 | Constant_103564 | [1,1,4096] | <Type: 'float32'> | 0 |
|
66 |
+
| 63 | self.transformer.blocks.10.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
67 |
+
| 64 | self.transformer.blocks.10.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
68 |
+
| 65 | Constant_103566 | [1,1,4096] | <Type: 'float32'> | 0 |
|
69 |
+
| 66 | self.transformer.blocks.10.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
70 |
+
| 67 | self.transformer.blocks.10.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
71 |
+
| 68 | Constant_103567 | [1,1,4096] | <Type: 'float32'> | 0 |
|
72 |
+
| 69 | self.transformer.blocks.11.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
73 |
+
| 70 | self.transformer.blocks.11.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
74 |
+
| 71 | Constant_103569 | [1,1,4096] | <Type: 'float32'> | 0 |
|
75 |
+
| 72 | self.transformer.blocks.11.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
76 |
+
| 73 | self.transformer.blocks.11.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
77 |
+
| 74 | Constant_103570 | [1,1,4096] | <Type: 'float32'> | 0 |
|
78 |
+
| 75 | self.transformer.blocks.12.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
79 |
+
| 76 | self.transformer.blocks.12.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
80 |
+
| 77 | Constant_103572 | [1,1,4096] | <Type: 'float32'> | 0 |
|
81 |
+
| 78 | self.transformer.blocks.12.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
82 |
+
| 79 | self.transformer.blocks.12.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
83 |
+
| 80 | Constant_103573 | [1,1,4096] | <Type: 'float32'> | 0 |
|
84 |
+
| 81 | self.transformer.blocks.13.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
85 |
+
| 82 | self.transformer.blocks.13.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
86 |
+
| 83 | Constant_103575 | [1,1,4096] | <Type: 'float32'> | 0 |
|
87 |
+
| 84 | self.transformer.blocks.13.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
88 |
+
| 85 | self.transformer.blocks.13.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
89 |
+
| 86 | Constant_103576 | [1,1,4096] | <Type: 'float32'> | 0 |
|
90 |
+
| 87 | self.transformer.blocks.14.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
91 |
+
| 88 | self.transformer.blocks.14.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
92 |
+
| 89 | Constant_103578 | [1,1,4096] | <Type: 'float32'> | 0 |
|
93 |
+
| 90 | self.transformer.blocks.14.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
94 |
+
| 91 | self.transformer.blocks.14.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
95 |
+
| 92 | Constant_103579 | [1,1,4096] | <Type: 'float32'> | 0 |
|
96 |
+
| 93 | self.transformer.blocks.15.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
97 |
+
| 94 | self.transformer.blocks.15.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
98 |
+
| 95 | Constant_103581 | [1,1,4096] | <Type: 'float32'> | 0 |
|
99 |
+
| 96 | self.transformer.blocks.15.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
100 |
+
| 97 | self.transformer.blocks.15.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
101 |
+
| 98 | Constant_103582 | [1,1,4096] | <Type: 'float32'> | 0 |
|
102 |
+
| 99 | self.transformer.blocks.16.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
103 |
+
| 100 | self.transformer.blocks.16.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
104 |
+
| 101 | Constant_103584 | [1,1,4096] | <Type: 'float32'> | 0 |
|
105 |
+
| 102 | self.transformer.blocks.16.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
106 |
+
| 103 | self.transformer.blocks.16.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
107 |
+
| 104 | Constant_103585 | [1,1,4096] | <Type: 'float32'> | 0 |
|
108 |
+
| 105 | self.transformer.blocks.17.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
109 |
+
| 106 | self.transformer.blocks.17.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
110 |
+
| 107 | Constant_103587 | [1,1,4096] | <Type: 'float32'> | 0 |
|
111 |
+
| 108 | self.transformer.blocks.17.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
112 |
+
| 109 | self.transformer.blocks.17.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
113 |
+
| 110 | Constant_103588 | [1,1,4096] | <Type: 'float32'> | 0 |
|
114 |
+
| 111 | self.transformer.blocks.18.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
115 |
+
| 112 | self.transformer.blocks.18.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
116 |
+
| 113 | Constant_103590 | [1,1,4096] | <Type: 'float32'> | 0 |
|
117 |
+
| 114 | self.transformer.blocks.18.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
118 |
+
| 115 | self.transformer.blocks.18.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
119 |
+
| 116 | Constant_103591 | [1,1,4096] | <Type: 'float32'> | 0 |
|
120 |
+
| 117 | self.transformer.blocks.19.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
121 |
+
| 118 | self.transformer.blocks.19.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
122 |
+
| 119 | Constant_103593 | [1,1,4096] | <Type: 'float32'> | 0 |
|
123 |
+
| 120 | self.transformer.blocks.19.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
124 |
+
| 121 | self.transformer.blocks.19.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
125 |
+
| 122 | Constant_103594 | [1,1,4096] | <Type: 'float32'> | 0 |
|
126 |
+
| 123 | self.transformer.blocks.20.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
127 |
+
| 124 | self.transformer.blocks.20.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
128 |
+
| 125 | Constant_103596 | [1,1,4096] | <Type: 'float32'> | 0 |
|
129 |
+
| 126 | self.transformer.blocks.20.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
130 |
+
| 127 | self.transformer.blocks.20.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
131 |
+
| 128 | Constant_103597 | [1,1,4096] | <Type: 'float32'> | 0 |
|
132 |
+
| 129 | self.transformer.blocks.21.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
133 |
+
| 130 | self.transformer.blocks.21.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
134 |
+
| 131 | Constant_103599 | [1,1,4096] | <Type: 'float32'> | 0 |
|
135 |
+
| 132 | self.transformer.blocks.21.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
136 |
+
| 133 | self.transformer.blocks.21.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
137 |
+
| 134 | Constant_103600 | [1,1,4096] | <Type: 'float32'> | 0 |
|
138 |
+
| 135 | self.transformer.blocks.22.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
139 |
+
| 136 | self.transformer.blocks.22.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
140 |
+
| 137 | Constant_103602 | [1,1,4096] | <Type: 'float32'> | 0 |
|
141 |
+
| 138 | self.transformer.blocks.22.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
142 |
+
| 139 | self.transformer.blocks.22.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
143 |
+
| 140 | Constant_103603 | [1,1,4096] | <Type: 'float32'> | 0 |
|
144 |
+
| 141 | self.transformer.blocks.23.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
145 |
+
| 142 | self.transformer.blocks.23.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
146 |
+
| 143 | Constant_103605 | [1,1,4096] | <Type: 'float32'> | 0 |
|
147 |
+
| 144 | self.transformer.blocks.23.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
148 |
+
| 145 | self.transformer.blocks.23.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
149 |
+
| 146 | Constant_103606 | [1,1,4096] | <Type: 'float32'> | 0 |
|
150 |
+
| 147 | self.transformer.blocks.24.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
151 |
+
| 148 | self.transformer.blocks.24.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
152 |
+
| 149 | Constant_103608 | [1,1,4096] | <Type: 'float32'> | 0 |
|
153 |
+
| 150 | self.transformer.blocks.24.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
154 |
+
| 151 | self.transformer.blocks.24.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
155 |
+
| 152 | Constant_103609 | [1,1,4096] | <Type: 'float32'> | 0 |
|
156 |
+
| 153 | self.transformer.blocks.25.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
157 |
+
| 154 | self.transformer.blocks.25.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
158 |
+
| 155 | Constant_103611 | [1,1,4096] | <Type: 'float32'> | 0 |
|
159 |
+
| 156 | self.transformer.blocks.25.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
160 |
+
| 157 | self.transformer.blocks.25.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
161 |
+
| 158 | Constant_103612 | [1,1,4096] | <Type: 'float32'> | 0 |
|
162 |
+
| 159 | self.transformer.blocks.26.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
163 |
+
| 160 | self.transformer.blocks.26.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
164 |
+
| 161 | Constant_103614 | [1,1,4096] | <Type: 'float32'> | 0 |
|
165 |
+
| 162 | self.transformer.blocks.26.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
166 |
+
| 163 | self.transformer.blocks.26.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
167 |
+
| 164 | Constant_103615 | [1,1,4096] | <Type: 'float32'> | 0 |
|
168 |
+
| 165 | self.transformer.blocks.27.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
169 |
+
| 166 | self.transformer.blocks.27.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
170 |
+
| 167 | Constant_103617 | [1,1,4096] | <Type: 'float32'> | 0 |
|
171 |
+
| 168 | self.transformer.blocks.27.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
172 |
+
| 169 | self.transformer.blocks.27.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
173 |
+
| 170 | Constant_103618 | [1,1,4096] | <Type: 'float32'> | 0 |
|
174 |
+
| 171 | self.transformer.blocks.28.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
175 |
+
| 172 | self.transformer.blocks.28.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
176 |
+
| 173 | Constant_103620 | [1,1,4096] | <Type: 'float32'> | 0 |
|
177 |
+
| 174 | self.transformer.blocks.28.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
178 |
+
| 175 | self.transformer.blocks.28.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
179 |
+
| 176 | Constant_103621 | [1,1,4096] | <Type: 'float32'> | 0 |
|
180 |
+
| 177 | self.transformer.blocks.29.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
181 |
+
| 178 | self.transformer.blocks.29.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
182 |
+
| 179 | Constant_103623 | [1,1,4096] | <Type: 'float32'> | 0 |
|
183 |
+
| 180 | self.transformer.blocks.29.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
184 |
+
| 181 | self.transformer.blocks.29.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
185 |
+
| 182 | Constant_103624 | [1,1,4096] | <Type: 'float32'> | 0 |
|
186 |
+
| 183 | self.transformer.blocks.30.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
187 |
+
| 184 | self.transformer.blocks.30.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
188 |
+
| 185 | Constant_103626 | [1,1,4096] | <Type: 'float32'> | 0 |
|
189 |
+
| 186 | self.transformer.blocks.30.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
190 |
+
| 187 | self.transformer.blocks.30.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
191 |
+
| 188 | Constant_103627 | [1,1,4096] | <Type: 'float32'> | 0 |
|
192 |
+
| 189 | self.transformer.blocks.31.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
|
193 |
+
| 190 | self.transformer.blocks.31.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
|
194 |
+
| 191 | Constant_103629 | [1,1,4096] | <Type: 'float32'> | 0 |
|
195 |
+
| 192 | self.transformer.blocks.31.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
|
196 |
+
| 193 | self.transformer.blocks.31.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
|
197 |
+
| 194 | Constant_103630 | [1,1,4096] | <Type: 'float32'> | 0 |
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<|endoftext|>",
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"pad_token": "<|endoftext|>",
|
5 |
+
"unk_token": "<|endoftext|>"
|
6 |
+
}
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/tokenizer_config.json
ADDED
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "<|padding|>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"50254": {
|
21 |
+
"content": " ",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": false
|
27 |
+
},
|
28 |
+
"50255": {
|
29 |
+
"content": " ",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": false
|
35 |
+
},
|
36 |
+
"50256": {
|
37 |
+
"content": " ",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": true,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": false
|
43 |
+
},
|
44 |
+
"50257": {
|
45 |
+
"content": " ",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": true,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false,
|
50 |
+
"special": false
|
51 |
+
},
|
52 |
+
"50258": {
|
53 |
+
"content": " ",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": true,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false,
|
58 |
+
"special": false
|
59 |
+
},
|
60 |
+
"50259": {
|
61 |
+
"content": " ",
|
62 |
+
"lstrip": false,
|
63 |
+
"normalized": true,
|
64 |
+
"rstrip": false,
|
65 |
+
"single_word": false,
|
66 |
+
"special": false
|
67 |
+
},
|
68 |
+
"50260": {
|
69 |
+
"content": " ",
|
70 |
+
"lstrip": false,
|
71 |
+
"normalized": true,
|
72 |
+
"rstrip": false,
|
73 |
+
"single_word": false,
|
74 |
+
"special": false
|
75 |
+
},
|
76 |
+
"50261": {
|
77 |
+
"content": " ",
|
78 |
+
"lstrip": false,
|
79 |
+
"normalized": true,
|
80 |
+
"rstrip": false,
|
81 |
+
"single_word": false,
|
82 |
+
"special": false
|
83 |
+
},
|
84 |
+
"50262": {
|
85 |
+
"content": " ",
|
86 |
+
"lstrip": false,
|
87 |
+
"normalized": true,
|
88 |
+
"rstrip": false,
|
89 |
+
"single_word": false,
|
90 |
+
"special": false
|
91 |
+
},
|
92 |
+
"50263": {
|
93 |
+
"content": " ",
|
94 |
+
"lstrip": false,
|
95 |
+
"normalized": true,
|
96 |
+
"rstrip": false,
|
97 |
+
"single_word": false,
|
98 |
+
"special": false
|
99 |
+
},
|
100 |
+
"50264": {
|
101 |
+
"content": " ",
|
102 |
+
"lstrip": false,
|
103 |
+
"normalized": true,
|
104 |
+
"rstrip": false,
|
105 |
+
"single_word": false,
|
106 |
+
"special": false
|
107 |
+
},
|
108 |
+
"50265": {
|
109 |
+
"content": " ",
|
110 |
+
"lstrip": false,
|
111 |
+
"normalized": true,
|
112 |
+
"rstrip": false,
|
113 |
+
"single_word": false,
|
114 |
+
"special": false
|
115 |
+
},
|
116 |
+
"50266": {
|
117 |
+
"content": " ",
|
118 |
+
"lstrip": false,
|
119 |
+
"normalized": true,
|
120 |
+
"rstrip": false,
|
121 |
+
"single_word": false,
|
122 |
+
"special": false
|
123 |
+
},
|
124 |
+
"50267": {
|
125 |
+
"content": " ",
|
126 |
+
"lstrip": false,
|
127 |
+
"normalized": true,
|
128 |
+
"rstrip": false,
|
129 |
+
"single_word": false,
|
130 |
+
"special": false
|
131 |
+
},
|
132 |
+
"50268": {
|
133 |
+
"content": " ",
|
134 |
+
"lstrip": false,
|
135 |
+
"normalized": true,
|
136 |
+
"rstrip": false,
|
137 |
+
"single_word": false,
|
138 |
+
"special": false
|
139 |
+
},
|
140 |
+
"50269": {
|
141 |
+
"content": " ",
|
142 |
+
"lstrip": false,
|
143 |
+
"normalized": true,
|
144 |
+
"rstrip": false,
|
145 |
+
"single_word": false,
|
146 |
+
"special": false
|
147 |
+
},
|
148 |
+
"50270": {
|
149 |
+
"content": " ",
|
150 |
+
"lstrip": false,
|
151 |
+
"normalized": true,
|
152 |
+
"rstrip": false,
|
153 |
+
"single_word": false,
|
154 |
+
"special": false
|
155 |
+
},
|
156 |
+
"50271": {
|
157 |
+
"content": " ",
|
158 |
+
"lstrip": false,
|
159 |
+
"normalized": true,
|
160 |
+
"rstrip": false,
|
161 |
+
"single_word": false,
|
162 |
+
"special": false
|
163 |
+
},
|
164 |
+
"50272": {
|
165 |
+
"content": " ",
|
166 |
+
"lstrip": false,
|
167 |
+
"normalized": true,
|
168 |
+
"rstrip": false,
|
169 |
+
"single_word": false,
|
170 |
+
"special": false
|
171 |
+
},
|
172 |
+
"50273": {
|
173 |
+
"content": " ",
|
174 |
+
"lstrip": false,
|
175 |
+
"normalized": true,
|
176 |
+
"rstrip": false,
|
177 |
+
"single_word": false,
|
178 |
+
"special": false
|
179 |
+
},
|
180 |
+
"50274": {
|
181 |
+
"content": " ",
|
182 |
+
"lstrip": false,
|
183 |
+
"normalized": true,
|
184 |
+
"rstrip": false,
|
185 |
+
"single_word": false,
|
186 |
+
"special": false
|
187 |
+
},
|
188 |
+
"50275": {
|
189 |
+
"content": " ",
|
190 |
+
"lstrip": false,
|
191 |
+
"normalized": true,
|
192 |
+
"rstrip": false,
|
193 |
+
"single_word": false,
|
194 |
+
"special": false
|
195 |
+
},
|
196 |
+
"50276": {
|
197 |
+
"content": " ",
|
198 |
+
"lstrip": false,
|
199 |
+
"normalized": true,
|
200 |
+
"rstrip": false,
|
201 |
+
"single_word": false,
|
202 |
+
"special": false
|
203 |
+
}
|
204 |
+
},
|
205 |
+
"bos_token": "<|endoftext|>",
|
206 |
+
"clean_up_tokenization_spaces": true,
|
207 |
+
"eos_token": "<|endoftext|>",
|
208 |
+
"model_max_length": 512,
|
209 |
+
"pad_token": "<|endoftext|>",
|
210 |
+
"tokenizer_class": "GPTNeoXTokenizer",
|
211 |
+
"unk_token": "<|endoftext|>"
|
212 |
+
}
|