yujiepan committed on
Commit
f6a2c63
1 Parent(s): 7fdd9f8

Upload folder using huggingface_hub

Browse files
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/args.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "neuralmagic/mpt-7b-gsm8k-pt",
3
+ "run_name": "fp32",
4
+ "quant_mode": "original",
5
+ "force_run": true
6
+ }
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "neuralmagic/mpt-7b-gsm8k-pt",
3
+ "architectures": [
4
+ "MPTForCausalLM"
5
+ ],
6
+ "attn_config": {
7
+ "model_type": ""
8
+ },
9
+ "auto_map": {
10
+ "AutoConfig": "neuralmagic/mpt-7b-gsm8k-pt--configuration_mpt.MPTConfig",
11
+ "AutoModelForCausalLM": "neuralmagic/mpt-7b-gsm8k-pt--modeling_mpt.MPTForCausalLM"
12
+ },
13
+ "d_model": 4096,
14
+ "emb_pdrop": 0,
15
+ "embedding_fraction": 1.0,
16
+ "expansion_ratio": 4,
17
+ "init_config": {
18
+ "emb_init_std": null,
19
+ "emb_init_uniform_lim": null,
20
+ "fan_mode": "fan_in",
21
+ "init_div_is_residual": true,
22
+ "init_gain": 0,
23
+ "init_nonlinearity": "relu",
24
+ "init_std": 0.02,
25
+ "name": "kaiming_normal_",
26
+ "verbose": 0
27
+ },
28
+ "init_device": "cpu",
29
+ "initializer_range": 0.02,
30
+ "is_decoder": true,
31
+ "layer_norm_epsilon": 1e-05,
32
+ "learned_pos_emb": true,
33
+ "logit_scale": null,
34
+ "max_seq_len": 2048,
35
+ "model_type": "mpt",
36
+ "n_heads": 32,
37
+ "n_layers": 32,
38
+ "no_bias": true,
39
+ "norm_type": "low_precision_layernorm",
40
+ "resid_pdrop": 0,
41
+ "tie_weights": false,
42
+ "tie_word_embeddings": false,
43
+ "tokenizer_name": "EleutherAI/gpt-neox-20b",
44
+ "torch_dtype": "bfloat16",
45
+ "transformers_version": "4.34.1",
46
+ "use_cache": true,
47
+ "verbose": 0,
48
+ "vocab_size": 50432
49
+ }
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-aa ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd953e68651fa7c5998daa8855d9253344e3de2c4b2ce696b7bf5b2411d783cd
3
+ size 9663676416
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-ab ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36cdaba54ad1c43273fa4d5505b31bd1ad198a208ac4275b77fd412ca6d49aff
3
+ size 9663676416
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.bin.part-ac ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f872652fa4415118fbea7b0617fb1a2f08b211ecc3344d2c70dbe6f2438ab485
3
+ size 7270056209
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/openvino_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/ov_weights_type.md ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ | | name | shape | type | sparsity |
2
+ |----:|:------------------------------------------------|:-------------|:------------------|------------:|
3
+ | 0 | self.transformer.wte.weight | [50432,4096] | <Type: 'float32'> | 0 |
4
+ | 1 | Constant_103534 | [1,1,4096] | <Type: 'float32'> | 0 |
5
+ | 2 | self.transformer.blocks.0.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
6
+ | 3 | 5 | [32,1,2048] | <Type: 'float32'> | 0.000488281 |
7
+ | 4 | self.transformer.blocks.0.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
8
+ | 5 | Constant_103536 | [1,1,4096] | <Type: 'float32'> | 0 |
9
+ | 6 | self.transformer.blocks.0.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
10
+ | 7 | self.transformer.blocks.0.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
11
+ | 8 | Constant_103537 | [1,1,4096] | <Type: 'float32'> | 0 |
12
+ | 9 | self.transformer.blocks.1.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
13
+ | 10 | self.transformer.blocks.1.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
14
+ | 11 | Constant_103539 | [1,1,4096] | <Type: 'float32'> | 0 |
15
+ | 12 | self.transformer.blocks.1.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
16
+ | 13 | self.transformer.blocks.1.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
17
+ | 14 | Constant_103540 | [1,1,4096] | <Type: 'float32'> | 0 |
18
+ | 15 | self.transformer.blocks.2.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
19
+ | 16 | self.transformer.blocks.2.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
20
+ | 17 | Constant_103542 | [1,1,4096] | <Type: 'float32'> | 0 |
21
+ | 18 | self.transformer.blocks.2.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
22
+ | 19 | self.transformer.blocks.2.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
23
+ | 20 | Constant_103543 | [1,1,4096] | <Type: 'float32'> | 0 |
24
+ | 21 | self.transformer.blocks.3.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
25
+ | 22 | self.transformer.blocks.3.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
26
+ | 23 | Constant_103545 | [1,1,4096] | <Type: 'float32'> | 0 |
27
+ | 24 | self.transformer.blocks.3.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
28
+ | 25 | self.transformer.blocks.3.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
29
+ | 26 | Constant_103546 | [1,1,4096] | <Type: 'float32'> | 0 |
30
+ | 27 | self.transformer.blocks.4.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
31
+ | 28 | self.transformer.blocks.4.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
32
+ | 29 | Constant_103548 | [1,1,4096] | <Type: 'float32'> | 0 |
33
+ | 30 | self.transformer.blocks.4.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
34
+ | 31 | self.transformer.blocks.4.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
35
+ | 32 | Constant_103549 | [1,1,4096] | <Type: 'float32'> | 0 |
36
+ | 33 | self.transformer.blocks.5.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
37
+ | 34 | self.transformer.blocks.5.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
38
+ | 35 | Constant_103551 | [1,1,4096] | <Type: 'float32'> | 0 |
39
+ | 36 | self.transformer.blocks.5.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
40
+ | 37 | self.transformer.blocks.5.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
41
+ | 38 | Constant_103552 | [1,1,4096] | <Type: 'float32'> | 0 |
42
+ | 39 | self.transformer.blocks.6.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
43
+ | 40 | self.transformer.blocks.6.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
44
+ | 41 | Constant_103554 | [1,1,4096] | <Type: 'float32'> | 0 |
45
+ | 42 | self.transformer.blocks.6.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
46
+ | 43 | self.transformer.blocks.6.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
47
+ | 44 | Constant_103555 | [1,1,4096] | <Type: 'float32'> | 0 |
48
+ | 45 | self.transformer.blocks.7.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
49
+ | 46 | self.transformer.blocks.7.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
50
+ | 47 | Constant_103557 | [1,1,4096] | <Type: 'float32'> | 0 |
51
+ | 48 | self.transformer.blocks.7.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
52
+ | 49 | self.transformer.blocks.7.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
53
+ | 50 | Constant_103558 | [1,1,4096] | <Type: 'float32'> | 0 |
54
+ | 51 | self.transformer.blocks.8.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
55
+ | 52 | self.transformer.blocks.8.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
56
+ | 53 | Constant_103560 | [1,1,4096] | <Type: 'float32'> | 0 |
57
+ | 54 | self.transformer.blocks.8.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
58
+ | 55 | self.transformer.blocks.8.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
59
+ | 56 | Constant_103561 | [1,1,4096] | <Type: 'float32'> | 0 |
60
+ | 57 | self.transformer.blocks.9.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
61
+ | 58 | self.transformer.blocks.9.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
62
+ | 59 | Constant_103563 | [1,1,4096] | <Type: 'float32'> | 0 |
63
+ | 60 | self.transformer.blocks.9.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
64
+ | 61 | self.transformer.blocks.9.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
65
+ | 62 | Constant_103564 | [1,1,4096] | <Type: 'float32'> | 0 |
66
+ | 63 | self.transformer.blocks.10.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
67
+ | 64 | self.transformer.blocks.10.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
68
+ | 65 | Constant_103566 | [1,1,4096] | <Type: 'float32'> | 0 |
69
+ | 66 | self.transformer.blocks.10.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
70
+ | 67 | self.transformer.blocks.10.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
71
+ | 68 | Constant_103567 | [1,1,4096] | <Type: 'float32'> | 0 |
72
+ | 69 | self.transformer.blocks.11.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
73
+ | 70 | self.transformer.blocks.11.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
74
+ | 71 | Constant_103569 | [1,1,4096] | <Type: 'float32'> | 0 |
75
+ | 72 | self.transformer.blocks.11.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
76
+ | 73 | self.transformer.blocks.11.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
77
+ | 74 | Constant_103570 | [1,1,4096] | <Type: 'float32'> | 0 |
78
+ | 75 | self.transformer.blocks.12.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
79
+ | 76 | self.transformer.blocks.12.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
80
+ | 77 | Constant_103572 | [1,1,4096] | <Type: 'float32'> | 0 |
81
+ | 78 | self.transformer.blocks.12.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
82
+ | 79 | self.transformer.blocks.12.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
83
+ | 80 | Constant_103573 | [1,1,4096] | <Type: 'float32'> | 0 |
84
+ | 81 | self.transformer.blocks.13.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
85
+ | 82 | self.transformer.blocks.13.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
86
+ | 83 | Constant_103575 | [1,1,4096] | <Type: 'float32'> | 0 |
87
+ | 84 | self.transformer.blocks.13.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
88
+ | 85 | self.transformer.blocks.13.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
89
+ | 86 | Constant_103576 | [1,1,4096] | <Type: 'float32'> | 0 |
90
+ | 87 | self.transformer.blocks.14.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
91
+ | 88 | self.transformer.blocks.14.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
92
+ | 89 | Constant_103578 | [1,1,4096] | <Type: 'float32'> | 0 |
93
+ | 90 | self.transformer.blocks.14.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
94
+ | 91 | self.transformer.blocks.14.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
95
+ | 92 | Constant_103579 | [1,1,4096] | <Type: 'float32'> | 0 |
96
+ | 93 | self.transformer.blocks.15.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
97
+ | 94 | self.transformer.blocks.15.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
98
+ | 95 | Constant_103581 | [1,1,4096] | <Type: 'float32'> | 0 |
99
+ | 96 | self.transformer.blocks.15.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
100
+ | 97 | self.transformer.blocks.15.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
101
+ | 98 | Constant_103582 | [1,1,4096] | <Type: 'float32'> | 0 |
102
+ | 99 | self.transformer.blocks.16.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
103
+ | 100 | self.transformer.blocks.16.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
104
+ | 101 | Constant_103584 | [1,1,4096] | <Type: 'float32'> | 0 |
105
+ | 102 | self.transformer.blocks.16.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
106
+ | 103 | self.transformer.blocks.16.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
107
+ | 104 | Constant_103585 | [1,1,4096] | <Type: 'float32'> | 0 |
108
+ | 105 | self.transformer.blocks.17.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
109
+ | 106 | self.transformer.blocks.17.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
110
+ | 107 | Constant_103587 | [1,1,4096] | <Type: 'float32'> | 0 |
111
+ | 108 | self.transformer.blocks.17.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
112
+ | 109 | self.transformer.blocks.17.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
113
+ | 110 | Constant_103588 | [1,1,4096] | <Type: 'float32'> | 0 |
114
+ | 111 | self.transformer.blocks.18.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
115
+ | 112 | self.transformer.blocks.18.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
116
+ | 113 | Constant_103590 | [1,1,4096] | <Type: 'float32'> | 0 |
117
+ | 114 | self.transformer.blocks.18.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
118
+ | 115 | self.transformer.blocks.18.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
119
+ | 116 | Constant_103591 | [1,1,4096] | <Type: 'float32'> | 0 |
120
+ | 117 | self.transformer.blocks.19.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
121
+ | 118 | self.transformer.blocks.19.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
122
+ | 119 | Constant_103593 | [1,1,4096] | <Type: 'float32'> | 0 |
123
+ | 120 | self.transformer.blocks.19.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
124
+ | 121 | self.transformer.blocks.19.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
125
+ | 122 | Constant_103594 | [1,1,4096] | <Type: 'float32'> | 0 |
126
+ | 123 | self.transformer.blocks.20.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
127
+ | 124 | self.transformer.blocks.20.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
128
+ | 125 | Constant_103596 | [1,1,4096] | <Type: 'float32'> | 0 |
129
+ | 126 | self.transformer.blocks.20.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
130
+ | 127 | self.transformer.blocks.20.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
131
+ | 128 | Constant_103597 | [1,1,4096] | <Type: 'float32'> | 0 |
132
+ | 129 | self.transformer.blocks.21.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
133
+ | 130 | self.transformer.blocks.21.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
134
+ | 131 | Constant_103599 | [1,1,4096] | <Type: 'float32'> | 0 |
135
+ | 132 | self.transformer.blocks.21.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
136
+ | 133 | self.transformer.blocks.21.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
137
+ | 134 | Constant_103600 | [1,1,4096] | <Type: 'float32'> | 0 |
138
+ | 135 | self.transformer.blocks.22.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
139
+ | 136 | self.transformer.blocks.22.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
140
+ | 137 | Constant_103602 | [1,1,4096] | <Type: 'float32'> | 0 |
141
+ | 138 | self.transformer.blocks.22.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
142
+ | 139 | self.transformer.blocks.22.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
143
+ | 140 | Constant_103603 | [1,1,4096] | <Type: 'float32'> | 0 |
144
+ | 141 | self.transformer.blocks.23.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
145
+ | 142 | self.transformer.blocks.23.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
146
+ | 143 | Constant_103605 | [1,1,4096] | <Type: 'float32'> | 0 |
147
+ | 144 | self.transformer.blocks.23.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
148
+ | 145 | self.transformer.blocks.23.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
149
+ | 146 | Constant_103606 | [1,1,4096] | <Type: 'float32'> | 0 |
150
+ | 147 | self.transformer.blocks.24.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
151
+ | 148 | self.transformer.blocks.24.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
152
+ | 149 | Constant_103608 | [1,1,4096] | <Type: 'float32'> | 0 |
153
+ | 150 | self.transformer.blocks.24.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
154
+ | 151 | self.transformer.blocks.24.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
155
+ | 152 | Constant_103609 | [1,1,4096] | <Type: 'float32'> | 0 |
156
+ | 153 | self.transformer.blocks.25.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
157
+ | 154 | self.transformer.blocks.25.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
158
+ | 155 | Constant_103611 | [1,1,4096] | <Type: 'float32'> | 0 |
159
+ | 156 | self.transformer.blocks.25.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
160
+ | 157 | self.transformer.blocks.25.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
161
+ | 158 | Constant_103612 | [1,1,4096] | <Type: 'float32'> | 0 |
162
+ | 159 | self.transformer.blocks.26.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
163
+ | 160 | self.transformer.blocks.26.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
164
+ | 161 | Constant_103614 | [1,1,4096] | <Type: 'float32'> | 0 |
165
+ | 162 | self.transformer.blocks.26.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
166
+ | 163 | self.transformer.blocks.26.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
167
+ | 164 | Constant_103615 | [1,1,4096] | <Type: 'float32'> | 0 |
168
+ | 165 | self.transformer.blocks.27.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
169
+ | 166 | self.transformer.blocks.27.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
170
+ | 167 | Constant_103617 | [1,1,4096] | <Type: 'float32'> | 0 |
171
+ | 168 | self.transformer.blocks.27.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
172
+ | 169 | self.transformer.blocks.27.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
173
+ | 170 | Constant_103618 | [1,1,4096] | <Type: 'float32'> | 0 |
174
+ | 171 | self.transformer.blocks.28.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
175
+ | 172 | self.transformer.blocks.28.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
176
+ | 173 | Constant_103620 | [1,1,4096] | <Type: 'float32'> | 0 |
177
+ | 174 | self.transformer.blocks.28.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
178
+ | 175 | self.transformer.blocks.28.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
179
+ | 176 | Constant_103621 | [1,1,4096] | <Type: 'float32'> | 0 |
180
+ | 177 | self.transformer.blocks.29.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
181
+ | 178 | self.transformer.blocks.29.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
182
+ | 179 | Constant_103623 | [1,1,4096] | <Type: 'float32'> | 0 |
183
+ | 180 | self.transformer.blocks.29.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
184
+ | 181 | self.transformer.blocks.29.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
185
+ | 182 | Constant_103624 | [1,1,4096] | <Type: 'float32'> | 0 |
186
+ | 183 | self.transformer.blocks.30.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
187
+ | 184 | self.transformer.blocks.30.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
188
+ | 185 | Constant_103626 | [1,1,4096] | <Type: 'float32'> | 0 |
189
+ | 186 | self.transformer.blocks.30.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
190
+ | 187 | self.transformer.blocks.30.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
191
+ | 188 | Constant_103627 | [1,1,4096] | <Type: 'float32'> | 0 |
192
+ | 189 | self.transformer.blocks.31.attn.Wqkv.weight | [12288,4096] | <Type: 'float32'> | 0 |
193
+ | 190 | self.transformer.blocks.31.attn.out_proj.weight | [4096,4096] | <Type: 'float32'> | 0 |
194
+ | 191 | Constant_103629 | [1,1,4096] | <Type: 'float32'> | 0 |
195
+ | 192 | self.transformer.blocks.31.ffn.up_proj.weight | [16384,4096] | <Type: 'float32'> | 0 |
196
+ | 193 | self.transformer.blocks.31.ffn.down_proj.weight | [4096,16384] | <Type: 'float32'> | 0 |
197
+ | 194 | Constant_103630 | [1,1,4096] | <Type: 'float32'> | 0 |
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/neuralmagic/mpt-7b-gsm8k-pt/fp32/tokenizer_config.json ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|padding|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "50254": {
21
+ "content": " ",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": false
27
+ },
28
+ "50255": {
29
+ "content": " ",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ },
36
+ "50256": {
37
+ "content": " ",
38
+ "lstrip": false,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": false
43
+ },
44
+ "50257": {
45
+ "content": " ",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "50258": {
53
+ "content": " ",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "50259": {
61
+ "content": " ",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
+ },
68
+ "50260": {
69
+ "content": " ",
70
+ "lstrip": false,
71
+ "normalized": true,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": false
75
+ },
76
+ "50261": {
77
+ "content": " ",
78
+ "lstrip": false,
79
+ "normalized": true,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
+ },
84
+ "50262": {
85
+ "content": " ",
86
+ "lstrip": false,
87
+ "normalized": true,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": false
91
+ },
92
+ "50263": {
93
+ "content": " ",
94
+ "lstrip": false,
95
+ "normalized": true,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": false
99
+ },
100
+ "50264": {
101
+ "content": " ",
102
+ "lstrip": false,
103
+ "normalized": true,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": false
107
+ },
108
+ "50265": {
109
+ "content": " ",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": false
115
+ },
116
+ "50266": {
117
+ "content": " ",
118
+ "lstrip": false,
119
+ "normalized": true,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "50267": {
125
+ "content": " ",
126
+ "lstrip": false,
127
+ "normalized": true,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "50268": {
133
+ "content": " ",
134
+ "lstrip": false,
135
+ "normalized": true,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "50269": {
141
+ "content": " ",
142
+ "lstrip": false,
143
+ "normalized": true,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "50270": {
149
+ "content": " ",
150
+ "lstrip": false,
151
+ "normalized": true,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "50271": {
157
+ "content": " ",
158
+ "lstrip": false,
159
+ "normalized": true,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "50272": {
165
+ "content": " ",
166
+ "lstrip": false,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "50273": {
173
+ "content": " ",
174
+ "lstrip": false,
175
+ "normalized": true,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "50274": {
181
+ "content": " ",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "50275": {
189
+ "content": " ",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "50276": {
197
+ "content": " ",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ }
204
+ },
205
+ "bos_token": "<|endoftext|>",
206
+ "clean_up_tokenization_spaces": true,
207
+ "eos_token": "<|endoftext|>",
208
+ "model_max_length": 512,
209
+ "pad_token": "<|endoftext|>",
210
+ "tokenizer_class": "GPTNeoXTokenizer",
211
+ "unk_token": "<|endoftext|>"
212
+ }