Xenova HF staff committed on
Commit
6103c21
1 Parent(s): eb56139

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "efederici/ipt-350m",
3
+ "architectures": [
4
+ "MPTForCausalLM"
5
+ ],
6
+ "attn_config": {
7
+ "model_type": ""
8
+ },
9
+ "auto_map": {
10
+ "AutoConfig": "efederici/ipt-350m--configuration_mpt.MPTConfig",
11
+ "AutoModelForCausalLM": "efederici/ipt-350m--modeling_mpt.MPTForCausalLM"
12
+ },
13
+ "d_model": 1024,
14
+ "emb_pdrop": 0.0,
15
+ "embedding_fraction": 1.0,
16
+ "expansion_ratio": 4,
17
+ "init_config": {
18
+ "emb_init_std": null,
19
+ "emb_init_uniform_lim": null,
20
+ "fan_mode": "fan_in",
21
+ "init_div_is_residual": true,
22
+ "init_gain": 0.0,
23
+ "init_nonlinearity": "relu",
24
+ "init_std": null,
25
+ "name": "kaiming_normal_",
26
+ "verbose": 0
27
+ },
28
+ "init_device": "cpu",
29
+ "initializer_range": 0.02,
30
+ "layer_norm_epsilon": 1e-05,
31
+ "learned_pos_emb": true,
32
+ "logit_scale": null,
33
+ "max_seq_len": 2048,
34
+ "model_type": "mpt",
35
+ "n_heads": 16,
36
+ "n_layers": 24,
37
+ "no_bias": true,
38
+ "norm_type": "low_precision_layernorm",
39
+ "resid_pdrop": 0.0,
40
+ "transformers_version": "4.32.1",
41
+ "use_cache": false,
42
+ "verbose": 0,
43
+ "vocab_size": 50432
44
+ }
generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.32.1",
4
+ "use_cache": false
5
+ }
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e434ce50a6b81fd4b234c5d4b1d29bdd0c48e1dca27c435d3f76b5d0f6f030f
3
+ size 1621899428
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d5e8f131d5aac51c90315ce723147eb7b7d875fb7d7c7fdd7eb98123ce13bb
3
+ size 1622503619
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:620cfd14fb3e3618226e8468406c55a17b2a11ee417e47a18855d4fb264589f7
3
+ size 408595992
onnx/decoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7a2ecfd618b43a6a816381bcb400696104217c1f541b8db7f891504337cd84c
3
+ size 407714462
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54d7dc2514a0deceb5f70fd54c8e6f4a2a8bc5e78030f3f501fdc72985a8fd00
3
+ size 1621900820
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c636d0f47f3a5fc2b99b2a8c613bcafc7fa4fb2790a87b198ea94daaf4ebe9da
3
+ size 407715249
quantize_config.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": true,
3
+ "reduce_range": true,
4
+ "per_model_config": {
5
+ "decoder_model_merged": {
6
+ "op_types": [
7
+ "Range",
8
+ "If",
9
+ "Concat",
10
+ "Equal",
11
+ "ReduceMean",
12
+ "Sub",
13
+ "Cast",
14
+ "Mul",
15
+ "Reshape",
16
+ "ConstantOfShape",
17
+ "MatMul",
18
+ "Erf",
19
+ "Gather",
20
+ "Unsqueeze",
21
+ "Expand",
22
+ "Not",
23
+ "Less",
24
+ "Constant",
25
+ "Slice",
26
+ "ScatterND",
27
+ "Div",
28
+ "Shape",
29
+ "Add",
30
+ "Pow",
31
+ "Softmax",
32
+ "Sqrt",
33
+ "Where",
34
+ "Or",
35
+ "Transpose"
36
+ ],
37
+ "weight_type": "QInt8"
38
+ },
39
+ "decoder_model": {
40
+ "op_types": [
41
+ "Range",
42
+ "Concat",
43
+ "Equal",
44
+ "ReduceMean",
45
+ "Sub",
46
+ "Cast",
47
+ "Mul",
48
+ "Reshape",
49
+ "ConstantOfShape",
50
+ "MatMul",
51
+ "Erf",
52
+ "Gather",
53
+ "Unsqueeze",
54
+ "Expand",
55
+ "Not",
56
+ "Less",
57
+ "Constant",
58
+ "Slice",
59
+ "ScatterND",
60
+ "Div",
61
+ "Shape",
62
+ "Add",
63
+ "Pow",
64
+ "Softmax",
65
+ "Sqrt",
66
+ "Where",
67
+ "Or",
68
+ "Transpose"
69
+ ],
70
+ "weight_type": "QInt8"
71
+ },
72
+ "decoder_with_past_model": {
73
+ "op_types": [
74
+ "Concat",
75
+ "Equal",
76
+ "ReduceMean",
77
+ "Sub",
78
+ "Cast",
79
+ "Mul",
80
+ "Reshape",
81
+ "ConstantOfShape",
82
+ "MatMul",
83
+ "Erf",
84
+ "Gather",
85
+ "Unsqueeze",
86
+ "Expand",
87
+ "Not",
88
+ "Constant",
89
+ "Slice",
90
+ "Div",
91
+ "Shape",
92
+ "Add",
93
+ "Pow",
94
+ "Softmax",
95
+ "Sqrt",
96
+ "Where",
97
+ "Transpose"
98
+ ],
99
+ "weight_type": "QInt8"
100
+ }
101
+ }
102
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|endoftext|>",
6
+ "model_max_length": 2048,
7
+ "tokenizer_class": "GPTNeoXTokenizer",
8
+ "unk_token": "<|endoftext|>"
9
+ }