jojo1899 committed
Commit d448458
1 Parent(s): de241a9

Quantized using nncf 2.13.0

README.md CHANGED
@@ -7,23 +7,22 @@ tags:
 
 This is an INT4 quantized version of the `Phi-3-mini-128k-instruct` model. The Python packages used in creating this model are as follows:
 ```
- openvino==2024.3.0.dev20240528
- openvino-nightly==2024.3.0.dev20240528
- openvino-tokenizers==2024.3.0.0.dev20240528
- optimum==1.19.2
- optimum-intel==1.17.0.dev0+aefabf0
- nncf==2.11.0.dev0+90a7f0d5
- torch==2.3.0+cu121
- transformers==4.40.2
+ openvino==2024.4.0
+ optimum==1.23.3
+ optimum-intel==1.20.1
+ nncf==2.13.0
+ torch==2.5.1
+ transformers==4.46.1
 ```
 This quantized model is created using the following command:
 ```
- optimum-cli export openvino -m "microsoft/Phi-3-mini-128k-instruct" --task text-generation-with-past --weight-format int4 --group-size 128 --trust-remote-code ./Phi-3-mini-128k-instruct-ov-int4
+ optimum-cli export openvino -m "microsoft/Phi-3-mini-128k-instruct" --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 0.8 --trust-remote-code ./Phi-3-mini-128k-instruct-ov-int4
 ```
 For more details, run the following command from your Python environment: `optimum-cli export openvino --help`
 
 INFO:nncf:Statistics of the bitwidth distribution:
 | Num bits (N) | % all parameters (layers) | % ratio-defining parameters (layers) |
 |----------------|-----------------------------|----------------------------------------|
- | 8 | 25% (27 / 130) | 21% (25 / 128) |
- | 4 | 75% (103 / 130) | 79% (103 / 128) |
+ | 8 | 24% (23 / 130) | 20% (21 / 128) |
+ | 4 | 76% (107 / 130) | 80% (107 / 128) |
+
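
The `--ratio 0.8` flag added to the export command controls the INT4/INT8 split that NNCF reports above: roughly 80% of the ratio-defining weight tensors are kept in INT4 and the remainder fall back to INT8. For reference, a minimal sketch of running the resulting export with `optimum-intel` (the directory name is the one used in the export command; the prompt and generation settings are illustrative only, not part of this commit):

```
# Load the INT4 OpenVINO export and generate a short reply.
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_dir = "./Phi-3-mini-128k-instruct-ov-int4"
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = OVModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True)

# The updated chat template (see tokenizer_config.json below) expects
# <|system|>/<|user|>/<|assistant|> turns; apply_chat_template builds them.
messages = [{"role": "user", "content": "Summarize OpenVINO in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
output = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```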
config.json CHANGED
@@ -1,8 +1,10 @@
 {
+ "_attn_implementation_autoset": true,
 "_name_or_path": "microsoft/Phi-3-mini-128k-instruct",
 "architectures": [
 "Phi3ForCausalLM"
 ],
+ "attention_bias": false,
 "attention_dropout": 0.0,
 "auto_map": {
 "AutoConfig": "microsoft/Phi-3-mini-128k-instruct--configuration_phi3.Phi3Config",
@@ -26,71 +28,64 @@
 "rms_norm_eps": 1e-05,
 "rope_scaling": {
 "long_factor": [
- 1.0299999713897705,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0799999237060547,
- 1.2299998998641968,
- 1.2299998998641968,
- 1.2999999523162842,
- 1.4499999284744263,
- 1.5999999046325684,
- 1.6499998569488525,
- 1.8999998569488525,
- 2.859999895095825,
- 3.68999981880188,
- 5.419999599456787,
- 5.489999771118164,
- 5.489999771118164,
- 9.09000015258789,
- 11.579999923706055,
- 15.65999984741211,
- 15.769999504089355,
- 15.789999961853027,
- 18.360000610351562,
- 21.989999771118164,
- 23.079999923706055,
- 30.009998321533203,
- 32.35000228881836,
- 32.590003967285156,
- 35.56000518798828,
- 39.95000457763672,
- 53.840003967285156,
- 56.20000457763672,
- 57.95000457763672,
- 59.29000473022461,
- 59.77000427246094,
- 59.920005798339844,
- 61.190006256103516,
- 61.96000671386719,
- 62.50000762939453,
- 63.3700065612793,
- 63.48000717163086,
- 63.48000717163086,
- 63.66000747680664,
- 63.850006103515625,
- 64.08000946044922,
- 64.760009765625,
- 64.80001068115234,
- 64.81001281738281,
- 64.81001281738281
+ 1.0700000524520874,
+ 1.1200000047683716,
+ 1.149999976158142,
+ 1.4199999570846558,
+ 1.5699999332427979,
+ 1.7999999523162842,
+ 2.129999876022339,
+ 2.129999876022339,
+ 3.009999990463257,
+ 5.910000324249268,
+ 6.950000286102295,
+ 9.070000648498535,
+ 9.930000305175781,
+ 10.710000038146973,
+ 11.130000114440918,
+ 14.609999656677246,
+ 15.409998893737793,
+ 19.809999465942383,
+ 37.279998779296875,
+ 38.279998779296875,
+ 38.599998474121094,
+ 40.12000274658203,
+ 46.20000457763672,
+ 50.940006256103516,
+ 53.66000747680664,
+ 54.9373893737793,
+ 56.89738845825195,
+ 57.28738784790039,
+ 59.98738479614258,
+ 60.86738586425781,
+ 60.887386322021484,
+ 61.71739196777344,
+ 62.91739273071289,
+ 62.957393646240234,
+ 63.41739273071289,
+ 63.8173942565918,
+ 63.83739471435547,
+ 63.897396087646484,
+ 63.93739700317383,
+ 64.06739807128906,
+ 64.11434936523438,
+ 64.12435150146484,
+ 64.15435028076172,
+ 64.19435119628906,
+ 64.24435424804688,
+ 64.57435607910156,
+ 64.69000244140625,
+ 64.76000213623047
 ],
 "short_factor": [
- 1.05,
- 1.05,
- 1.05,
 1.1,
 1.1,
+ 1.1,
- 1.1500000000000001,
- 1.2000000000000002,
- 1.2500000000000002,
 1.3000000000000003,
 1.3500000000000003,
+ 1.3500000000000003,
+ 1.4000000000000004,
+ 1.5500000000000005,
- 1.5000000000000004,
- 2.000000000000001,
- 2.000000000000001,
- 2.000000000000001,
- 2.000000000000001,
 2.000000000000001,
 2.000000000000001,
 2.000000000000001,
@@ -111,26 +106,34 @@
 2.0500000000000007,
 2.0500000000000007,
 2.0500000000000007,
+ 2.0500000000000007,
+ 2.0500000000000007,
+ 2.0500000000000007,
 2.1000000000000005,
 2.1000000000000005,
- 2.1000000000000005,
- 2.1500000000000004,
 2.1500000000000004,
- 2.3499999999999996,
- 2.549999999999999,
- 2.5999999999999988,
- 2.5999999999999988,
+ 2.25,
+ 2.25,
+ 2.25,
+ 2.25,
+ 2.25,
+ 2.3999999999999995,
+ 2.4499999999999993,
+ 2.499999999999999,
+ 2.6999999999999984,
+ 2.6999999999999984,
 2.7499999999999982,
- 2.849999999999998,
- 2.849999999999998,
- 2.9499999999999975
+ 2.799999999999998,
+ 2.8999999999999977,
+ 3.049999999999997
 ],
- "type": "su"
+ "type": "longrope"
 },
 "rope_theta": 10000.0,
 "sliding_window": 262144,
 "tie_word_embeddings": false,
- "transformers_version": "4.40.2",
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.46.1",
 "use_cache": true,
 "vocab_size": 32064
 }
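
The config now uses the `longrope` spelling for the rope-scaling type (the name newer `transformers` releases and the updated upstream Phi-3 config use for the scheme previously labelled `su`) and records the export-time `torch_dtype` and `transformers_version`. A quick way to confirm the updated config parses with the pinned `transformers==4.46.1` (the local directory path is an assumption, not part of this commit):

```
# Check that the updated config loads and reports the new rope-scaling type.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained(
    "./Phi-3-mini-128k-instruct-ov-int4", trust_remote_code=True
)
print(cfg.rope_scaling["type"])  # expected: "longrope"
print(cfg.torch_dtype)           # expected: torch.bfloat16
```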
generation_config.json CHANGED
@@ -7,5 +7,5 @@
 32007
 ],
 "pad_token_id": 32000,
- "transformers_version": "4.40.2"
+ "transformers_version": "4.46.1"
 }
openvino_detokenizer.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e1f06b61953c9af7ee2a9b4ba718b2233dc3ec25c64074ba80cdd6a842d18801
- size 499969
openvino_detokenizer.xml DELETED
@@ -1,97 +0,0 @@
1
- <?xml version="1.0"?>
2
- <net name="detokenizer" version="11">
3
- <layers>
4
- <layer id="0" name="Parameter_223816" type="Parameter" version="opset1">
5
- <data shape="?,?" element_type="i64" />
6
- <output>
7
- <port id="0" precision="I64" names="Parameter_223816">
8
- <dim>-1</dim>
9
- <dim>-1</dim>
10
- </port>
11
- </output>
12
- </layer>
13
- <layer id="1" name="Constant_223796" type="Const" version="opset1">
14
- <data element_type="u8" shape="499969" offset="0" size="499969" />
15
- <output>
16
- <port id="0" precision="U8">
17
- <dim>499969</dim>
18
- </port>
19
- </output>
20
- </layer>
21
- <layer id="2" name="Convert_223826" type="Convert" version="opset1">
22
- <data destination_type="i32" />
23
- <input>
24
- <port id="0" precision="I64">
25
- <dim>-1</dim>
26
- <dim>-1</dim>
27
- </port>
28
- </input>
29
- <output>
30
- <port id="1" precision="I32">
31
- <dim>-1</dim>
32
- <dim>-1</dim>
33
- </port>
34
- </output>
35
- </layer>
36
- <layer id="3" name="SentencepieceDetokenizer_223817" type="SentencepieceDetokenizer" version="extension">
37
- <input>
38
- <port id="0" precision="U8">
39
- <dim>499969</dim>
40
- </port>
41
- <port id="1" precision="I32">
42
- <dim>-1</dim>
43
- <dim>-1</dim>
44
- </port>
45
- </input>
46
- <output>
47
- <port id="2" precision="I32">
48
- <dim>-1</dim>
49
- </port>
50
- <port id="3" precision="I32">
51
- <dim>-1</dim>
52
- </port>
53
- <port id="4" precision="U8">
54
- <dim>-1</dim>
55
- </port>
56
- </output>
57
- </layer>
58
- <layer id="4" name="StringTensorPack_223818" type="StringTensorPack" version="extension">
59
- <data mode="begins_ends" />
60
- <input>
61
- <port id="0" precision="I32">
62
- <dim>-1</dim>
63
- </port>
64
- <port id="1" precision="I32">
65
- <dim>-1</dim>
66
- </port>
67
- <port id="2" precision="U8">
68
- <dim>-1</dim>
69
- </port>
70
- </input>
71
- <output>
72
- <port id="3" precision="STRING" names="string_output">
73
- <dim>-1</dim>
74
- </port>
75
- </output>
76
- </layer>
77
- <layer id="5" name="Result_223819" type="Result" version="opset1">
78
- <input>
79
- <port id="0" precision="STRING">
80
- <dim>-1</dim>
81
- </port>
82
- </input>
83
- </layer>
84
- </layers>
85
- <edges>
86
- <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
87
- <edge from-layer="1" from-port="0" to-layer="3" to-port="0" />
88
- <edge from-layer="2" from-port="1" to-layer="3" to-port="1" />
89
- <edge from-layer="3" from-port="2" to-layer="4" to-port="0" />
90
- <edge from-layer="3" from-port="3" to-layer="4" to-port="1" />
91
- <edge from-layer="3" from-port="4" to-layer="4" to-port="2" />
92
- <edge from-layer="4" from-port="3" to-layer="5" to-port="0" />
93
- </edges>
94
- <rt_info>
95
- <eos_token_id value="32000" />
96
- </rt_info>
97
- </net>
openvino_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:feff83396fe9946ae1dbf0cbe9c45486165c4ef064195426f5c836f84e8d7be0
- size 2450248212
+ oid sha256:9ce9a3d5a7f07a4cb34ddae2c2d81c2b6f3cbac5c5cd51e1ba376bcf8983195e
+ size 2432074068
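
The LFS pointer above records the digest of the new INT4 weight file. A minimal check that a local copy matches it (the path is an assumption):

```
# Compare a downloaded openvino_model.bin against the sha256 in the LFS pointer.
import hashlib

path = "./Phi-3-mini-128k-instruct-ov-int4/openvino_model.bin"
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)
print(digest.hexdigest())
# expected: 9ce9a3d5a7f07a4cb34ddae2c2d81c2b6f3cbac5c5cd51e1ba376bcf8983195e
```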
openvino_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
openvino_tokenizer.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:46097e0534935f1aec4cbac2c90e565ec51a8513fcd53b841231849403e5e122
- size 499981
openvino_tokenizer.xml DELETED
@@ -1,231 +0,0 @@
1
- <?xml version="1.0"?>
2
- <net name="tokenizer" version="11">
3
- <layers>
4
- <layer id="0" name="string_input" type="Parameter" version="opset1">
5
- <data shape="?" element_type="string" />
6
- <output>
7
- <port id="0" precision="STRING" names="string_input">
8
- <dim>-1</dim>
9
- </port>
10
- </output>
11
- </layer>
12
- <layer id="1" name="Constant_223802" type="Const" version="opset1">
13
- <data element_type="i32" shape="" offset="0" size="4" />
14
- <output>
15
- <port id="0" precision="I32" />
16
- </output>
17
- </layer>
18
- <layer id="2" name="Constant_223796" type="Const" version="opset1">
19
- <data element_type="u8" shape="499969" offset="4" size="499969" />
20
- <output>
21
- <port id="0" precision="U8">
22
- <dim>499969</dim>
23
- </port>
24
- </output>
25
- </layer>
26
- <layer id="3" name="SentencepieceTokenizer_223798" type="SentencepieceTokenizer" version="extension">
27
- <data nbest_size="0" alpha="0" add_bos="true" add_eos="false" reverse="false" />
28
- <input>
29
- <port id="0" precision="U8">
30
- <dim>499969</dim>
31
- </port>
32
- <port id="1" precision="STRING">
33
- <dim>-1</dim>
34
- </port>
35
- </input>
36
- <output>
37
- <port id="2" precision="I64">
38
- <dim>-1</dim>
39
- <dim>2</dim>
40
- </port>
41
- <port id="3" precision="I32">
42
- <dim>-1</dim>
43
- </port>
44
- <port id="4" precision="I64">
45
- <dim>2</dim>
46
- </port>
47
- </output>
48
- </layer>
49
- <layer id="4" name="Broadcast_223803" type="Broadcast" version="opset3">
50
- <data mode="numpy" />
51
- <input>
52
- <port id="0" precision="I32" />
53
- <port id="1" precision="I64">
54
- <dim>2</dim>
55
- </port>
56
- </input>
57
- <output>
58
- <port id="2" precision="I32">
59
- <dim>-1</dim>
60
- <dim>-1</dim>
61
- </port>
62
- </output>
63
- </layer>
64
- <layer id="5" name="Constant_223804" type="Const" version="opset1">
65
- <data element_type="i32" shape="" offset="499973" size="4" />
66
- <output>
67
- <port id="0" precision="I32" />
68
- </output>
69
- </layer>
70
- <layer id="6" name="ShapeOf_223805" type="ShapeOf" version="opset3">
71
- <data output_type="i64" />
72
- <input>
73
- <port id="0" precision="I32">
74
- <dim>-1</dim>
75
- </port>
76
- </input>
77
- <output>
78
- <port id="1" precision="I64">
79
- <dim>1</dim>
80
- </port>
81
- </output>
82
- </layer>
83
- <layer id="7" name="Broadcast_223806" type="Broadcast" version="opset3">
84
- <data mode="numpy" />
85
- <input>
86
- <port id="0" precision="I32" />
87
- <port id="1" precision="I64">
88
- <dim>1</dim>
89
- </port>
90
- </input>
91
- <output>
92
- <port id="2" precision="I32">
93
- <dim>-1</dim>
94
- </port>
95
- </output>
96
- </layer>
97
- <layer id="8" name="ScatterNDUpdate_223810" type="ScatterNDUpdate" version="opset4">
98
- <input>
99
- <port id="0" precision="I32">
100
- <dim>-1</dim>
101
- <dim>-1</dim>
102
- </port>
103
- <port id="1" precision="I64">
104
- <dim>-1</dim>
105
- <dim>2</dim>
106
- </port>
107
- <port id="2" precision="I32">
108
- <dim>-1</dim>
109
- </port>
110
- </input>
111
- <output>
112
- <port id="3" precision="I32">
113
- <dim>-1</dim>
114
- <dim>-1</dim>
115
- </port>
116
- </output>
117
- </layer>
118
- <layer id="9" name="ScatterNDUpdate_223810" type="Convert" version="opset1">
119
- <data destination_type="i64" />
120
- <input>
121
- <port id="0" precision="I32">
122
- <dim>-1</dim>
123
- <dim>-1</dim>
124
- </port>
125
- </input>
126
- <output>
127
- <port id="1" precision="I64" names="attention_mask">
128
- <dim>-1</dim>
129
- <dim>-1</dim>
130
- </port>
131
- </output>
132
- </layer>
133
- <layer id="11" name="Constant_223799" type="Const" version="opset1">
134
- <data element_type="i32" shape="" offset="499977" size="4" />
135
- <output>
136
- <port id="0" precision="I32" />
137
- </output>
138
- </layer>
139
- <layer id="12" name="Broadcast_223800" type="Broadcast" version="opset3">
140
- <data mode="numpy" />
141
- <input>
142
- <port id="0" precision="I32" />
143
- <port id="1" precision="I64">
144
- <dim>2</dim>
145
- </port>
146
- </input>
147
- <output>
148
- <port id="2" precision="I32">
149
- <dim>-1</dim>
150
- <dim>-1</dim>
151
- </port>
152
- </output>
153
- </layer>
154
- <layer id="13" name="ScatterNDUpdate_223801" type="ScatterNDUpdate" version="opset4">
155
- <input>
156
- <port id="0" precision="I32">
157
- <dim>-1</dim>
158
- <dim>-1</dim>
159
- </port>
160
- <port id="1" precision="I64">
161
- <dim>-1</dim>
162
- <dim>2</dim>
163
- </port>
164
- <port id="2" precision="I32">
165
- <dim>-1</dim>
166
- </port>
167
- </input>
168
- <output>
169
- <port id="3" precision="I32">
170
- <dim>-1</dim>
171
- <dim>-1</dim>
172
- </port>
173
- </output>
174
- </layer>
175
- <layer id="14" name="ScatterNDUpdate_223801" type="Convert" version="opset1">
176
- <data destination_type="i64" />
177
- <input>
178
- <port id="0" precision="I32">
179
- <dim>-1</dim>
180
- <dim>-1</dim>
181
- </port>
182
- </input>
183
- <output>
184
- <port id="1" precision="I64" names="input_ids">
185
- <dim>-1</dim>
186
- <dim>-1</dim>
187
- </port>
188
- </output>
189
- </layer>
190
- <layer id="15" name="Result_223811" type="Result" version="opset1">
191
- <input>
192
- <port id="0" precision="I64">
193
- <dim>-1</dim>
194
- <dim>-1</dim>
195
- </port>
196
- </input>
197
- </layer>
198
- <layer id="10" name="Result_223812" type="Result" version="opset1">
199
- <input>
200
- <port id="0" precision="I64">
201
- <dim>-1</dim>
202
- <dim>-1</dim>
203
- </port>
204
- </input>
205
- </layer>
206
- </layers>
207
- <edges>
208
- <edge from-layer="0" from-port="0" to-layer="3" to-port="1" />
209
- <edge from-layer="1" from-port="0" to-layer="4" to-port="0" />
210
- <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
211
- <edge from-layer="3" from-port="4" to-layer="4" to-port="1" />
212
- <edge from-layer="3" from-port="3" to-layer="6" to-port="0" />
213
- <edge from-layer="3" from-port="2" to-layer="8" to-port="1" />
214
- <edge from-layer="3" from-port="4" to-layer="12" to-port="1" />
215
- <edge from-layer="3" from-port="2" to-layer="13" to-port="1" />
216
- <edge from-layer="3" from-port="3" to-layer="13" to-port="2" />
217
- <edge from-layer="4" from-port="2" to-layer="8" to-port="0" />
218
- <edge from-layer="5" from-port="0" to-layer="7" to-port="0" />
219
- <edge from-layer="6" from-port="1" to-layer="7" to-port="1" />
220
- <edge from-layer="7" from-port="2" to-layer="8" to-port="2" />
221
- <edge from-layer="8" from-port="3" to-layer="9" to-port="0" />
222
- <edge from-layer="9" from-port="1" to-layer="10" to-port="0" />
223
- <edge from-layer="11" from-port="0" to-layer="12" to-port="0" />
224
- <edge from-layer="12" from-port="2" to-layer="13" to-port="0" />
225
- <edge from-layer="13" from-port="3" to-layer="14" to-port="0" />
226
- <edge from-layer="14" from-port="1" to-layer="15" to-port="0" />
227
- </edges>
228
- <rt_info>
229
- <eos_token_id value="32000" />
230
- </rt_info>
231
- </net>
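
This commit removes the standalone OpenVINO tokenizer and detokenizer IRs (`openvino_tokenizer.xml/.bin` and `openvino_detokenizer.xml/.bin`), leaving the Hugging Face tokenizer files (`tokenizer.json`, `tokenizer_config.json`) in place. If the IRs are needed again, for example for an OpenVINO GenAI pipeline, they can be rebuilt from the Hugging Face tokenizer with `openvino-tokenizers`; a minimal sketch (output paths are illustrative, not part of this commit):

```
# Rebuild the OpenVINO tokenizer/detokenizer IRs removed by this commit.
import openvino as ov
from openvino_tokenizers import convert_tokenizer
from transformers import AutoTokenizer

hf_tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct", trust_remote_code=True
)
ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
ov.save_model(ov_tokenizer, "openvino_tokenizer.xml")
ov.save_model(ov_detokenizer, "openvino_detokenizer.xml")
```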
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
 {
- "add_bos_token": true,
+ "add_bos_token": false,
 "add_eos_token": false,
+ "add_prefix_space": null,
 "added_tokens_decoder": {
 "0": {
 "content": "<unk>",
@@ -116,7 +117,7 @@
 }
 },
 "bos_token": "<s>",
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
 "clean_up_tokenization_spaces": false,
 "eos_token": "<|endoftext|>",
 "legacy": false,