doberst committed on
Commit
8519a44
1 Parent(s): ef7d15c

Upload 11 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "papluca/xlm-roberta-base-language-detection",
3
+ "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "ja",
15
+ "1": "nl",
16
+ "2": "ar",
17
+ "3": "pl",
18
+ "4": "de",
19
+ "5": "it",
20
+ "6": "pt",
21
+ "7": "tr",
22
+ "8": "es",
23
+ "9": "hi",
24
+ "10": "el",
25
+ "11": "ur",
26
+ "12": "bg",
27
+ "13": "en",
28
+ "14": "fr",
29
+ "15": "zh",
30
+ "16": "ru",
31
+ "17": "th",
32
+ "18": "sw",
33
+ "19": "vi"
34
+ },
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 3072,
37
+ "label2id": {
38
+ "ar": 2,
39
+ "bg": 12,
40
+ "de": 4,
41
+ "el": 10,
42
+ "en": 13,
43
+ "es": 8,
44
+ "fr": 14,
45
+ "hi": 9,
46
+ "it": 5,
47
+ "ja": 0,
48
+ "nl": 1,
49
+ "pl": 3,
50
+ "pt": 6,
51
+ "ru": 16,
52
+ "sw": 18,
53
+ "th": 17,
54
+ "tr": 7,
55
+ "ur": 11,
56
+ "vi": 19,
57
+ "zh": 15
58
+ },
59
+ "layer_norm_eps": 1e-05,
60
+ "max_position_embeddings": 514,
61
+ "model_type": "xlm-roberta",
62
+ "num_attention_heads": 12,
63
+ "num_hidden_layers": 12,
64
+ "output_past": true,
65
+ "pad_token_id": 1,
66
+ "position_embedding_type": "absolute",
67
+ "problem_type": "single_label_classification",
68
+ "transformers_version": "4.41.2",
69
+ "type_vocab_size": 1,
70
+ "use_cache": true,
71
+ "vocab_size": 250002
72
+ }
openvino_detokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23017cf370ac8769a66690c562282627f9ae591ff3e0384685addbe479eafe08
3
+ size 5069136
openvino_detokenizer.xml ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="detokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_10445" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="Parameter_10445">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Constant_10425" type="Const" version="opset1">
14
+ <data element_type="u8" shape="5069084" offset="0" size="5069084" />
15
+ <output>
16
+ <port id="0" precision="U8">
17
+ <dim>5069084</dim>
18
+ </port>
19
+ </output>
20
+ </layer>
21
+ <layer id="2" name="Convert_10465" type="Convert" version="opset1">
22
+ <data destination_type="i32" />
23
+ <input>
24
+ <port id="0" precision="I64">
25
+ <dim>-1</dim>
26
+ <dim>-1</dim>
27
+ </port>
28
+ </input>
29
+ <output>
30
+ <port id="1" precision="I32">
31
+ <dim>-1</dim>
32
+ <dim>-1</dim>
33
+ </port>
34
+ </output>
35
+ </layer>
36
+ <layer id="3" name="SentencepieceDetokenizer_10446" type="SentencepieceDetokenizer" version="extension">
37
+ <input>
38
+ <port id="0" precision="U8">
39
+ <dim>5069084</dim>
40
+ </port>
41
+ <port id="1" precision="I32">
42
+ <dim>-1</dim>
43
+ <dim>-1</dim>
44
+ </port>
45
+ </input>
46
+ <output>
47
+ <port id="2" precision="I32">
48
+ <dim>-1</dim>
49
+ </port>
50
+ <port id="3" precision="I32">
51
+ <dim>-1</dim>
52
+ </port>
53
+ <port id="4" precision="U8">
54
+ <dim>-1</dim>
55
+ </port>
56
+ </output>
57
+ </layer>
58
+ <layer id="4" name="Constant_10448" type="Const" version="opset1">
59
+ <data element_type="u8" shape="2" offset="5069084" size="2" />
60
+ <output>
61
+ <port id="0" precision="U8">
62
+ <dim>2</dim>
63
+ </port>
64
+ </output>
65
+ </layer>
66
+ <layer id="5" name="Constant_10450" type="Const" version="opset1">
67
+ <data element_type="u8" shape="0" offset="5069086" size="1" />
68
+ <output>
69
+ <port id="0" precision="U8">
70
+ <dim>0</dim>
71
+ </port>
72
+ </output>
73
+ </layer>
74
+ <layer id="6" name="RegexNormalization_10451" type="RegexNormalization" version="extension">
75
+ <data global_replace="true" />
76
+ <input>
77
+ <port id="0" precision="I32">
78
+ <dim>-1</dim>
79
+ </port>
80
+ <port id="1" precision="I32">
81
+ <dim>-1</dim>
82
+ </port>
83
+ <port id="2" precision="U8">
84
+ <dim>-1</dim>
85
+ </port>
86
+ <port id="3" precision="U8">
87
+ <dim>2</dim>
88
+ </port>
89
+ <port id="4" precision="U8">
90
+ <dim>0</dim>
91
+ </port>
92
+ </input>
93
+ <output>
94
+ <port id="5" precision="I32">
95
+ <dim>-1</dim>
96
+ </port>
97
+ <port id="6" precision="I32">
98
+ <dim>-1</dim>
99
+ </port>
100
+ <port id="7" precision="U8">
101
+ <dim>-1</dim>
102
+ </port>
103
+ </output>
104
+ </layer>
105
+ <layer id="7" name="Constant_10453" type="Const" version="opset1">
106
+ <data element_type="u8" shape="47" offset="5069087" size="47" />
107
+ <output>
108
+ <port id="0" precision="U8">
109
+ <dim>47</dim>
110
+ </port>
111
+ </output>
112
+ </layer>
113
+ <layer id="8" name="Constant_10455" type="Const" version="opset1">
114
+ <data element_type="u8" shape="2" offset="5069134" size="2" />
115
+ <output>
116
+ <port id="0" precision="U8">
117
+ <dim>2</dim>
118
+ </port>
119
+ </output>
120
+ </layer>
121
+ <layer id="9" name="RegexNormalization_10456" type="RegexNormalization" version="extension">
122
+ <data global_replace="true" />
123
+ <input>
124
+ <port id="0" precision="I32">
125
+ <dim>-1</dim>
126
+ </port>
127
+ <port id="1" precision="I32">
128
+ <dim>-1</dim>
129
+ </port>
130
+ <port id="2" precision="U8">
131
+ <dim>-1</dim>
132
+ </port>
133
+ <port id="3" precision="U8">
134
+ <dim>47</dim>
135
+ </port>
136
+ <port id="4" precision="U8">
137
+ <dim>2</dim>
138
+ </port>
139
+ </input>
140
+ <output>
141
+ <port id="5" precision="I32">
142
+ <dim>-1</dim>
143
+ </port>
144
+ <port id="6" precision="I32">
145
+ <dim>-1</dim>
146
+ </port>
147
+ <port id="7" precision="U8">
148
+ <dim>-1</dim>
149
+ </port>
150
+ </output>
151
+ </layer>
152
+ <layer id="10" name="StringTensorPack_10457" type="StringTensorPack" version="extension">
153
+ <data mode="begins_ends" />
154
+ <input>
155
+ <port id="0" precision="I32">
156
+ <dim>-1</dim>
157
+ </port>
158
+ <port id="1" precision="I32">
159
+ <dim>-1</dim>
160
+ </port>
161
+ <port id="2" precision="U8">
162
+ <dim>-1</dim>
163
+ </port>
164
+ </input>
165
+ <output>
166
+ <port id="3" precision="STRING" names="string_output">
167
+ <dim>-1</dim>
168
+ </port>
169
+ </output>
170
+ </layer>
171
+ <layer id="11" name="Result_10458" type="Result" version="opset1">
172
+ <input>
173
+ <port id="0" precision="STRING">
174
+ <dim>-1</dim>
175
+ </port>
176
+ </input>
177
+ </layer>
178
+ </layers>
179
+ <edges>
180
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
181
+ <edge from-layer="1" from-port="0" to-layer="3" to-port="0" />
182
+ <edge from-layer="2" from-port="1" to-layer="3" to-port="1" />
183
+ <edge from-layer="3" from-port="2" to-layer="6" to-port="0" />
184
+ <edge from-layer="3" from-port="3" to-layer="6" to-port="1" />
185
+ <edge from-layer="3" from-port="4" to-layer="6" to-port="2" />
186
+ <edge from-layer="4" from-port="0" to-layer="6" to-port="3" />
187
+ <edge from-layer="5" from-port="0" to-layer="6" to-port="4" />
188
+ <edge from-layer="6" from-port="5" to-layer="9" to-port="0" />
189
+ <edge from-layer="6" from-port="6" to-layer="9" to-port="1" />
190
+ <edge from-layer="6" from-port="7" to-layer="9" to-port="2" />
191
+ <edge from-layer="7" from-port="0" to-layer="9" to-port="3" />
192
+ <edge from-layer="8" from-port="0" to-layer="9" to-port="4" />
193
+ <edge from-layer="9" from-port="5" to-layer="10" to-port="0" />
194
+ <edge from-layer="9" from-port="6" to-layer="10" to-port="1" />
195
+ <edge from-layer="9" from-port="7" to-layer="10" to-port="2" />
196
+ <edge from-layer="10" from-port="3" to-layer="11" to-port="0" />
197
+ </edges>
198
+ <rt_info>
199
+ <eos_token_id value="2" />
200
+ </rt_info>
201
+ </net>
openvino_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbec0b816458c096b0c2814bb55247d79ad416c5bb43b7548d845db8f5e932f6
3
+ size 246660383
openvino_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
openvino_tokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e21a141fddc45a135d71b33803d54b75a9002988ff7dbbfef4fc230d3f781f7d
3
+ size 5069088
openvino_tokenizer.xml ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="tokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="string_input" type="Parameter" version="opset1">
5
+ <data shape="?" element_type="string" />
6
+ <output>
7
+ <port id="0" precision="STRING" names="string_input">
8
+ <dim>-1</dim>
9
+ </port>
10
+ </output>
11
+ </layer>
12
+ <layer id="1" name="Constant_10431" type="Const" version="opset1">
13
+ <data element_type="i32" shape="" offset="0" size="4" />
14
+ <output>
15
+ <port id="0" precision="I32" />
16
+ </output>
17
+ </layer>
18
+ <layer id="2" name="Constant_10424" type="Const" version="opset1">
19
+ <data element_type="u8" shape="5069080" offset="4" size="5069080" />
20
+ <output>
21
+ <port id="0" precision="U8">
22
+ <dim>5069080</dim>
23
+ </port>
24
+ </output>
25
+ </layer>
26
+ <layer id="3" name="SentencepieceTokenizer_10427" type="SentencepieceTokenizer" version="extension">
27
+ <data nbest_size="0" alpha="0" add_bos="true" add_eos="true" reverse="false" />
28
+ <input>
29
+ <port id="0" precision="U8">
30
+ <dim>5069080</dim>
31
+ </port>
32
+ <port id="1" precision="STRING">
33
+ <dim>-1</dim>
34
+ </port>
35
+ </input>
36
+ <output>
37
+ <port id="2" precision="I64">
38
+ <dim>-1</dim>
39
+ <dim>2</dim>
40
+ </port>
41
+ <port id="3" precision="I32">
42
+ <dim>-1</dim>
43
+ </port>
44
+ <port id="4" precision="I64">
45
+ <dim>2</dim>
46
+ </port>
47
+ </output>
48
+ </layer>
49
+ <layer id="4" name="Broadcast_10432" type="Broadcast" version="opset3">
50
+ <data mode="numpy" />
51
+ <input>
52
+ <port id="0" precision="I32" />
53
+ <port id="1" precision="I64">
54
+ <dim>2</dim>
55
+ </port>
56
+ </input>
57
+ <output>
58
+ <port id="2" precision="I32">
59
+ <dim>-1</dim>
60
+ <dim>-1</dim>
61
+ </port>
62
+ </output>
63
+ </layer>
64
+ <layer id="5" name="Constant_10433" type="Const" version="opset1">
65
+ <data element_type="i32" shape="" offset="5069084" size="4" />
66
+ <output>
67
+ <port id="0" precision="I32" />
68
+ </output>
69
+ </layer>
70
+ <layer id="6" name="ShapeOf_10434" type="ShapeOf" version="opset3">
71
+ <data output_type="i64" />
72
+ <input>
73
+ <port id="0" precision="I32">
74
+ <dim>-1</dim>
75
+ </port>
76
+ </input>
77
+ <output>
78
+ <port id="1" precision="I64">
79
+ <dim>1</dim>
80
+ </port>
81
+ </output>
82
+ </layer>
83
+ <layer id="7" name="Broadcast_10435" type="Broadcast" version="opset3">
84
+ <data mode="numpy" />
85
+ <input>
86
+ <port id="0" precision="I32" />
87
+ <port id="1" precision="I64">
88
+ <dim>1</dim>
89
+ </port>
90
+ </input>
91
+ <output>
92
+ <port id="2" precision="I32">
93
+ <dim>-1</dim>
94
+ </port>
95
+ </output>
96
+ </layer>
97
+ <layer id="8" name="ScatterNDUpdate_10439" type="ScatterNDUpdate" version="opset4">
98
+ <input>
99
+ <port id="0" precision="I32">
100
+ <dim>-1</dim>
101
+ <dim>-1</dim>
102
+ </port>
103
+ <port id="1" precision="I64">
104
+ <dim>-1</dim>
105
+ <dim>2</dim>
106
+ </port>
107
+ <port id="2" precision="I32">
108
+ <dim>-1</dim>
109
+ </port>
110
+ </input>
111
+ <output>
112
+ <port id="3" precision="I32">
113
+ <dim>-1</dim>
114
+ <dim>-1</dim>
115
+ </port>
116
+ </output>
117
+ </layer>
118
+ <layer id="9" name="ScatterNDUpdate_10439" type="Convert" version="opset1">
119
+ <data destination_type="i64" />
120
+ <input>
121
+ <port id="0" precision="I32">
122
+ <dim>-1</dim>
123
+ <dim>-1</dim>
124
+ </port>
125
+ </input>
126
+ <output>
127
+ <port id="1" precision="I64" names="attention_mask">
128
+ <dim>-1</dim>
129
+ <dim>-1</dim>
130
+ </port>
131
+ </output>
132
+ </layer>
133
+ <layer id="11" name="Constant_10428" type="Const" version="opset1">
134
+ <data element_type="i32" shape="" offset="5069084" size="4" />
135
+ <output>
136
+ <port id="0" precision="I32" />
137
+ </output>
138
+ </layer>
139
+ <layer id="12" name="Broadcast_10429" type="Broadcast" version="opset3">
140
+ <data mode="numpy" />
141
+ <input>
142
+ <port id="0" precision="I32" />
143
+ <port id="1" precision="I64">
144
+ <dim>2</dim>
145
+ </port>
146
+ </input>
147
+ <output>
148
+ <port id="2" precision="I32">
149
+ <dim>-1</dim>
150
+ <dim>-1</dim>
151
+ </port>
152
+ </output>
153
+ </layer>
154
+ <layer id="13" name="ScatterNDUpdate_10430" type="ScatterNDUpdate" version="opset4">
155
+ <input>
156
+ <port id="0" precision="I32">
157
+ <dim>-1</dim>
158
+ <dim>-1</dim>
159
+ </port>
160
+ <port id="1" precision="I64">
161
+ <dim>-1</dim>
162
+ <dim>2</dim>
163
+ </port>
164
+ <port id="2" precision="I32">
165
+ <dim>-1</dim>
166
+ </port>
167
+ </input>
168
+ <output>
169
+ <port id="3" precision="I32">
170
+ <dim>-1</dim>
171
+ <dim>-1</dim>
172
+ </port>
173
+ </output>
174
+ </layer>
175
+ <layer id="14" name="ScatterNDUpdate_10430" type="Convert" version="opset1">
176
+ <data destination_type="i64" />
177
+ <input>
178
+ <port id="0" precision="I32">
179
+ <dim>-1</dim>
180
+ <dim>-1</dim>
181
+ </port>
182
+ </input>
183
+ <output>
184
+ <port id="1" precision="I64" names="input_ids">
185
+ <dim>-1</dim>
186
+ <dim>-1</dim>
187
+ </port>
188
+ </output>
189
+ </layer>
190
+ <layer id="15" name="Result_10440" type="Result" version="opset1">
191
+ <input>
192
+ <port id="0" precision="I64">
193
+ <dim>-1</dim>
194
+ <dim>-1</dim>
195
+ </port>
196
+ </input>
197
+ </layer>
198
+ <layer id="10" name="Result_10441" type="Result" version="opset1">
199
+ <input>
200
+ <port id="0" precision="I64">
201
+ <dim>-1</dim>
202
+ <dim>-1</dim>
203
+ </port>
204
+ </input>
205
+ </layer>
206
+ </layers>
207
+ <edges>
208
+ <edge from-layer="0" from-port="0" to-layer="3" to-port="1" />
209
+ <edge from-layer="1" from-port="0" to-layer="4" to-port="0" />
210
+ <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
211
+ <edge from-layer="3" from-port="4" to-layer="4" to-port="1" />
212
+ <edge from-layer="3" from-port="3" to-layer="6" to-port="0" />
213
+ <edge from-layer="3" from-port="2" to-layer="8" to-port="1" />
214
+ <edge from-layer="3" from-port="4" to-layer="12" to-port="1" />
215
+ <edge from-layer="3" from-port="2" to-layer="13" to-port="1" />
216
+ <edge from-layer="3" from-port="3" to-layer="13" to-port="2" />
217
+ <edge from-layer="4" from-port="2" to-layer="8" to-port="0" />
218
+ <edge from-layer="5" from-port="0" to-layer="7" to-port="0" />
219
+ <edge from-layer="6" from-port="1" to-layer="7" to-port="1" />
220
+ <edge from-layer="7" from-port="2" to-layer="8" to-port="2" />
221
+ <edge from-layer="8" from-port="3" to-layer="9" to-port="0" />
222
+ <edge from-layer="9" from-port="1" to-layer="10" to-port="0" />
223
+ <edge from-layer="11" from-port="0" to-layer="12" to-port="0" />
224
+ <edge from-layer="12" from-port="2" to-layer="13" to-port="0" />
225
+ <edge from-layer="13" from-port="3" to-layer="14" to-port="0" />
226
+ <edge from-layer="14" from-port="1" to-layer="15" to-port="0" />
227
+ </edges>
228
+ <rt_info>
229
+ <eos_token_id value="2" />
230
+ </rt_info>
231
+ </net>
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
3
+ size 17082734
tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 512,
50
+ "pad_token": "<pad>",
51
+ "sep_token": "</s>",
52
+ "tokenizer_class": "XLMRobertaTokenizer",
53
+ "unk_token": "<unk>"
54
+ }