DarrenChensformer committed on
Commit
90d7728
1 Parent(s): 805464a

New model version

Browse files
Files changed (5) hide show
  1. added_tokens.json +17 -17
  2. config.json +11 -11
  3. pytorch_model.bin +2 -2
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +70 -70
added_tokens.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
- "<cel>": 47216,
3
- "<concept>": 47219,
4
- "<date>": 47214,
5
- "<dis>": 47218,
6
- "<eve>": 47215,
7
- "<loc>": 47208,
8
- "<media>": 47217,
9
- "<misc>": 47209,
10
- "<num>": 47211,
11
- "<org>": 47213,
12
- "<per>": 47210,
13
- "<relation>": 47204,
14
- "<time>": 47212,
15
- "<triplet>": 47203,
16
- "ca_XX": 47206,
17
- "el_EL": 47205,
18
- "tp_XX": 47207
19
  }
 
1
  {
2
+ "<cel>": 60065,
3
+ "<concept>": 60068,
4
+ "<date>": 60063,
5
+ "<dis>": 60067,
6
+ "<eve>": 60064,
7
+ "<loc>": 60057,
8
+ "<media>": 60066,
9
+ "<misc>": 60058,
10
+ "<num>": 60060,
11
+ "<org>": 60062,
12
+ "<per>": 60059,
13
+ "<relation>": 60053,
14
+ "<time>": 60061,
15
+ "<triplet>": 60052,
16
+ "ca_XX": 60055,
17
+ "el_EL": 60054,
18
+ "tp_XX": 60056
19
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Babelscape/mrebel-large-trim-zh",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
@@ -53,26 +53,26 @@
53
  "torch_dtype": "float32",
54
  "transformers_version": "4.34.1",
55
  "use_cache": true,
56
- "vocab_size": 47220,
57
  "vocabtrimmer": {
58
  "mining_config": {
59
- "dataset": "vocabtrimmer/mc4_validation",
60
- "dataset_column": "text",
61
  "dataset_name": "zh",
62
- "dataset_split": "validation",
63
  "language": "zh",
64
  "min_frequency": 2,
65
- "target_vocab_size": null
66
  },
67
  "stats": {
68
- "compression_rate_embedding": 18.882637330997998,
69
- "compression_rate_full": 65.99762981935335,
70
  "parameter_size_embedding/raw": 256072704,
71
- "parameter_size_embedding/trimmed": 48353280,
72
  "parameter_size_full/raw": 610896896,
73
- "parameter_size_full/trimmed": 403177472,
74
  "vocab_size/raw": 250071,
75
- "vocab_size/trimmed": 47220
76
  }
77
  }
78
  }
 
1
  {
2
+ "_name_or_path": "DarrenChensformer/mrebel-large-trim-zh",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
 
53
  "torch_dtype": "float32",
54
  "transformers_version": "4.34.1",
55
  "use_cache": true,
56
+ "vocab_size": 60069,
57
  "vocabtrimmer": {
58
  "mining_config": {
59
+ "dataset": "ikala/datahub-2023Q3",
60
+ "dataset_column": "content",
61
  "dataset_name": "zh",
62
+ "dataset_split": "train",
63
  "language": "zh",
64
  "min_frequency": 2,
65
+ "target_vocab_size": 60000
66
  },
67
  "stats": {
68
+ "compression_rate_embedding": 24.02077809901988,
69
+ "compression_rate_full": 68.15140995576445,
70
  "parameter_size_embedding/raw": 256072704,
71
+ "parameter_size_embedding/trimmed": 61510656,
72
  "parameter_size_full/raw": 610896896,
73
+ "parameter_size_full/trimmed": 416334848,
74
  "vocab_size/raw": 250071,
75
+ "vocab_size/trimmed": 60069
76
  }
77
  }
78
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d43e89868ecb86c70738c7c46b097dd0612a859fe25ace0d1a5c14a0305fc79e
3
- size 1613067802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fce28a7bfe49af25440f26db7cedce4ea080d3308012f2e013f713733867e2dd
3
+ size 1665748698
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -32,7 +32,7 @@
32
  "single_word": false,
33
  "special": true
34
  },
35
- "47150": {
36
  "content": "ar_AR",
37
  "lstrip": false,
38
  "normalized": false,
@@ -40,7 +40,7 @@
40
  "single_word": false,
41
  "special": true
42
  },
43
- "47151": {
44
  "content": "cs_CZ",
45
  "lstrip": false,
46
  "normalized": false,
@@ -48,7 +48,7 @@
48
  "single_word": false,
49
  "special": true
50
  },
51
- "47152": {
52
  "content": "de_DE",
53
  "lstrip": false,
54
  "normalized": false,
@@ -56,7 +56,7 @@
56
  "single_word": false,
57
  "special": true
58
  },
59
- "47153": {
60
  "content": "en_XX",
61
  "lstrip": false,
62
  "normalized": false,
@@ -64,7 +64,7 @@
64
  "single_word": false,
65
  "special": true
66
  },
67
- "47154": {
68
  "content": "es_XX",
69
  "lstrip": false,
70
  "normalized": false,
@@ -72,7 +72,7 @@
72
  "single_word": false,
73
  "special": true
74
  },
75
- "47155": {
76
  "content": "et_EE",
77
  "lstrip": false,
78
  "normalized": false,
@@ -80,7 +80,7 @@
80
  "single_word": false,
81
  "special": true
82
  },
83
- "47156": {
84
  "content": "fi_FI",
85
  "lstrip": false,
86
  "normalized": false,
@@ -88,7 +88,7 @@
88
  "single_word": false,
89
  "special": true
90
  },
91
- "47157": {
92
  "content": "fr_XX",
93
  "lstrip": false,
94
  "normalized": false,
@@ -96,7 +96,7 @@
96
  "single_word": false,
97
  "special": true
98
  },
99
- "47158": {
100
  "content": "gu_IN",
101
  "lstrip": false,
102
  "normalized": false,
@@ -104,7 +104,7 @@
104
  "single_word": false,
105
  "special": true
106
  },
107
- "47159": {
108
  "content": "hi_IN",
109
  "lstrip": false,
110
  "normalized": false,
@@ -112,7 +112,7 @@
112
  "single_word": false,
113
  "special": true
114
  },
115
- "47160": {
116
  "content": "it_IT",
117
  "lstrip": false,
118
  "normalized": false,
@@ -120,7 +120,7 @@
120
  "single_word": false,
121
  "special": true
122
  },
123
- "47161": {
124
  "content": "ja_XX",
125
  "lstrip": false,
126
  "normalized": false,
@@ -128,7 +128,7 @@
128
  "single_word": false,
129
  "special": true
130
  },
131
- "47162": {
132
  "content": "kk_KZ",
133
  "lstrip": false,
134
  "normalized": false,
@@ -136,7 +136,7 @@
136
  "single_word": false,
137
  "special": true
138
  },
139
- "47163": {
140
  "content": "ko_KR",
141
  "lstrip": false,
142
  "normalized": false,
@@ -144,7 +144,7 @@
144
  "single_word": false,
145
  "special": true
146
  },
147
- "47164": {
148
  "content": "lt_LT",
149
  "lstrip": false,
150
  "normalized": false,
@@ -152,7 +152,7 @@
152
  "single_word": false,
153
  "special": true
154
  },
155
- "47165": {
156
  "content": "lv_LV",
157
  "lstrip": false,
158
  "normalized": false,
@@ -160,7 +160,7 @@
160
  "single_word": false,
161
  "special": true
162
  },
163
- "47166": {
164
  "content": "my_MM",
165
  "lstrip": false,
166
  "normalized": false,
@@ -168,7 +168,7 @@
168
  "single_word": false,
169
  "special": true
170
  },
171
- "47167": {
172
  "content": "ne_NP",
173
  "lstrip": false,
174
  "normalized": false,
@@ -176,7 +176,7 @@
176
  "single_word": false,
177
  "special": true
178
  },
179
- "47168": {
180
  "content": "nl_XX",
181
  "lstrip": false,
182
  "normalized": false,
@@ -184,7 +184,7 @@
184
  "single_word": false,
185
  "special": true
186
  },
187
- "47169": {
188
  "content": "ro_RO",
189
  "lstrip": false,
190
  "normalized": false,
@@ -192,7 +192,7 @@
192
  "single_word": false,
193
  "special": true
194
  },
195
- "47170": {
196
  "content": "ru_RU",
197
  "lstrip": false,
198
  "normalized": false,
@@ -200,7 +200,7 @@
200
  "single_word": false,
201
  "special": true
202
  },
203
- "47171": {
204
  "content": "si_LK",
205
  "lstrip": false,
206
  "normalized": false,
@@ -208,7 +208,7 @@
208
  "single_word": false,
209
  "special": true
210
  },
211
- "47172": {
212
  "content": "tr_TR",
213
  "lstrip": false,
214
  "normalized": false,
@@ -216,7 +216,7 @@
216
  "single_word": false,
217
  "special": true
218
  },
219
- "47173": {
220
  "content": "vi_VN",
221
  "lstrip": false,
222
  "normalized": false,
@@ -224,7 +224,7 @@
224
  "single_word": false,
225
  "special": true
226
  },
227
- "47174": {
228
  "content": "zh_CN",
229
  "lstrip": false,
230
  "normalized": false,
@@ -232,7 +232,7 @@
232
  "single_word": false,
233
  "special": true
234
  },
235
- "47175": {
236
  "content": "af_ZA",
237
  "lstrip": false,
238
  "normalized": false,
@@ -240,7 +240,7 @@
240
  "single_word": false,
241
  "special": true
242
  },
243
- "47176": {
244
  "content": "az_AZ",
245
  "lstrip": false,
246
  "normalized": false,
@@ -248,7 +248,7 @@
248
  "single_word": false,
249
  "special": true
250
  },
251
- "47177": {
252
  "content": "bn_IN",
253
  "lstrip": false,
254
  "normalized": false,
@@ -256,7 +256,7 @@
256
  "single_word": false,
257
  "special": true
258
  },
259
- "47178": {
260
  "content": "fa_IR",
261
  "lstrip": false,
262
  "normalized": false,
@@ -264,7 +264,7 @@
264
  "single_word": false,
265
  "special": true
266
  },
267
- "47179": {
268
  "content": "he_IL",
269
  "lstrip": false,
270
  "normalized": false,
@@ -272,7 +272,7 @@
272
  "single_word": false,
273
  "special": true
274
  },
275
- "47180": {
276
  "content": "hr_HR",
277
  "lstrip": false,
278
  "normalized": false,
@@ -280,7 +280,7 @@
280
  "single_word": false,
281
  "special": true
282
  },
283
- "47181": {
284
  "content": "id_ID",
285
  "lstrip": false,
286
  "normalized": false,
@@ -288,7 +288,7 @@
288
  "single_word": false,
289
  "special": true
290
  },
291
- "47182": {
292
  "content": "ka_GE",
293
  "lstrip": false,
294
  "normalized": false,
@@ -296,7 +296,7 @@
296
  "single_word": false,
297
  "special": true
298
  },
299
- "47183": {
300
  "content": "km_KH",
301
  "lstrip": false,
302
  "normalized": false,
@@ -304,7 +304,7 @@
304
  "single_word": false,
305
  "special": true
306
  },
307
- "47184": {
308
  "content": "mk_MK",
309
  "lstrip": false,
310
  "normalized": false,
@@ -312,7 +312,7 @@
312
  "single_word": false,
313
  "special": true
314
  },
315
- "47185": {
316
  "content": "ml_IN",
317
  "lstrip": false,
318
  "normalized": false,
@@ -320,7 +320,7 @@
320
  "single_word": false,
321
  "special": true
322
  },
323
- "47186": {
324
  "content": "mn_MN",
325
  "lstrip": false,
326
  "normalized": false,
@@ -328,7 +328,7 @@
328
  "single_word": false,
329
  "special": true
330
  },
331
- "47187": {
332
  "content": "mr_IN",
333
  "lstrip": false,
334
  "normalized": false,
@@ -336,7 +336,7 @@
336
  "single_word": false,
337
  "special": true
338
  },
339
- "47188": {
340
  "content": "pl_PL",
341
  "lstrip": false,
342
  "normalized": false,
@@ -344,7 +344,7 @@
344
  "single_word": false,
345
  "special": true
346
  },
347
- "47189": {
348
  "content": "ps_AF",
349
  "lstrip": false,
350
  "normalized": false,
@@ -352,7 +352,7 @@
352
  "single_word": false,
353
  "special": true
354
  },
355
- "47190": {
356
  "content": "pt_XX",
357
  "lstrip": false,
358
  "normalized": false,
@@ -360,7 +360,7 @@
360
  "single_word": false,
361
  "special": true
362
  },
363
- "47191": {
364
  "content": "sv_SE",
365
  "lstrip": false,
366
  "normalized": false,
@@ -368,7 +368,7 @@
368
  "single_word": false,
369
  "special": true
370
  },
371
- "47192": {
372
  "content": "sw_KE",
373
  "lstrip": false,
374
  "normalized": false,
@@ -376,7 +376,7 @@
376
  "single_word": false,
377
  "special": true
378
  },
379
- "47193": {
380
  "content": "ta_IN",
381
  "lstrip": false,
382
  "normalized": false,
@@ -384,7 +384,7 @@
384
  "single_word": false,
385
  "special": true
386
  },
387
- "47194": {
388
  "content": "te_IN",
389
  "lstrip": false,
390
  "normalized": false,
@@ -392,7 +392,7 @@
392
  "single_word": false,
393
  "special": true
394
  },
395
- "47195": {
396
  "content": "th_TH",
397
  "lstrip": false,
398
  "normalized": false,
@@ -400,7 +400,7 @@
400
  "single_word": false,
401
  "special": true
402
  },
403
- "47196": {
404
  "content": "tl_XX",
405
  "lstrip": false,
406
  "normalized": false,
@@ -408,7 +408,7 @@
408
  "single_word": false,
409
  "special": true
410
  },
411
- "47197": {
412
  "content": "uk_UA",
413
  "lstrip": false,
414
  "normalized": false,
@@ -416,7 +416,7 @@
416
  "single_word": false,
417
  "special": true
418
  },
419
- "47198": {
420
  "content": "ur_PK",
421
  "lstrip": false,
422
  "normalized": false,
@@ -424,7 +424,7 @@
424
  "single_word": false,
425
  "special": true
426
  },
427
- "47199": {
428
  "content": "xh_ZA",
429
  "lstrip": false,
430
  "normalized": false,
@@ -432,7 +432,7 @@
432
  "single_word": false,
433
  "special": true
434
  },
435
- "47200": {
436
  "content": "gl_ES",
437
  "lstrip": false,
438
  "normalized": false,
@@ -440,7 +440,7 @@
440
  "single_word": false,
441
  "special": true
442
  },
443
- "47201": {
444
  "content": "sl_SI",
445
  "lstrip": false,
446
  "normalized": false,
@@ -448,7 +448,7 @@
448
  "single_word": false,
449
  "special": true
450
  },
451
- "47202": {
452
  "content": "<mask>",
453
  "lstrip": true,
454
  "normalized": false,
@@ -456,7 +456,7 @@
456
  "single_word": false,
457
  "special": true
458
  },
459
- "47203": {
460
  "content": "<triplet>",
461
  "lstrip": false,
462
  "normalized": false,
@@ -464,7 +464,7 @@
464
  "single_word": false,
465
  "special": true
466
  },
467
- "47204": {
468
  "content": "<relation>",
469
  "lstrip": false,
470
  "normalized": false,
@@ -472,7 +472,7 @@
472
  "single_word": false,
473
  "special": true
474
  },
475
- "47205": {
476
  "content": "el_EL",
477
  "lstrip": false,
478
  "normalized": false,
@@ -480,7 +480,7 @@
480
  "single_word": false,
481
  "special": true
482
  },
483
- "47206": {
484
  "content": "ca_XX",
485
  "lstrip": false,
486
  "normalized": false,
@@ -488,7 +488,7 @@
488
  "single_word": false,
489
  "special": true
490
  },
491
- "47207": {
492
  "content": "tp_XX",
493
  "lstrip": false,
494
  "normalized": false,
@@ -496,7 +496,7 @@
496
  "single_word": false,
497
  "special": true
498
  },
499
- "47208": {
500
  "content": "<loc>",
501
  "lstrip": false,
502
  "normalized": false,
@@ -504,7 +504,7 @@
504
  "single_word": false,
505
  "special": true
506
  },
507
- "47209": {
508
  "content": "<misc>",
509
  "lstrip": false,
510
  "normalized": false,
@@ -512,7 +512,7 @@
512
  "single_word": false,
513
  "special": true
514
  },
515
- "47210": {
516
  "content": "<per>",
517
  "lstrip": false,
518
  "normalized": false,
@@ -520,7 +520,7 @@
520
  "single_word": false,
521
  "special": true
522
  },
523
- "47211": {
524
  "content": "<num>",
525
  "lstrip": false,
526
  "normalized": false,
@@ -528,7 +528,7 @@
528
  "single_word": false,
529
  "special": true
530
  },
531
- "47212": {
532
  "content": "<time>",
533
  "lstrip": false,
534
  "normalized": false,
@@ -536,7 +536,7 @@
536
  "single_word": false,
537
  "special": true
538
  },
539
- "47213": {
540
  "content": "<org>",
541
  "lstrip": false,
542
  "normalized": false,
@@ -544,7 +544,7 @@
544
  "single_word": false,
545
  "special": true
546
  },
547
- "47214": {
548
  "content": "<date>",
549
  "lstrip": false,
550
  "normalized": false,
@@ -552,7 +552,7 @@
552
  "single_word": false,
553
  "special": true
554
  },
555
- "47215": {
556
  "content": "<eve>",
557
  "lstrip": false,
558
  "normalized": false,
@@ -560,7 +560,7 @@
560
  "single_word": false,
561
  "special": true
562
  },
563
- "47216": {
564
  "content": "<cel>",
565
  "lstrip": false,
566
  "normalized": false,
@@ -568,7 +568,7 @@
568
  "single_word": false,
569
  "special": true
570
  },
571
- "47217": {
572
  "content": "<media>",
573
  "lstrip": false,
574
  "normalized": false,
@@ -576,7 +576,7 @@
576
  "single_word": false,
577
  "special": true
578
  },
579
- "47218": {
580
  "content": "<dis>",
581
  "lstrip": false,
582
  "normalized": false,
@@ -584,7 +584,7 @@
584
  "single_word": false,
585
  "special": true
586
  },
587
- "47219": {
588
  "content": "<concept>",
589
  "lstrip": false,
590
  "normalized": false,
 
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "59999": {
36
  "content": "ar_AR",
37
  "lstrip": false,
38
  "normalized": false,
 
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "60000": {
44
  "content": "cs_CZ",
45
  "lstrip": false,
46
  "normalized": false,
 
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "60001": {
52
  "content": "de_DE",
53
  "lstrip": false,
54
  "normalized": false,
 
56
  "single_word": false,
57
  "special": true
58
  },
59
+ "60002": {
60
  "content": "en_XX",
61
  "lstrip": false,
62
  "normalized": false,
 
64
  "single_word": false,
65
  "special": true
66
  },
67
+ "60003": {
68
  "content": "es_XX",
69
  "lstrip": false,
70
  "normalized": false,
 
72
  "single_word": false,
73
  "special": true
74
  },
75
+ "60004": {
76
  "content": "et_EE",
77
  "lstrip": false,
78
  "normalized": false,
 
80
  "single_word": false,
81
  "special": true
82
  },
83
+ "60005": {
84
  "content": "fi_FI",
85
  "lstrip": false,
86
  "normalized": false,
 
88
  "single_word": false,
89
  "special": true
90
  },
91
+ "60006": {
92
  "content": "fr_XX",
93
  "lstrip": false,
94
  "normalized": false,
 
96
  "single_word": false,
97
  "special": true
98
  },
99
+ "60007": {
100
  "content": "gu_IN",
101
  "lstrip": false,
102
  "normalized": false,
 
104
  "single_word": false,
105
  "special": true
106
  },
107
+ "60008": {
108
  "content": "hi_IN",
109
  "lstrip": false,
110
  "normalized": false,
 
112
  "single_word": false,
113
  "special": true
114
  },
115
+ "60009": {
116
  "content": "it_IT",
117
  "lstrip": false,
118
  "normalized": false,
 
120
  "single_word": false,
121
  "special": true
122
  },
123
+ "60010": {
124
  "content": "ja_XX",
125
  "lstrip": false,
126
  "normalized": false,
 
128
  "single_word": false,
129
  "special": true
130
  },
131
+ "60011": {
132
  "content": "kk_KZ",
133
  "lstrip": false,
134
  "normalized": false,
 
136
  "single_word": false,
137
  "special": true
138
  },
139
+ "60012": {
140
  "content": "ko_KR",
141
  "lstrip": false,
142
  "normalized": false,
 
144
  "single_word": false,
145
  "special": true
146
  },
147
+ "60013": {
148
  "content": "lt_LT",
149
  "lstrip": false,
150
  "normalized": false,
 
152
  "single_word": false,
153
  "special": true
154
  },
155
+ "60014": {
156
  "content": "lv_LV",
157
  "lstrip": false,
158
  "normalized": false,
 
160
  "single_word": false,
161
  "special": true
162
  },
163
+ "60015": {
164
  "content": "my_MM",
165
  "lstrip": false,
166
  "normalized": false,
 
168
  "single_word": false,
169
  "special": true
170
  },
171
+ "60016": {
172
  "content": "ne_NP",
173
  "lstrip": false,
174
  "normalized": false,
 
176
  "single_word": false,
177
  "special": true
178
  },
179
+ "60017": {
180
  "content": "nl_XX",
181
  "lstrip": false,
182
  "normalized": false,
 
184
  "single_word": false,
185
  "special": true
186
  },
187
+ "60018": {
188
  "content": "ro_RO",
189
  "lstrip": false,
190
  "normalized": false,
 
192
  "single_word": false,
193
  "special": true
194
  },
195
+ "60019": {
196
  "content": "ru_RU",
197
  "lstrip": false,
198
  "normalized": false,
 
200
  "single_word": false,
201
  "special": true
202
  },
203
+ "60020": {
204
  "content": "si_LK",
205
  "lstrip": false,
206
  "normalized": false,
 
208
  "single_word": false,
209
  "special": true
210
  },
211
+ "60021": {
212
  "content": "tr_TR",
213
  "lstrip": false,
214
  "normalized": false,
 
216
  "single_word": false,
217
  "special": true
218
  },
219
+ "60022": {
220
  "content": "vi_VN",
221
  "lstrip": false,
222
  "normalized": false,
 
224
  "single_word": false,
225
  "special": true
226
  },
227
+ "60023": {
228
  "content": "zh_CN",
229
  "lstrip": false,
230
  "normalized": false,
 
232
  "single_word": false,
233
  "special": true
234
  },
235
+ "60024": {
236
  "content": "af_ZA",
237
  "lstrip": false,
238
  "normalized": false,
 
240
  "single_word": false,
241
  "special": true
242
  },
243
+ "60025": {
244
  "content": "az_AZ",
245
  "lstrip": false,
246
  "normalized": false,
 
248
  "single_word": false,
249
  "special": true
250
  },
251
+ "60026": {
252
  "content": "bn_IN",
253
  "lstrip": false,
254
  "normalized": false,
 
256
  "single_word": false,
257
  "special": true
258
  },
259
+ "60027": {
260
  "content": "fa_IR",
261
  "lstrip": false,
262
  "normalized": false,
 
264
  "single_word": false,
265
  "special": true
266
  },
267
+ "60028": {
268
  "content": "he_IL",
269
  "lstrip": false,
270
  "normalized": false,
 
272
  "single_word": false,
273
  "special": true
274
  },
275
+ "60029": {
276
  "content": "hr_HR",
277
  "lstrip": false,
278
  "normalized": false,
 
280
  "single_word": false,
281
  "special": true
282
  },
283
+ "60030": {
284
  "content": "id_ID",
285
  "lstrip": false,
286
  "normalized": false,
 
288
  "single_word": false,
289
  "special": true
290
  },
291
+ "60031": {
292
  "content": "ka_GE",
293
  "lstrip": false,
294
  "normalized": false,
 
296
  "single_word": false,
297
  "special": true
298
  },
299
+ "60032": {
300
  "content": "km_KH",
301
  "lstrip": false,
302
  "normalized": false,
 
304
  "single_word": false,
305
  "special": true
306
  },
307
+ "60033": {
308
  "content": "mk_MK",
309
  "lstrip": false,
310
  "normalized": false,
 
312
  "single_word": false,
313
  "special": true
314
  },
315
+ "60034": {
316
  "content": "ml_IN",
317
  "lstrip": false,
318
  "normalized": false,
 
320
  "single_word": false,
321
  "special": true
322
  },
323
+ "60035": {
324
  "content": "mn_MN",
325
  "lstrip": false,
326
  "normalized": false,
 
328
  "single_word": false,
329
  "special": true
330
  },
331
+ "60036": {
332
  "content": "mr_IN",
333
  "lstrip": false,
334
  "normalized": false,
 
336
  "single_word": false,
337
  "special": true
338
  },
339
+ "60037": {
340
  "content": "pl_PL",
341
  "lstrip": false,
342
  "normalized": false,
 
344
  "single_word": false,
345
  "special": true
346
  },
347
+ "60038": {
348
  "content": "ps_AF",
349
  "lstrip": false,
350
  "normalized": false,
 
352
  "single_word": false,
353
  "special": true
354
  },
355
+ "60039": {
356
  "content": "pt_XX",
357
  "lstrip": false,
358
  "normalized": false,
 
360
  "single_word": false,
361
  "special": true
362
  },
363
+ "60040": {
364
  "content": "sv_SE",
365
  "lstrip": false,
366
  "normalized": false,
 
368
  "single_word": false,
369
  "special": true
370
  },
371
+ "60041": {
372
  "content": "sw_KE",
373
  "lstrip": false,
374
  "normalized": false,
 
376
  "single_word": false,
377
  "special": true
378
  },
379
+ "60042": {
380
  "content": "ta_IN",
381
  "lstrip": false,
382
  "normalized": false,
 
384
  "single_word": false,
385
  "special": true
386
  },
387
+ "60043": {
388
  "content": "te_IN",
389
  "lstrip": false,
390
  "normalized": false,
 
392
  "single_word": false,
393
  "special": true
394
  },
395
+ "60044": {
396
  "content": "th_TH",
397
  "lstrip": false,
398
  "normalized": false,
 
400
  "single_word": false,
401
  "special": true
402
  },
403
+ "60045": {
404
  "content": "tl_XX",
405
  "lstrip": false,
406
  "normalized": false,
 
408
  "single_word": false,
409
  "special": true
410
  },
411
+ "60046": {
412
  "content": "uk_UA",
413
  "lstrip": false,
414
  "normalized": false,
 
416
  "single_word": false,
417
  "special": true
418
  },
419
+ "60047": {
420
  "content": "ur_PK",
421
  "lstrip": false,
422
  "normalized": false,
 
424
  "single_word": false,
425
  "special": true
426
  },
427
+ "60048": {
428
  "content": "xh_ZA",
429
  "lstrip": false,
430
  "normalized": false,
 
432
  "single_word": false,
433
  "special": true
434
  },
435
+ "60049": {
436
  "content": "gl_ES",
437
  "lstrip": false,
438
  "normalized": false,
 
440
  "single_word": false,
441
  "special": true
442
  },
443
+ "60050": {
444
  "content": "sl_SI",
445
  "lstrip": false,
446
  "normalized": false,
 
448
  "single_word": false,
449
  "special": true
450
  },
451
+ "60051": {
452
  "content": "<mask>",
453
  "lstrip": true,
454
  "normalized": false,
 
456
  "single_word": false,
457
  "special": true
458
  },
459
+ "60052": {
460
  "content": "<triplet>",
461
  "lstrip": false,
462
  "normalized": false,
 
464
  "single_word": false,
465
  "special": true
466
  },
467
+ "60053": {
468
  "content": "<relation>",
469
  "lstrip": false,
470
  "normalized": false,
 
472
  "single_word": false,
473
  "special": true
474
  },
475
+ "60054": {
476
  "content": "el_EL",
477
  "lstrip": false,
478
  "normalized": false,
 
480
  "single_word": false,
481
  "special": true
482
  },
483
+ "60055": {
484
  "content": "ca_XX",
485
  "lstrip": false,
486
  "normalized": false,
 
488
  "single_word": false,
489
  "special": true
490
  },
491
+ "60056": {
492
  "content": "tp_XX",
493
  "lstrip": false,
494
  "normalized": false,
 
496
  "single_word": false,
497
  "special": true
498
  },
499
+ "60057": {
500
  "content": "<loc>",
501
  "lstrip": false,
502
  "normalized": false,
 
504
  "single_word": false,
505
  "special": true
506
  },
507
+ "60058": {
508
  "content": "<misc>",
509
  "lstrip": false,
510
  "normalized": false,
 
512
  "single_word": false,
513
  "special": true
514
  },
515
+ "60059": {
516
  "content": "<per>",
517
  "lstrip": false,
518
  "normalized": false,
 
520
  "single_word": false,
521
  "special": true
522
  },
523
+ "60060": {
524
  "content": "<num>",
525
  "lstrip": false,
526
  "normalized": false,
 
528
  "single_word": false,
529
  "special": true
530
  },
531
+ "60061": {
532
  "content": "<time>",
533
  "lstrip": false,
534
  "normalized": false,
 
536
  "single_word": false,
537
  "special": true
538
  },
539
+ "60062": {
540
  "content": "<org>",
541
  "lstrip": false,
542
  "normalized": false,
 
544
  "single_word": false,
545
  "special": true
546
  },
547
+ "60063": {
548
  "content": "<date>",
549
  "lstrip": false,
550
  "normalized": false,
 
552
  "single_word": false,
553
  "special": true
554
  },
555
+ "60064": {
556
  "content": "<eve>",
557
  "lstrip": false,
558
  "normalized": false,
 
560
  "single_word": false,
561
  "special": true
562
  },
563
+ "60065": {
564
  "content": "<cel>",
565
  "lstrip": false,
566
  "normalized": false,
 
568
  "single_word": false,
569
  "special": true
570
  },
571
+ "60066": {
572
  "content": "<media>",
573
  "lstrip": false,
574
  "normalized": false,
 
576
  "single_word": false,
577
  "special": true
578
  },
579
+ "60067": {
580
  "content": "<dis>",
581
  "lstrip": false,
582
  "normalized": false,
 
584
  "single_word": false,
585
  "special": true
586
  },
587
+ "60068": {
588
  "content": "<concept>",
589
  "lstrip": false,
590
  "normalized": false,