hyunwoongko committed on
Commit
66e7f4a
•
1 Parent(s): 9f965c3

update kobart-v2

Browse files
.gitattributes CHANGED
@@ -5,4 +5,5 @@
5
  *.tflite filter=lfs diff=lfs merge=lfs -text
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
- *.onnx filter=lfs diff=lfs merge=lfs -text
 
5
  *.tflite filter=lfs diff=lfs merge=lfs -text
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: ko
3
+ tags:
4
+ - bart
5
+ license: mit
6
+ ---
7
+
8
+ ## KoBART-base-v2
9
+
10
+ Trained with additional chat data, this model handles the semantics of longer sequences than the original KoBART could.
11
+
12
+ ```python
13
+ from transformers import PreTrainedTokenizerFast, BartModel
14
+
15
+ tokenizer = PreTrainedTokenizerFast.from_pretrained('gogamza/kobart-base-v2')
16
+ model = BartModel.from_pretrained('gogamza/kobart-base-v2')
17
+ ```
18
+
19
+ ### Performance
20
+
21
+ NSMC
22
+ - acc. : 0.901
23
+
24
+ ### hyunwoongko/kobart
25
+ - I added bos/eos post processor
added_tokens.json DELETED
@@ -1 +0,0 @@
1
- {}
 
config.json CHANGED
@@ -7,7 +7,7 @@
7
  "BartModel"
8
  ],
9
  "attention_dropout": 0.0,
10
- "bos_token_id": 0,
11
  "classif_dropout": 0.1,
12
  "classifier_dropout": 0.1,
13
  "d_model": 768,
@@ -15,6 +15,7 @@
15
  "decoder_ffn_dim": 3072,
16
  "decoder_layerdrop": 0.0,
17
  "decoder_layers": 6,
 
18
  "do_blenderbot_90_layernorm": false,
19
  "dropout": 0.1,
20
  "encoder_attention_heads": 16,
@@ -24,6 +25,8 @@
24
  "eos_token_id": 1,
25
  "extra_pos_embeddings": 2,
26
  "force_bos_token_to_be_generated": false,
 
 
27
  "id2label": {
28
  "0": "NEGATIVE",
29
  "1": "POSITIVE"
@@ -42,6 +45,10 @@
42
  "pad_token_id": 3,
43
  "scale_embedding": false,
44
  "static_position_embeddings": false,
 
45
  "use_cache": true,
46
- "vocab_size": 30000
 
 
 
47
  }
7
  "BartModel"
8
  ],
9
  "attention_dropout": 0.0,
10
+ "bos_token_id": 1,
11
  "classif_dropout": 0.1,
12
  "classifier_dropout": 0.1,
13
  "d_model": 768,
15
  "decoder_ffn_dim": 3072,
16
  "decoder_layerdrop": 0.0,
17
  "decoder_layers": 6,
18
+ "decoder_start_token_id": 1,
19
  "do_blenderbot_90_layernorm": false,
20
  "dropout": 0.1,
21
  "encoder_attention_heads": 16,
25
  "eos_token_id": 1,
26
  "extra_pos_embeddings": 2,
27
  "force_bos_token_to_be_generated": false,
28
+ "forced_eos_token_id": 1,
29
+ "gradient_checkpointing": false,
30
  "id2label": {
31
  "0": "NEGATIVE",
32
  "1": "POSITIVE"
45
  "pad_token_id": 3,
46
  "scale_embedding": false,
47
  "static_position_embeddings": false,
48
+ "transformers_version": "4.5.1",
49
  "use_cache": true,
50
+ "vocab_size": 30000,
51
+ "tokenizer_class": "PreTrainedTokenizerFast",
52
+ "author": "Heewon Jeon(madjakarta@gmail.com)",
53
+ "kobart_version": 2.0
54
  }
merges.txt DELETED
The diff for this file is too large to render. See raw diff
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbb79682ee5d3d34078866b2e9c0b94ac51487e4b5f5f5c91d2528a60d438d4a
3
  size 495536138
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7082c7f1ae4efcc22c4b0081d71911059f4c5f65f278436c948d14b2d3d934d
3
  size 495536138
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
1
+ {"bos_token": "</s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>"}
tokenizer.json CHANGED
@@ -5,2070 +5,2072 @@
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
- "special": true,
9
  "content": "<s>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
- "normalized": false
 
14
  },
15
  {
16
  "id": 1,
17
- "special": true,
18
  "content": "</s>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
- "normalized": false
 
23
  },
24
  {
25
  "id": 2,
26
- "special": true,
27
  "content": "<usr>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
- "normalized": false
 
32
  },
33
  {
34
  "id": 3,
35
- "special": true,
36
  "content": "<pad>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
- "normalized": false
 
41
  },
42
  {
43
  "id": 4,
44
- "special": true,
45
  "content": "<sys>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
- "normalized": false
 
50
  },
51
  {
52
  "id": 5,
53
- "special": true,
54
  "content": "<unk>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
- "normalized": false
 
59
  },
60
  {
61
  "id": 6,
62
- "special": true,
63
  "content": "<mask>",
64
  "single_word": false,
65
- "lstrip": true,
66
  "rstrip": false,
67
- "normalized": false
 
68
  },
69
  {
70
  "id": 7,
71
- "special": true,
72
  "content": "<unused0>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
- "normalized": false
 
77
  },
78
  {
79
  "id": 8,
80
- "special": true,
81
  "content": "<unused1>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
- "normalized": false
 
86
  },
87
  {
88
  "id": 9,
89
- "special": true,
90
  "content": "<unused2>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
- "normalized": false
 
95
  },
96
  {
97
  "id": 10,
98
- "special": true,
99
  "content": "<unused3>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
- "normalized": false
 
104
  },
105
  {
106
  "id": 11,
107
- "special": true,
108
  "content": "<unused4>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
- "normalized": false
 
113
  },
114
  {
115
  "id": 12,
116
- "special": true,
117
  "content": "<unused5>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
- "normalized": false
 
122
  },
123
  {
124
  "id": 13,
125
- "special": true,
126
  "content": "<unused6>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
- "normalized": false
 
131
  },
132
  {
133
  "id": 14,
134
- "special": true,
135
  "content": "<unused7>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
- "normalized": false
 
140
  },
141
  {
142
  "id": 15,
143
- "special": true,
144
  "content": "<unused8>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
- "normalized": false
 
149
  },
150
  {
151
  "id": 16,
152
- "special": true,
153
  "content": "<unused9>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
- "normalized": false
 
158
  },
159
  {
160
  "id": 17,
161
- "special": true,
162
  "content": "<unused10>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
- "normalized": false
 
167
  },
168
  {
169
  "id": 18,
170
- "special": true,
171
  "content": "<unused11>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
- "normalized": false
 
176
  },
177
  {
178
  "id": 19,
179
- "special": true,
180
  "content": "<unused12>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
- "normalized": false
 
185
  },
186
  {
187
  "id": 20,
188
- "special": true,
189
  "content": "<unused13>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
- "normalized": false
 
194
  },
195
  {
196
  "id": 21,
197
- "special": true,
198
  "content": "<unused14>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
- "normalized": false
 
203
  },
204
  {
205
  "id": 22,
206
- "special": true,
207
  "content": "<unused15>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
- "normalized": false
 
212
  },
213
  {
214
  "id": 23,
215
- "special": true,
216
  "content": "<unused16>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
- "normalized": false
 
221
  },
222
  {
223
  "id": 24,
224
- "special": true,
225
  "content": "<unused17>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
- "normalized": false
 
230
  },
231
  {
232
  "id": 25,
233
- "special": true,
234
  "content": "<unused18>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
- "normalized": false
 
239
  },
240
  {
241
  "id": 26,
242
- "special": true,
243
  "content": "<unused19>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
- "normalized": false
 
248
  },
249
  {
250
  "id": 27,
251
- "special": true,
252
  "content": "<unused20>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
- "normalized": false
 
257
  },
258
  {
259
  "id": 28,
260
- "special": true,
261
  "content": "<unused21>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
- "normalized": false
 
266
  },
267
  {
268
  "id": 29,
269
- "special": true,
270
  "content": "<unused22>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
- "normalized": false
 
275
  },
276
  {
277
  "id": 30,
278
- "special": true,
279
  "content": "<unused23>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
- "normalized": false
 
284
  },
285
  {
286
  "id": 31,
287
- "special": true,
288
  "content": "<unused24>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
- "normalized": false
 
293
  },
294
  {
295
  "id": 32,
296
- "special": true,
297
  "content": "<unused25>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
- "normalized": false
 
302
  },
303
  {
304
  "id": 33,
305
- "special": true,
306
  "content": "<unused26>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
- "normalized": false
 
311
  },
312
  {
313
  "id": 34,
314
- "special": true,
315
  "content": "<unused27>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
- "normalized": false
 
320
  },
321
  {
322
  "id": 35,
323
- "special": true,
324
  "content": "<unused28>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
- "normalized": false
 
329
  },
330
  {
331
  "id": 36,
332
- "special": true,
333
  "content": "<unused29>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
- "normalized": false
 
338
  },
339
  {
340
  "id": 37,
341
- "special": true,
342
  "content": "<unused30>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
- "normalized": false
 
347
  },
348
  {
349
  "id": 38,
350
- "special": true,
351
  "content": "<unused31>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
- "normalized": false
 
356
  },
357
  {
358
  "id": 39,
359
- "special": true,
360
  "content": "<unused32>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
- "normalized": false
 
365
  },
366
  {
367
  "id": 40,
368
- "special": true,
369
  "content": "<unused33>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
- "normalized": false
 
374
  },
375
  {
376
  "id": 41,
377
- "special": true,
378
  "content": "<unused34>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
- "normalized": false
 
383
  },
384
  {
385
  "id": 42,
386
- "special": true,
387
  "content": "<unused35>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
- "normalized": false
 
392
  },
393
  {
394
  "id": 43,
395
- "special": true,
396
  "content": "<unused36>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
- "normalized": false
 
401
  },
402
  {
403
  "id": 44,
404
- "special": true,
405
  "content": "<unused37>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
- "normalized": false
 
410
  },
411
  {
412
  "id": 45,
413
- "special": true,
414
  "content": "<unused38>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
- "normalized": false
 
419
  },
420
  {
421
  "id": 46,
422
- "special": true,
423
  "content": "<unused39>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
- "normalized": false
 
428
  },
429
  {
430
  "id": 47,
431
- "special": true,
432
  "content": "<unused40>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
- "normalized": false
 
437
  },
438
  {
439
  "id": 48,
440
- "special": true,
441
  "content": "<unused41>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
- "normalized": false
 
446
  },
447
  {
448
  "id": 49,
449
- "special": true,
450
  "content": "<unused42>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
- "normalized": false
 
455
  },
456
  {
457
  "id": 50,
458
- "special": true,
459
  "content": "<unused43>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
- "normalized": false
 
464
  },
465
  {
466
  "id": 51,
467
- "special": true,
468
  "content": "<unused44>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
- "normalized": false
 
473
  },
474
  {
475
  "id": 52,
476
- "special": true,
477
  "content": "<unused45>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
- "normalized": false
 
482
  },
483
  {
484
  "id": 53,
485
- "special": true,
486
  "content": "<unused46>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
- "normalized": false
 
491
  },
492
  {
493
  "id": 54,
494
- "special": true,
495
  "content": "<unused47>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
- "normalized": false
 
500
  },
501
  {
502
  "id": 55,
503
- "special": true,
504
  "content": "<unused48>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
- "normalized": false
 
509
  },
510
  {
511
  "id": 56,
512
- "special": true,
513
  "content": "<unused49>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
- "normalized": false
 
518
  },
519
  {
520
  "id": 57,
521
- "special": true,
522
  "content": "<unused50>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
- "normalized": false
 
527
  },
528
  {
529
  "id": 58,
530
- "special": true,
531
  "content": "<unused51>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
- "normalized": false
 
536
  },
537
  {
538
  "id": 59,
539
- "special": true,
540
  "content": "<unused52>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
- "normalized": false
 
545
  },
546
  {
547
  "id": 60,
548
- "special": true,
549
  "content": "<unused53>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
- "normalized": false
 
554
  },
555
  {
556
  "id": 61,
557
- "special": true,
558
  "content": "<unused54>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
- "normalized": false
 
563
  },
564
  {
565
  "id": 62,
566
- "special": true,
567
  "content": "<unused55>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
- "normalized": false
 
572
  },
573
  {
574
  "id": 63,
575
- "special": true,
576
  "content": "<unused56>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
- "normalized": false
 
581
  },
582
  {
583
  "id": 64,
584
- "special": true,
585
  "content": "<unused57>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
- "normalized": false
 
590
  },
591
  {
592
  "id": 65,
593
- "special": true,
594
  "content": "<unused58>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
- "normalized": false
 
599
  },
600
  {
601
  "id": 66,
602
- "special": true,
603
  "content": "<unused59>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
- "normalized": false
 
608
  },
609
  {
610
  "id": 67,
611
- "special": true,
612
  "content": "<unused60>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
- "normalized": false
 
617
  },
618
  {
619
  "id": 68,
620
- "special": true,
621
  "content": "<unused61>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
- "normalized": false
 
626
  },
627
  {
628
  "id": 69,
629
- "special": true,
630
  "content": "<unused62>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
- "normalized": false
 
635
  },
636
  {
637
  "id": 70,
638
- "special": true,
639
  "content": "<unused63>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
- "normalized": false
 
644
  },
645
  {
646
  "id": 71,
647
- "special": true,
648
  "content": "<unused64>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
- "normalized": false
 
653
  },
654
  {
655
  "id": 72,
656
- "special": true,
657
  "content": "<unused65>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
- "normalized": false
 
662
  },
663
  {
664
  "id": 73,
665
- "special": true,
666
  "content": "<unused66>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
- "normalized": false
 
671
  },
672
  {
673
  "id": 74,
674
- "special": true,
675
  "content": "<unused67>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
- "normalized": false
 
680
  },
681
  {
682
  "id": 75,
683
- "special": true,
684
  "content": "<unused68>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
- "normalized": false
 
689
  },
690
  {
691
  "id": 76,
692
- "special": true,
693
  "content": "<unused69>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
- "normalized": false
 
698
  },
699
  {
700
  "id": 77,
701
- "special": true,
702
  "content": "<unused70>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
- "normalized": false
 
707
  },
708
  {
709
  "id": 78,
710
- "special": true,
711
  "content": "<unused71>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
- "normalized": false
 
716
  },
717
  {
718
  "id": 79,
719
- "special": true,
720
  "content": "<unused72>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
- "normalized": false
 
725
  },
726
  {
727
  "id": 80,
728
- "special": true,
729
  "content": "<unused73>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
- "normalized": false
 
734
  },
735
  {
736
  "id": 81,
737
- "special": true,
738
  "content": "<unused74>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
- "normalized": false
 
743
  },
744
  {
745
  "id": 82,
746
- "special": true,
747
  "content": "<unused75>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
- "normalized": false
 
752
  },
753
  {
754
  "id": 83,
755
- "special": true,
756
  "content": "<unused76>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
- "normalized": false
 
761
  },
762
  {
763
  "id": 84,
764
- "special": true,
765
  "content": "<unused77>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
- "normalized": false
 
770
  },
771
  {
772
  "id": 85,
773
- "special": true,
774
  "content": "<unused78>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
- "normalized": false
 
779
  },
780
  {
781
  "id": 86,
782
- "special": true,
783
  "content": "<unused79>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
- "normalized": false
 
788
  },
789
  {
790
  "id": 87,
791
- "special": true,
792
  "content": "<unused80>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
- "normalized": false
 
797
  },
798
  {
799
  "id": 88,
800
- "special": true,
801
  "content": "<unused81>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
- "normalized": false
 
806
  },
807
  {
808
  "id": 89,
809
- "special": true,
810
  "content": "<unused82>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
- "normalized": false
 
815
  },
816
  {
817
  "id": 90,
818
- "special": true,
819
  "content": "<unused83>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
- "normalized": false
 
824
  },
825
  {
826
  "id": 91,
827
- "special": true,
828
  "content": "<unused84>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
- "normalized": false
 
833
  },
834
  {
835
  "id": 92,
836
- "special": true,
837
  "content": "<unused85>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
- "normalized": false
 
842
  },
843
  {
844
  "id": 93,
845
- "special": true,
846
  "content": "<unused86>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
- "normalized": false
 
851
  },
852
  {
853
  "id": 94,
854
- "special": true,
855
  "content": "<unused87>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
- "normalized": false
 
860
  },
861
  {
862
  "id": 95,
863
- "special": true,
864
  "content": "<unused88>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
- "normalized": false
 
869
  },
870
  {
871
  "id": 96,
872
- "special": true,
873
  "content": "<unused89>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
- "normalized": false
 
878
  },
879
  {
880
  "id": 97,
881
- "special": true,
882
  "content": "<unused90>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
- "normalized": false
 
887
  },
888
  {
889
  "id": 98,
890
- "special": true,
891
  "content": "<unused91>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
- "normalized": false
 
896
  },
897
  {
898
  "id": 99,
899
- "special": true,
900
  "content": "<unused92>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
- "normalized": false
 
905
  },
906
  {
907
  "id": 100,
908
- "special": true,
909
  "content": "<unused93>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
- "normalized": false
 
914
  },
915
  {
916
  "id": 101,
917
- "special": true,
918
  "content": "<unused94>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
- "normalized": false
 
923
  },
924
  {
925
  "id": 102,
926
- "special": true,
927
  "content": "<unused95>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
- "normalized": false
 
932
  },
933
  {
934
  "id": 103,
935
- "special": true,
936
  "content": "<unused96>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
- "normalized": false
 
941
  },
942
  {
943
  "id": 104,
944
- "special": true,
945
  "content": "<unused97>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
- "normalized": false
 
950
  },
951
  {
952
  "id": 105,
953
- "special": true,
954
  "content": "<unused98>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
- "normalized": false
 
959
  },
960
  {
961
  "id": 106,
962
- "special": true,
963
  "content": "<unused99>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
- "normalized": false
 
968
  },
969
  {
970
  "id": 107,
971
- "special": true,
972
  "content": ":-)",
973
  "single_word": false,
974
  "lstrip": false,
975
  "rstrip": false,
976
- "normalized": false
 
977
  },
978
  {
979
  "id": 108,
980
- "special": true,
981
  "content": ":)",
982
  "single_word": false,
983
  "lstrip": false,
984
  "rstrip": false,
985
- "normalized": false
 
986
  },
987
  {
988
  "id": 109,
989
- "special": true,
990
  "content": ",-)",
991
  "single_word": false,
992
  "lstrip": false,
993
  "rstrip": false,
994
- "normalized": false
 
995
  },
996
  {
997
  "id": 110,
998
- "special": true,
999
  "content": "(-:",
1000
  "single_word": false,
1001
  "lstrip": false,
1002
  "rstrip": false,
1003
- "normalized": false
 
1004
  },
1005
  {
1006
  "id": 111,
1007
- "special": true,
1008
  "content": "(:-)",
1009
  "single_word": false,
1010
  "lstrip": false,
1011
  "rstrip": false,
1012
- "normalized": false
 
1013
  },
1014
  {
1015
  "id": 112,
1016
- "special": true,
1017
  "content": "(:-(",
1018
  "single_word": false,
1019
  "lstrip": false,
1020
  "rstrip": false,
1021
- "normalized": false
 
1022
  },
1023
  {
1024
  "id": 113,
1025
- "special": true,
1026
  "content": ",-}",
1027
  "single_word": false,
1028
  "lstrip": false,
1029
  "rstrip": false,
1030
- "normalized": false
 
1031
  },
1032
  {
1033
  "id": 114,
1034
- "special": true,
1035
  "content": "8-O",
1036
  "single_word": false,
1037
  "lstrip": false,
1038
  "rstrip": false,
1039
- "normalized": false
 
1040
  },
1041
  {
1042
  "id": 115,
1043
- "special": true,
1044
  "content": "'-)",
1045
  "single_word": false,
1046
  "lstrip": false,
1047
  "rstrip": false,
1048
- "normalized": false
 
1049
  },
1050
  {
1051
  "id": 116,
1052
- "special": true,
1053
  "content": ":-#",
1054
  "single_word": false,
1055
  "lstrip": false,
1056
  "rstrip": false,
1057
- "normalized": false
 
1058
  },
1059
  {
1060
  "id": 117,
1061
- "special": true,
1062
  "content": ":-*",
1063
  "single_word": false,
1064
  "lstrip": false,
1065
  "rstrip": false,
1066
- "normalized": false
 
1067
  },
1068
  {
1069
  "id": 118,
1070
- "special": true,
1071
  "content": ":-/",
1072
  "single_word": false,
1073
  "lstrip": false,
1074
  "rstrip": false,
1075
- "normalized": false
 
1076
  },
1077
  {
1078
  "id": 119,
1079
- "special": true,
1080
  "content": ":->",
1081
  "single_word": false,
1082
  "lstrip": false,
1083
  "rstrip": false,
1084
- "normalized": false
 
1085
  },
1086
  {
1087
  "id": 120,
1088
- "special": true,
1089
  "content": ":-@",
1090
  "single_word": false,
1091
  "lstrip": false,
1092
  "rstrip": false,
1093
- "normalized": false
 
1094
  },
1095
  {
1096
  "id": 121,
1097
- "special": true,
1098
  "content": ":-d",
1099
  "single_word": false,
1100
  "lstrip": false,
1101
  "rstrip": false,
1102
- "normalized": false
 
1103
  },
1104
  {
1105
  "id": 122,
1106
- "special": true,
1107
  "content": ":-V",
1108
  "single_word": false,
1109
  "lstrip": false,
1110
  "rstrip": false,
1111
- "normalized": false
 
1112
  },
1113
  {
1114
  "id": 123,
1115
- "special": true,
1116
  "content": ":-X",
1117
  "single_word": false,
1118
  "lstrip": false,
1119
  "rstrip": false,
1120
- "normalized": false
 
1121
  },
1122
  {
1123
  "id": 124,
1124
- "special": true,
1125
  "content": ":-\\",
1126
  "single_word": false,
1127
  "lstrip": false,
1128
  "rstrip": false,
1129
- "normalized": false
 
1130
  },
1131
  {
1132
  "id": 125,
1133
- "special": true,
1134
  "content": ":-]",
1135
  "single_word": false,
1136
  "lstrip": false,
1137
  "rstrip": false,
1138
- "normalized": false
 
1139
  },
1140
  {
1141
  "id": 126,
1142
- "special": true,
1143
  "content": ";-(",
1144
  "single_word": false,
1145
  "lstrip": false,
1146
  "rstrip": false,
1147
- "normalized": false
 
1148
  },
1149
  {
1150
  "id": 127,
1151
- "special": true,
1152
  "content": ">;->",
1153
  "single_word": false,
1154
  "lstrip": false,
1155
  "rstrip": false,
1156
- "normalized": false
 
1157
  },
1158
  {
1159
  "id": 128,
1160
- "special": true,
1161
  "content": ";^)",
1162
  "single_word": false,
1163
  "lstrip": false,
1164
  "rstrip": false,
1165
- "normalized": false
 
1166
  },
1167
  {
1168
  "id": 129,
1169
- "special": true,
1170
  "content": "%-)",
1171
  "single_word": false,
1172
  "lstrip": false,
1173
  "rstrip": false,
1174
- "normalized": false
 
1175
  },
1176
  {
1177
  "id": 130,
1178
- "special": true,
1179
  "content": "):-(",
1180
  "single_word": false,
1181
  "lstrip": false,
1182
  "rstrip": false,
1183
- "normalized": false
 
1184
  },
1185
  {
1186
  "id": 131,
1187
- "special": true,
1188
  "content": "3:]",
1189
  "single_word": false,
1190
  "lstrip": false,
1191
  "rstrip": false,
1192
- "normalized": false
 
1193
  },
1194
  {
1195
  "id": 132,
1196
- "special": true,
1197
  "content": ":-&",
1198
  "single_word": false,
1199
  "lstrip": false,
1200
  "rstrip": false,
1201
- "normalized": false
 
1202
  },
1203
  {
1204
  "id": 133,
1205
- "special": true,
1206
  "content": "8:-)",
1207
  "single_word": false,
1208
  "lstrip": false,
1209
  "rstrip": false,
1210
- "normalized": false
 
1211
  },
1212
  {
1213
  "id": 134,
1214
- "special": true,
1215
  "content": ":-)8<",
1216
  "single_word": false,
1217
  "lstrip": false,
1218
  "rstrip": false,
1219
- "normalized": false
 
1220
  },
1221
  {
1222
  "id": 135,
1223
- "special": true,
1224
  "content": ":-O",
1225
  "single_word": false,
1226
  "lstrip": false,
1227
  "rstrip": false,
1228
- "normalized": false
 
1229
  },
1230
  {
1231
  "id": 136,
1232
- "special": true,
1233
  "content": ":-6",
1234
  "single_word": false,
1235
  "lstrip": false,
1236
  "rstrip": false,
1237
- "normalized": false
 
1238
  },
1239
  {
1240
  "id": 137,
1241
- "special": true,
1242
  "content": "+:-)",
1243
  "single_word": false,
1244
  "lstrip": false,
1245
  "rstrip": false,
1246
- "normalized": false
 
1247
  },
1248
  {
1249
  "id": 138,
1250
- "special": true,
1251
  "content": "O:-)",
1252
  "single_word": false,
1253
  "lstrip": false,
1254
  "rstrip": false,
1255
- "normalized": false
 
1256
  },
1257
  {
1258
  "id": 139,
1259
- "special": true,
1260
  "content": ":-<",
1261
  "single_word": false,
1262
  "lstrip": false,
1263
  "rstrip": false,
1264
- "normalized": false
 
1265
  },
1266
  {
1267
  "id": 140,
1268
- "special": true,
1269
  "content": ":-?",
1270
  "single_word": false,
1271
  "lstrip": false,
1272
  "rstrip": false,
1273
- "normalized": false
 
1274
  },
1275
  {
1276
  "id": 141,
1277
- "special": true,
1278
  "content": ":-E",
1279
  "single_word": false,
1280
  "lstrip": false,
1281
  "rstrip": false,
1282
- "normalized": false
 
1283
  },
1284
  {
1285
  "id": 142,
1286
- "special": true,
1287
  "content": ":-Q",
1288
  "single_word": false,
1289
  "lstrip": false,
1290
  "rstrip": false,
1291
- "normalized": false
 
1292
  },
1293
  {
1294
  "id": 143,
1295
- "special": true,
1296
  "content": ":-}X",
1297
  "single_word": false,
1298
  "lstrip": false,
1299
  "rstrip": false,
1300
- "normalized": false
 
1301
  },
1302
  {
1303
  "id": 144,
1304
- "special": true,
1305
  "content": ":-[",
1306
  "single_word": false,
1307
  "lstrip": false,
1308
  "rstrip": false,
1309
- "normalized": false
 
1310
  },
1311
  {
1312
  "id": 145,
1313
- "special": true,
1314
  "content": ":-a",
1315
  "single_word": false,
1316
  "lstrip": false,
1317
  "rstrip": false,
1318
- "normalized": false
 
1319
  },
1320
  {
1321
  "id": 146,
1322
- "special": true,
1323
  "content": ":-{",
1324
  "single_word": false,
1325
  "lstrip": false,
1326
  "rstrip": false,
1327
- "normalized": false
 
1328
  },
1329
  {
1330
  "id": 147,
1331
- "special": true,
1332
  "content": ":-{}",
1333
  "single_word": false,
1334
  "lstrip": false,
1335
  "rstrip": false,
1336
- "normalized": false
 
1337
  },
1338
  {
1339
  "id": 148,
1340
- "special": true,
1341
  "content": ":^)",
1342
  "single_word": false,
1343
  "lstrip": false,
1344
  "rstrip": false,
1345
- "normalized": false
 
1346
  },
1347
  {
1348
  "id": 149,
1349
- "special": true,
1350
  "content": "<:-l",
1351
  "single_word": false,
1352
  "lstrip": false,
1353
  "rstrip": false,
1354
- "normalized": false
 
1355
  },
1356
  {
1357
  "id": 150,
1358
- "special": true,
1359
  "content": ":=)",
1360
  "single_word": false,
1361
  "lstrip": false,
1362
  "rstrip": false,
1363
- "normalized": false
 
1364
  },
1365
  {
1366
  "id": 151,
1367
- "special": true,
1368
  "content": ">:->",
1369
  "single_word": false,
1370
  "lstrip": false,
1371
  "rstrip": false,
1372
- "normalized": false
 
1373
  },
1374
  {
1375
  "id": 152,
1376
- "special": true,
1377
  "content": ">:-l",
1378
  "single_word": false,
1379
  "lstrip": false,
1380
  "rstrip": false,
1381
- "normalized": false
 
1382
  },
1383
  {
1384
  "id": 153,
1385
- "special": true,
1386
  "content": "@:-)",
1387
  "single_word": false,
1388
  "lstrip": false,
1389
  "rstrip": false,
1390
- "normalized": false
 
1391
  },
1392
  {
1393
  "id": 154,
1394
- "special": true,
1395
  "content": "@:-}",
1396
  "single_word": false,
1397
  "lstrip": false,
1398
  "rstrip": false,
1399
- "normalized": false
 
1400
  },
1401
  {
1402
  "id": 155,
1403
- "special": true,
1404
  "content": "C=:-)",
1405
  "single_word": false,
1406
  "lstrip": false,
1407
  "rstrip": false,
1408
- "normalized": false
 
1409
  },
1410
  {
1411
  "id": 156,
1412
- "special": true,
1413
  "content": "X:-)",
1414
  "single_word": false,
1415
  "lstrip": false,
1416
  "rstrip": false,
1417
- "normalized": false
 
1418
  },
1419
  {
1420
  "id": 157,
1421
- "special": true,
1422
  "content": "[:-)",
1423
  "single_word": false,
1424
  "lstrip": false,
1425
  "rstrip": false,
1426
- "normalized": false
 
1427
  },
1428
  {
1429
  "id": 158,
1430
- "special": true,
1431
  "content": "[:]",
1432
  "single_word": false,
1433
  "lstrip": false,
1434
  "rstrip": false,
1435
- "normalized": false
 
1436
  },
1437
  {
1438
  "id": 159,
1439
- "special": true,
1440
  "content": "{:-)",
1441
  "single_word": false,
1442
  "lstrip": false,
1443
  "rstrip": false,
1444
- "normalized": false
 
1445
  },
1446
  {
1447
  "id": 160,
1448
- "special": true,
1449
  "content": "l^o",
1450
  "single_word": false,
1451
  "lstrip": false,
1452
  "rstrip": false,
1453
- "normalized": false
 
1454
  },
1455
  {
1456
  "id": 161,
1457
- "special": true,
1458
  "content": "}:^#)",
1459
  "single_word": false,
1460
  "lstrip": false,
1461
  "rstrip": false,
1462
- "normalized": false
 
1463
  },
1464
  {
1465
  "id": 162,
1466
- "special": true,
1467
  "content": ":-(=)",
1468
  "single_word": false,
1469
  "lstrip": false,
1470
  "rstrip": false,
1471
- "normalized": false
 
1472
  },
1473
  {
1474
  "id": 163,
1475
- "special": true,
1476
  "content": "O-)",
1477
  "single_word": false,
1478
  "lstrip": false,
1479
  "rstrip": false,
1480
- "normalized": false
 
1481
  },
1482
  {
1483
  "id": 164,
1484
- "special": true,
1485
  "content": ":-3",
1486
  "single_word": false,
1487
  "lstrip": false,
1488
  "rstrip": false,
1489
- "normalized": false
 
1490
  },
1491
  {
1492
  "id": 165,
1493
- "special": true,
1494
  "content": ": =",
1495
  "single_word": false,
1496
  "lstrip": false,
1497
  "rstrip": false,
1498
- "normalized": false
 
1499
  },
1500
  {
1501
  "id": 166,
1502
- "special": true,
1503
  "content": ":-\"",
1504
  "single_word": false,
1505
  "lstrip": false,
1506
  "rstrip": false,
1507
- "normalized": false
 
1508
  },
1509
  {
1510
  "id": 167,
1511
- "special": true,
1512
  "content": "P-(",
1513
  "single_word": false,
1514
  "lstrip": false,
1515
  "rstrip": false,
1516
- "normalized": false
 
1517
  },
1518
  {
1519
  "id": 168,
1520
- "special": true,
1521
  "content": "?-(",
1522
  "single_word": false,
1523
  "lstrip": false,
1524
  "rstrip": false,
1525
- "normalized": false
 
1526
  },
1527
  {
1528
  "id": 169,
1529
- "special": true,
1530
  "content": "d:-)",
1531
  "single_word": false,
1532
  "lstrip": false,
1533
  "rstrip": false,
1534
- "normalized": false
 
1535
  },
1536
  {
1537
  "id": 170,
1538
- "special": true,
1539
  "content": ":8)",
1540
  "single_word": false,
1541
  "lstrip": false,
1542
  "rstrip": false,
1543
- "normalized": false
 
1544
  },
1545
  {
1546
  "id": 171,
1547
- "special": true,
1548
  "content": ":-7",
1549
  "single_word": false,
1550
  "lstrip": false,
1551
  "rstrip": false,
1552
- "normalized": false
 
1553
  },
1554
  {
1555
  "id": 172,
1556
- "special": true,
1557
  "content": "):-)",
1558
  "single_word": false,
1559
  "lstrip": false,
1560
  "rstrip": false,
1561
- "normalized": false
 
1562
  },
1563
  {
1564
  "id": 173,
1565
- "special": true,
1566
  "content": ":/\\)",
1567
  "single_word": false,
1568
  "lstrip": false,
1569
  "rstrip": false,
1570
- "normalized": false
 
1571
  },
1572
  {
1573
  "id": 174,
1574
- "special": true,
1575
  "content": "8(:-)",
1576
  "single_word": false,
1577
  "lstrip": false,
1578
  "rstrip": false,
1579
- "normalized": false
 
1580
  },
1581
  {
1582
  "id": 175,
1583
- "special": true,
1584
  "content": "([(",
1585
  "single_word": false,
1586
  "lstrip": false,
1587
  "rstrip": false,
1588
- "normalized": false
 
1589
  },
1590
  {
1591
  "id": 176,
1592
- "special": true,
1593
  "content": ":-(*)",
1594
  "single_word": false,
1595
  "lstrip": false,
1596
  "rstrip": false,
1597
- "normalized": false
 
1598
  },
1599
  {
1600
  "id": 177,
1601
- "special": true,
1602
  "content": "&-l",
1603
  "single_word": false,
1604
  "lstrip": false,
1605
  "rstrip": false,
1606
- "normalized": false
 
1607
  },
1608
  {
1609
  "id": 178,
1610
- "special": true,
1611
  "content": ":-e",
1612
  "single_word": false,
1613
  "lstrip": false,
1614
  "rstrip": false,
1615
- "normalized": false
 
1616
  },
1617
  {
1618
  "id": 179,
1619
- "special": true,
1620
  "content": ":(",
1621
  "single_word": false,
1622
  "lstrip": false,
1623
  "rstrip": false,
1624
- "normalized": false
 
1625
  },
1626
  {
1627
  "id": 180,
1628
- "special": true,
1629
  "content": ":,(",
1630
  "single_word": false,
1631
  "lstrip": false,
1632
  "rstrip": false,
1633
- "normalized": false
 
1634
  },
1635
  {
1636
  "id": 181,
1637
- "special": true,
1638
  "content": ":-(",
1639
  "single_word": false,
1640
  "lstrip": false,
1641
  "rstrip": false,
1642
- "normalized": false
 
1643
  },
1644
  {
1645
  "id": 182,
1646
- "special": true,
1647
  "content": ":-P",
1648
  "single_word": false,
1649
  "lstrip": false,
1650
  "rstrip": false,
1651
- "normalized": false
 
1652
  },
1653
  {
1654
  "id": 183,
1655
- "special": true,
1656
  "content": ":-S",
1657
  "single_word": false,
1658
  "lstrip": false,
1659
  "rstrip": false,
1660
- "normalized": false
 
1661
  },
1662
  {
1663
  "id": 184,
1664
- "special": true,
1665
  "content": ":-C",
1666
  "single_word": false,
1667
  "lstrip": false,
1668
  "rstrip": false,
1669
- "normalized": false
 
1670
  },
1671
  {
1672
  "id": 185,
1673
- "special": true,
1674
  "content": ":-r",
1675
  "single_word": false,
1676
  "lstrip": false,
1677
  "rstrip": false,
1678
- "normalized": false
 
1679
  },
1680
  {
1681
  "id": 186,
1682
- "special": true,
1683
  "content": ":-t",
1684
  "single_word": false,
1685
  "lstrip": false,
1686
  "rstrip": false,
1687
- "normalized": false
 
1688
  },
1689
  {
1690
  "id": 187,
1691
- "special": true,
1692
  "content": ":-W",
1693
  "single_word": false,
1694
  "lstrip": false,
1695
  "rstrip": false,
1696
- "normalized": false
 
1697
  },
1698
  {
1699
  "id": 188,
1700
- "special": true,
1701
  "content": "X-(",
1702
  "single_word": false,
1703
  "lstrip": false,
1704
  "rstrip": false,
1705
- "normalized": false
 
1706
  },
1707
  {
1708
  "id": 189,
1709
- "special": true,
1710
  "content": "l-O",
1711
  "single_word": false,
1712
  "lstrip": false,
1713
  "rstrip": false,
1714
- "normalized": false
 
1715
  },
1716
  {
1717
  "id": 190,
1718
- "special": true,
1719
  "content": "l:-O",
1720
  "single_word": false,
1721
  "lstrip": false,
1722
  "rstrip": false,
1723
- "normalized": false
 
1724
  },
1725
  {
1726
  "id": 191,
1727
- "special": true,
1728
  "content": "$-)",
1729
  "single_word": false,
1730
  "lstrip": false,
1731
  "rstrip": false,
1732
- "normalized": false
 
1733
  },
1734
  {
1735
  "id": 192,
1736
- "special": true,
1737
  "content": ":-!",
1738
  "single_word": false,
1739
  "lstrip": false,
1740
  "rstrip": false,
1741
- "normalized": false
 
1742
  },
1743
  {
1744
  "id": 193,
1745
- "special": true,
1746
  "content": ":----}",
1747
  "single_word": false,
1748
  "lstrip": false,
1749
  "rstrip": false,
1750
- "normalized": false
 
1751
  },
1752
  {
1753
  "id": 194,
1754
- "special": true,
1755
  "content": "=:-)",
1756
  "single_word": false,
1757
  "lstrip": false,
1758
  "rstrip": false,
1759
- "normalized": false
 
1760
  },
1761
  {
1762
  "id": 195,
1763
- "special": true,
1764
  "content": "=:-(",
1765
  "single_word": false,
1766
  "lstrip": false,
1767
  "rstrip": false,
1768
- "normalized": false
 
1769
  },
1770
  {
1771
  "id": 196,
1772
- "special": true,
1773
  "content": "3:[",
1774
  "single_word": false,
1775
  "lstrip": false,
1776
  "rstrip": false,
1777
- "normalized": false
 
1778
  },
1779
  {
1780
  "id": 197,
1781
- "special": true,
1782
  "content": "8<:-)",
1783
  "single_word": false,
1784
  "lstrip": false,
1785
  "rstrip": false,
1786
- "normalized": false
 
1787
  },
1788
  {
1789
  "id": 198,
1790
- "special": true,
1791
  "content": ":#)",
1792
  "single_word": false,
1793
  "lstrip": false,
1794
  "rstrip": false,
1795
- "normalized": false
 
1796
  },
1797
  {
1798
  "id": 199,
1799
- "special": true,
1800
  "content": "8-#",
1801
  "single_word": false,
1802
  "lstrip": false,
1803
  "rstrip": false,
1804
- "normalized": false
 
1805
  },
1806
  {
1807
  "id": 200,
1808
- "special": true,
1809
  "content": "B-)",
1810
  "single_word": false,
1811
  "lstrip": false,
1812
  "rstrip": false,
1813
- "normalized": false
 
1814
  },
1815
  {
1816
  "id": 201,
1817
- "special": true,
1818
  "content": "8-)",
1819
  "single_word": false,
1820
  "lstrip": false,
1821
  "rstrip": false,
1822
- "normalized": false
 
1823
  },
1824
  {
1825
  "id": 202,
1826
- "special": true,
1827
  "content": "|-(",
1828
  "single_word": false,
1829
  "lstrip": false,
1830
  "rstrip": false,
1831
- "normalized": false
 
1832
  },
1833
  {
1834
  "id": 203,
1835
- "special": true,
1836
  "content": "H-)",
1837
  "single_word": false,
1838
  "lstrip": false,
1839
  "rstrip": false,
1840
- "normalized": false
 
1841
  },
1842
  {
1843
  "id": 204,
1844
- "special": true,
1845
  "content": "]-I",
1846
  "single_word": false,
1847
  "lstrip": false,
1848
  "rstrip": false,
1849
- "normalized": false
 
1850
  },
1851
  {
1852
  "id": 205,
1853
- "special": true,
1854
  "content": "V^J",
1855
  "single_word": false,
1856
  "lstrip": false,
1857
  "rstrip": false,
1858
- "normalized": false
 
1859
  },
1860
  {
1861
  "id": 206,
1862
- "special": true,
1863
  "content": "+-(",
1864
  "single_word": false,
1865
  "lstrip": false,
1866
  "rstrip": false,
1867
- "normalized": false
 
1868
  },
1869
  {
1870
  "id": 207,
1871
- "special": true,
1872
  "content": "~:-P",
1873
  "single_word": false,
1874
  "lstrip": false,
1875
  "rstrip": false,
1876
- "normalized": false
 
1877
  },
1878
  {
1879
  "id": 208,
1880
- "special": true,
1881
  "content": "`'",
1882
  "single_word": false,
1883
  "lstrip": false,
1884
  "rstrip": false,
1885
- "normalized": false
 
1886
  },
1887
  {
1888
  "id": 209,
1889
- "special": true,
1890
  "content": "L-P",
1891
  "single_word": false,
1892
  "lstrip": false,
1893
  "rstrip": false,
1894
- "normalized": false
 
1895
  },
1896
  {
1897
  "id": 210,
1898
- "special": true,
1899
  "content": "BI",
1900
  "single_word": false,
1901
  "lstrip": false,
1902
  "rstrip": false,
1903
- "normalized": false
 
1904
  },
1905
  {
1906
  "id": 211,
1907
- "special": true,
1908
  "content": "O |",
1909
  "single_word": false,
1910
  "lstrip": false,
1911
  "rstrip": false,
1912
- "normalized": false
 
1913
  },
1914
  {
1915
  "id": 212,
1916
- "special": true,
1917
  "content": "^^",
1918
  "single_word": false,
1919
  "lstrip": false,
1920
  "rstrip": false,
1921
- "normalized": false
 
1922
  },
1923
  {
1924
  "id": 213,
1925
- "special": true,
1926
  "content": "ㅜㅜ",
1927
  "single_word": false,
1928
  "lstrip": false,
1929
  "rstrip": false,
1930
- "normalized": false
 
1931
  },
1932
  {
1933
  "id": 214,
1934
- "special": true,
1935
  "content": "ㅠㅠ",
1936
  "single_word": false,
1937
  "lstrip": false,
1938
  "rstrip": false,
1939
- "normalized": false
 
1940
  },
1941
  {
1942
  "id": 215,
1943
- "special": true,
1944
  "content": "ㅡㅡ",
1945
  "single_word": false,
1946
  "lstrip": false,
1947
  "rstrip": false,
1948
- "normalized": false
 
1949
  },
1950
  {
1951
  "id": 216,
1952
- "special": true,
1953
  "content": "😀",
1954
  "single_word": false,
1955
  "lstrip": false,
1956
  "rstrip": false,
1957
- "normalized": false
 
1958
  },
1959
  {
1960
  "id": 217,
1961
- "special": true,
1962
  "content": "😃",
1963
  "single_word": false,
1964
  "lstrip": false,
1965
  "rstrip": false,
1966
- "normalized": false
 
1967
  },
1968
  {
1969
  "id": 218,
1970
- "special": true,
1971
  "content": "😄",
1972
  "single_word": false,
1973
  "lstrip": false,
1974
  "rstrip": false,
1975
- "normalized": false
 
1976
  },
1977
  {
1978
  "id": 219,
1979
- "special": true,
1980
  "content": "😁",
1981
  "single_word": false,
1982
  "lstrip": false,
1983
  "rstrip": false,
1984
- "normalized": false
 
1985
  },
1986
  {
1987
  "id": 220,
1988
- "special": true,
1989
  "content": "😆",
1990
  "single_word": false,
1991
  "lstrip": false,
1992
  "rstrip": false,
1993
- "normalized": false
 
1994
  },
1995
  {
1996
  "id": 221,
1997
- "special": true,
1998
  "content": "😅",
1999
  "single_word": false,
2000
  "lstrip": false,
2001
  "rstrip": false,
2002
- "normalized": false
 
2003
  },
2004
  {
2005
  "id": 222,
2006
- "special": true,
2007
  "content": "🤣",
2008
  "single_word": false,
2009
  "lstrip": false,
2010
  "rstrip": false,
2011
- "normalized": false
 
2012
  },
2013
  {
2014
  "id": 223,
2015
- "special": true,
2016
  "content": "😂",
2017
  "single_word": false,
2018
  "lstrip": false,
2019
  "rstrip": false,
2020
- "normalized": false
 
2021
  },
2022
  {
2023
  "id": 224,
2024
- "special": true,
2025
  "content": "🙂",
2026
  "single_word": false,
2027
  "lstrip": false,
2028
  "rstrip": false,
2029
- "normalized": false
 
2030
  },
2031
  {
2032
  "id": 225,
2033
- "special": true,
2034
  "content": "🙃",
2035
  "single_word": false,
2036
  "lstrip": false,
2037
  "rstrip": false,
2038
- "normalized": false
 
2039
  },
2040
  {
2041
  "id": 226,
2042
- "special": true,
2043
  "content": "😉",
2044
  "single_word": false,
2045
  "lstrip": false,
2046
  "rstrip": false,
2047
- "normalized": false
 
2048
  },
2049
  {
2050
  "id": 227,
2051
- "special": true,
2052
  "content": "😊",
2053
  "single_word": false,
2054
  "lstrip": false,
2055
  "rstrip": false,
2056
- "normalized": false
 
2057
  },
2058
  {
2059
  "id": 228,
2060
- "special": true,
2061
  "content": "😇",
2062
  "single_word": false,
2063
  "lstrip": false,
2064
  "rstrip": false,
2065
- "normalized": false
 
2066
  }
2067
  ],
2068
  "normalizer": {
2069
  "type": "Sequence",
2070
  "normalizers": [
2071
- { "type": "NFKC" },
 
 
2072
  {
2073
  "type": "BertNormalizer",
2074
  "clean_text": false,
@@ -2081,7 +2083,7 @@
2081
  "pre_tokenizer": {
2082
  "type": "Metaspace",
2083
  "replacement": "▁",
2084
- "add_prefix_space": false
2085
  },
2086
  "post_processor": {
2087
  "type": "RobertaProcessing",
@@ -2098,7 +2100,7 @@
2098
  "model": {
2099
  "type": "BPE",
2100
  "dropout": null,
2101
- "unk_token": null,
2102
  "continuing_subword_prefix": null,
2103
  "end_of_word_suffix": null,
2104
  "fuse_unk": false,
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
8
  "content": "<s>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
  },
15
  {
16
  "id": 1,
 
17
  "content": "</s>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
  },
24
  {
25
  "id": 2,
 
26
  "content": "<usr>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
  },
33
  {
34
  "id": 3,
 
35
  "content": "<pad>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
  },
42
  {
43
  "id": 4,
 
44
  "content": "<sys>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
  },
51
  {
52
  "id": 5,
 
53
  "content": "<unk>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
  },
60
  {
61
  "id": 6,
 
62
  "content": "<mask>",
63
  "single_word": false,
64
+ "lstrip": false,
65
  "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
  },
69
  {
70
  "id": 7,
 
71
  "content": "<unused0>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
  },
78
  {
79
  "id": 8,
 
80
  "content": "<unused1>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
  },
87
  {
88
  "id": 9,
 
89
  "content": "<unused2>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
  },
96
  {
97
  "id": 10,
 
98
  "content": "<unused3>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
  },
105
  {
106
  "id": 11,
 
107
  "content": "<unused4>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
  },
114
  {
115
  "id": 12,
 
116
  "content": "<unused5>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
  },
123
  {
124
  "id": 13,
 
125
  "content": "<unused6>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
+ "normalized": false,
130
+ "special": true
131
  },
132
  {
133
  "id": 14,
 
134
  "content": "<unused7>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
+ "normalized": false,
139
+ "special": true
140
  },
141
  {
142
  "id": 15,
 
143
  "content": "<unused8>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
+ "normalized": false,
148
+ "special": true
149
  },
150
  {
151
  "id": 16,
 
152
  "content": "<unused9>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
+ "normalized": false,
157
+ "special": true
158
  },
159
  {
160
  "id": 17,
 
161
  "content": "<unused10>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
  },
168
  {
169
  "id": 18,
 
170
  "content": "<unused11>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
  },
177
  {
178
  "id": 19,
 
179
  "content": "<unused12>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
  },
186
  {
187
  "id": 20,
 
188
  "content": "<unused13>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
+ "normalized": false,
193
+ "special": true
194
  },
195
  {
196
  "id": 21,
 
197
  "content": "<unused14>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
  },
204
  {
205
  "id": 22,
 
206
  "content": "<unused15>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
  },
213
  {
214
  "id": 23,
 
215
  "content": "<unused16>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
  },
222
  {
223
  "id": 24,
 
224
  "content": "<unused17>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
  },
231
  {
232
  "id": 25,
 
233
  "content": "<unused18>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
  },
240
  {
241
  "id": 26,
 
242
  "content": "<unused19>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
  },
249
  {
250
  "id": 27,
 
251
  "content": "<unused20>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
  },
258
  {
259
  "id": 28,
 
260
  "content": "<unused21>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
  },
267
  {
268
  "id": 29,
 
269
  "content": "<unused22>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
  },
276
  {
277
  "id": 30,
 
278
  "content": "<unused23>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
  },
285
  {
286
  "id": 31,
 
287
  "content": "<unused24>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
  },
294
  {
295
  "id": 32,
 
296
  "content": "<unused25>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
  },
303
  {
304
  "id": 33,
 
305
  "content": "<unused26>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
+ "normalized": false,
310
+ "special": true
311
  },
312
  {
313
  "id": 34,
 
314
  "content": "<unused27>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
+ "normalized": false,
319
+ "special": true
320
  },
321
  {
322
  "id": 35,
 
323
  "content": "<unused28>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
+ "normalized": false,
328
+ "special": true
329
  },
330
  {
331
  "id": 36,
 
332
  "content": "<unused29>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
+ "normalized": false,
337
+ "special": true
338
  },
339
  {
340
  "id": 37,
 
341
  "content": "<unused30>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
+ "normalized": false,
346
+ "special": true
347
  },
348
  {
349
  "id": 38,
 
350
  "content": "<unused31>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
+ "normalized": false,
355
+ "special": true
356
  },
357
  {
358
  "id": 39,
 
359
  "content": "<unused32>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
+ "normalized": false,
364
+ "special": true
365
  },
366
  {
367
  "id": 40,
 
368
  "content": "<unused33>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
+ "normalized": false,
373
+ "special": true
374
  },
375
  {
376
  "id": 41,
 
377
  "content": "<unused34>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
+ "normalized": false,
382
+ "special": true
383
  },
384
  {
385
  "id": 42,
 
386
  "content": "<unused35>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
+ "normalized": false,
391
+ "special": true
392
  },
393
  {
394
  "id": 43,
 
395
  "content": "<unused36>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
+ "normalized": false,
400
+ "special": true
401
  },
402
  {
403
  "id": 44,
 
404
  "content": "<unused37>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
+ "normalized": false,
409
+ "special": true
410
  },
411
  {
412
  "id": 45,
 
413
  "content": "<unused38>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
+ "normalized": false,
418
+ "special": true
419
  },
420
  {
421
  "id": 46,
 
422
  "content": "<unused39>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
+ "normalized": false,
427
+ "special": true
428
  },
429
  {
430
  "id": 47,
 
431
  "content": "<unused40>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
+ "normalized": false,
436
+ "special": true
437
  },
438
  {
439
  "id": 48,
 
440
  "content": "<unused41>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
+ "normalized": false,
445
+ "special": true
446
  },
447
  {
448
  "id": 49,
 
449
  "content": "<unused42>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
+ "normalized": false,
454
+ "special": true
455
  },
456
  {
457
  "id": 50,
 
458
  "content": "<unused43>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
+ "normalized": false,
463
+ "special": true
464
  },
465
  {
466
  "id": 51,
 
467
  "content": "<unused44>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
+ "normalized": false,
472
+ "special": true
473
  },
474
  {
475
  "id": 52,
 
476
  "content": "<unused45>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
+ "normalized": false,
481
+ "special": true
482
  },
483
  {
484
  "id": 53,
 
485
  "content": "<unused46>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
+ "normalized": false,
490
+ "special": true
491
  },
492
  {
493
  "id": 54,
 
494
  "content": "<unused47>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
+ "normalized": false,
499
+ "special": true
500
  },
501
  {
502
  "id": 55,
 
503
  "content": "<unused48>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
+ "normalized": false,
508
+ "special": true
509
  },
510
  {
511
  "id": 56,
 
512
  "content": "<unused49>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
+ "normalized": false,
517
+ "special": true
518
  },
519
  {
520
  "id": 57,
 
521
  "content": "<unused50>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
+ "normalized": false,
526
+ "special": true
527
  },
528
  {
529
  "id": 58,
 
530
  "content": "<unused51>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
+ "normalized": false,
535
+ "special": true
536
  },
537
  {
538
  "id": 59,
 
539
  "content": "<unused52>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
+ "normalized": false,
544
+ "special": true
545
  },
546
  {
547
  "id": 60,
 
548
  "content": "<unused53>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
+ "normalized": false,
553
+ "special": true
554
  },
555
  {
556
  "id": 61,
 
557
  "content": "<unused54>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
+ "normalized": false,
562
+ "special": true
563
  },
564
  {
565
  "id": 62,
 
566
  "content": "<unused55>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
+ "normalized": false,
571
+ "special": true
572
  },
573
  {
574
  "id": 63,
 
575
  "content": "<unused56>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
+ "normalized": false,
580
+ "special": true
581
  },
582
  {
583
  "id": 64,
 
584
  "content": "<unused57>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
+ "normalized": false,
589
+ "special": true
590
  },
591
  {
592
  "id": 65,
 
593
  "content": "<unused58>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
+ "normalized": false,
598
+ "special": true
599
  },
600
  {
601
  "id": 66,
 
602
  "content": "<unused59>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
+ "normalized": false,
607
+ "special": true
608
  },
609
  {
610
  "id": 67,
 
611
  "content": "<unused60>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
+ "normalized": false,
616
+ "special": true
617
  },
618
  {
619
  "id": 68,
 
620
  "content": "<unused61>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
+ "normalized": false,
625
+ "special": true
626
  },
627
  {
628
  "id": 69,
 
629
  "content": "<unused62>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
+ "normalized": false,
634
+ "special": true
635
  },
636
  {
637
  "id": 70,
 
638
  "content": "<unused63>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
+ "normalized": false,
643
+ "special": true
644
  },
645
  {
646
  "id": 71,
 
647
  "content": "<unused64>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
+ "normalized": false,
652
+ "special": true
653
  },
654
  {
655
  "id": 72,
 
656
  "content": "<unused65>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
+ "normalized": false,
661
+ "special": true
662
  },
663
  {
664
  "id": 73,
 
665
  "content": "<unused66>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
+ "normalized": false,
670
+ "special": true
671
  },
672
  {
673
  "id": 74,
 
674
  "content": "<unused67>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
+ "normalized": false,
679
+ "special": true
680
  },
681
  {
682
  "id": 75,
 
683
  "content": "<unused68>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
+ "normalized": false,
688
+ "special": true
689
  },
690
  {
691
  "id": 76,
 
692
  "content": "<unused69>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
+ "normalized": false,
697
+ "special": true
698
  },
699
  {
700
  "id": 77,
 
701
  "content": "<unused70>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
+ "normalized": false,
706
+ "special": true
707
  },
708
  {
709
  "id": 78,
 
710
  "content": "<unused71>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
+ "normalized": false,
715
+ "special": true
716
  },
717
  {
718
  "id": 79,
 
719
  "content": "<unused72>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
+ "normalized": false,
724
+ "special": true
725
  },
726
  {
727
  "id": 80,
 
728
  "content": "<unused73>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
+ "normalized": false,
733
+ "special": true
734
  },
735
  {
736
  "id": 81,
 
737
  "content": "<unused74>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
+ "normalized": false,
742
+ "special": true
743
  },
744
  {
745
  "id": 82,
 
746
  "content": "<unused75>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
+ "normalized": false,
751
+ "special": true
752
  },
753
  {
754
  "id": 83,
 
755
  "content": "<unused76>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
+ "normalized": false,
760
+ "special": true
761
  },
762
  {
763
  "id": 84,
 
764
  "content": "<unused77>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
+ "normalized": false,
769
+ "special": true
770
  },
771
  {
772
  "id": 85,
 
773
  "content": "<unused78>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
+ "normalized": false,
778
+ "special": true
779
  },
780
  {
781
  "id": 86,
 
782
  "content": "<unused79>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
+ "normalized": false,
787
+ "special": true
788
  },
789
  {
790
  "id": 87,
 
791
  "content": "<unused80>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
+ "normalized": false,
796
+ "special": true
797
  },
798
  {
799
  "id": 88,
 
800
  "content": "<unused81>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
+ "normalized": false,
805
+ "special": true
806
  },
807
  {
808
  "id": 89,
 
809
  "content": "<unused82>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
+ "normalized": false,
814
+ "special": true
815
  },
816
  {
817
  "id": 90,
 
818
  "content": "<unused83>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
+ "normalized": false,
823
+ "special": true
824
  },
825
  {
826
  "id": 91,
 
827
  "content": "<unused84>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
+ "normalized": false,
832
+ "special": true
833
  },
834
  {
835
  "id": 92,
 
836
  "content": "<unused85>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
+ "normalized": false,
841
+ "special": true
842
  },
843
  {
844
  "id": 93,
 
845
  "content": "<unused86>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
+ "normalized": false,
850
+ "special": true
851
  },
852
  {
853
  "id": 94,
 
854
  "content": "<unused87>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
+ "normalized": false,
859
+ "special": true
860
  },
861
  {
862
  "id": 95,
 
863
  "content": "<unused88>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
+ "normalized": false,
868
+ "special": true
869
  },
870
  {
871
  "id": 96,
 
872
  "content": "<unused89>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
+ "normalized": false,
877
+ "special": true
878
  },
879
  {
880
  "id": 97,
 
881
  "content": "<unused90>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
+ "normalized": false,
886
+ "special": true
887
  },
888
  {
889
  "id": 98,
 
890
  "content": "<unused91>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
+ "normalized": false,
895
+ "special": true
896
  },
897
  {
898
  "id": 99,
 
899
  "content": "<unused92>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
+ "normalized": false,
904
+ "special": true
905
  },
906
  {
907
  "id": 100,
 
908
  "content": "<unused93>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
+ "normalized": false,
913
+ "special": true
914
  },
915
  {
916
  "id": 101,
 
917
  "content": "<unused94>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
+ "normalized": false,
922
+ "special": true
923
  },
924
  {
925
  "id": 102,
 
926
  "content": "<unused95>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
+ "normalized": false,
931
+ "special": true
932
  },
933
  {
934
  "id": 103,
 
935
  "content": "<unused96>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
+ "normalized": false,
940
+ "special": true
941
  },
942
  {
943
  "id": 104,
 
944
  "content": "<unused97>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
+ "normalized": false,
949
+ "special": true
950
  },
951
  {
952
  "id": 105,
 
953
  "content": "<unused98>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
+ "normalized": false,
958
+ "special": true
959
  },
960
  {
961
  "id": 106,
 
962
  "content": "<unused99>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
+ "normalized": false,
967
+ "special": true
968
  },
969
  {
970
  "id": 107,
 
971
  "content": ":-)",
972
  "single_word": false,
973
  "lstrip": false,
974
  "rstrip": false,
975
+ "normalized": false,
976
+ "special": true
977
  },
978
  {
979
  "id": 108,
 
980
  "content": ":)",
981
  "single_word": false,
982
  "lstrip": false,
983
  "rstrip": false,
984
+ "normalized": false,
985
+ "special": true
986
  },
987
  {
988
  "id": 109,
 
989
  "content": ",-)",
990
  "single_word": false,
991
  "lstrip": false,
992
  "rstrip": false,
993
+ "normalized": false,
994
+ "special": true
995
  },
996
  {
997
  "id": 110,
 
998
  "content": "(-:",
999
  "single_word": false,
1000
  "lstrip": false,
1001
  "rstrip": false,
1002
+ "normalized": false,
1003
+ "special": true
1004
  },
1005
  {
1006
  "id": 111,
 
1007
  "content": "(:-)",
1008
  "single_word": false,
1009
  "lstrip": false,
1010
  "rstrip": false,
1011
+ "normalized": false,
1012
+ "special": true
1013
  },
1014
  {
1015
  "id": 112,
 
1016
  "content": "(:-(",
1017
  "single_word": false,
1018
  "lstrip": false,
1019
  "rstrip": false,
1020
+ "normalized": false,
1021
+ "special": true
1022
  },
1023
  {
1024
  "id": 113,
 
1025
  "content": ",-}",
1026
  "single_word": false,
1027
  "lstrip": false,
1028
  "rstrip": false,
1029
+ "normalized": false,
1030
+ "special": true
1031
  },
1032
  {
1033
  "id": 114,
 
1034
  "content": "8-O",
1035
  "single_word": false,
1036
  "lstrip": false,
1037
  "rstrip": false,
1038
+ "normalized": false,
1039
+ "special": true
1040
  },
1041
  {
1042
  "id": 115,
 
1043
  "content": "'-)",
1044
  "single_word": false,
1045
  "lstrip": false,
1046
  "rstrip": false,
1047
+ "normalized": false,
1048
+ "special": true
1049
  },
1050
  {
1051
  "id": 116,
 
1052
  "content": ":-#",
1053
  "single_word": false,
1054
  "lstrip": false,
1055
  "rstrip": false,
1056
+ "normalized": false,
1057
+ "special": true
1058
  },
1059
  {
1060
  "id": 117,
 
1061
  "content": ":-*",
1062
  "single_word": false,
1063
  "lstrip": false,
1064
  "rstrip": false,
1065
+ "normalized": false,
1066
+ "special": true
1067
  },
1068
  {
1069
  "id": 118,
 
1070
  "content": ":-/",
1071
  "single_word": false,
1072
  "lstrip": false,
1073
  "rstrip": false,
1074
+ "normalized": false,
1075
+ "special": true
1076
  },
1077
  {
1078
  "id": 119,
 
1079
  "content": ":->",
1080
  "single_word": false,
1081
  "lstrip": false,
1082
  "rstrip": false,
1083
+ "normalized": false,
1084
+ "special": true
1085
  },
1086
  {
1087
  "id": 120,
 
1088
  "content": ":-@",
1089
  "single_word": false,
1090
  "lstrip": false,
1091
  "rstrip": false,
1092
+ "normalized": false,
1093
+ "special": true
1094
  },
1095
  {
1096
  "id": 121,
 
1097
  "content": ":-d",
1098
  "single_word": false,
1099
  "lstrip": false,
1100
  "rstrip": false,
1101
+ "normalized": false,
1102
+ "special": true
1103
  },
1104
  {
1105
  "id": 122,
 
1106
  "content": ":-V",
1107
  "single_word": false,
1108
  "lstrip": false,
1109
  "rstrip": false,
1110
+ "normalized": false,
1111
+ "special": true
1112
  },
1113
  {
1114
  "id": 123,
 
1115
  "content": ":-X",
1116
  "single_word": false,
1117
  "lstrip": false,
1118
  "rstrip": false,
1119
+ "normalized": false,
1120
+ "special": true
1121
  },
1122
  {
1123
  "id": 124,
 
1124
  "content": ":-\\",
1125
  "single_word": false,
1126
  "lstrip": false,
1127
  "rstrip": false,
1128
+ "normalized": false,
1129
+ "special": true
1130
  },
1131
  {
1132
  "id": 125,
 
1133
  "content": ":-]",
1134
  "single_word": false,
1135
  "lstrip": false,
1136
  "rstrip": false,
1137
+ "normalized": false,
1138
+ "special": true
1139
  },
1140
  {
1141
  "id": 126,
 
1142
  "content": ";-(",
1143
  "single_word": false,
1144
  "lstrip": false,
1145
  "rstrip": false,
1146
+ "normalized": false,
1147
+ "special": true
1148
  },
1149
  {
1150
  "id": 127,
 
1151
  "content": ">;->",
1152
  "single_word": false,
1153
  "lstrip": false,
1154
  "rstrip": false,
1155
+ "normalized": false,
1156
+ "special": true
1157
  },
1158
  {
1159
  "id": 128,
 
1160
  "content": ";^)",
1161
  "single_word": false,
1162
  "lstrip": false,
1163
  "rstrip": false,
1164
+ "normalized": false,
1165
+ "special": true
1166
  },
1167
  {
1168
  "id": 129,
 
1169
  "content": "%-)",
1170
  "single_word": false,
1171
  "lstrip": false,
1172
  "rstrip": false,
1173
+ "normalized": false,
1174
+ "special": true
1175
  },
1176
  {
1177
  "id": 130,
 
1178
  "content": "):-(",
1179
  "single_word": false,
1180
  "lstrip": false,
1181
  "rstrip": false,
1182
+ "normalized": false,
1183
+ "special": true
1184
  },
1185
  {
1186
  "id": 131,
 
1187
  "content": "3:]",
1188
  "single_word": false,
1189
  "lstrip": false,
1190
  "rstrip": false,
1191
+ "normalized": false,
1192
+ "special": true
1193
  },
1194
  {
1195
  "id": 132,
 
1196
  "content": ":-&",
1197
  "single_word": false,
1198
  "lstrip": false,
1199
  "rstrip": false,
1200
+ "normalized": false,
1201
+ "special": true
1202
  },
1203
  {
1204
  "id": 133,
 
1205
  "content": "8:-)",
1206
  "single_word": false,
1207
  "lstrip": false,
1208
  "rstrip": false,
1209
+ "normalized": false,
1210
+ "special": true
1211
  },
1212
  {
1213
  "id": 134,
 
1214
  "content": ":-)8<",
1215
  "single_word": false,
1216
  "lstrip": false,
1217
  "rstrip": false,
1218
+ "normalized": false,
1219
+ "special": true
1220
  },
1221
  {
1222
  "id": 135,
 
1223
  "content": ":-O",
1224
  "single_word": false,
1225
  "lstrip": false,
1226
  "rstrip": false,
1227
+ "normalized": false,
1228
+ "special": true
1229
  },
1230
  {
1231
  "id": 136,
 
1232
  "content": ":-6",
1233
  "single_word": false,
1234
  "lstrip": false,
1235
  "rstrip": false,
1236
+ "normalized": false,
1237
+ "special": true
1238
  },
1239
  {
1240
  "id": 137,
 
1241
  "content": "+:-)",
1242
  "single_word": false,
1243
  "lstrip": false,
1244
  "rstrip": false,
1245
+ "normalized": false,
1246
+ "special": true
1247
  },
1248
  {
1249
  "id": 138,
 
1250
  "content": "O:-)",
1251
  "single_word": false,
1252
  "lstrip": false,
1253
  "rstrip": false,
1254
+ "normalized": false,
1255
+ "special": true
1256
  },
1257
  {
1258
  "id": 139,
 
1259
  "content": ":-<",
1260
  "single_word": false,
1261
  "lstrip": false,
1262
  "rstrip": false,
1263
+ "normalized": false,
1264
+ "special": true
1265
  },
1266
  {
1267
  "id": 140,
 
1268
  "content": ":-?",
1269
  "single_word": false,
1270
  "lstrip": false,
1271
  "rstrip": false,
1272
+ "normalized": false,
1273
+ "special": true
1274
  },
1275
  {
1276
  "id": 141,
 
1277
  "content": ":-E",
1278
  "single_word": false,
1279
  "lstrip": false,
1280
  "rstrip": false,
1281
+ "normalized": false,
1282
+ "special": true
1283
  },
1284
  {
1285
  "id": 142,
 
1286
  "content": ":-Q",
1287
  "single_word": false,
1288
  "lstrip": false,
1289
  "rstrip": false,
1290
+ "normalized": false,
1291
+ "special": true
1292
  },
1293
  {
1294
  "id": 143,
 
1295
  "content": ":-}X",
1296
  "single_word": false,
1297
  "lstrip": false,
1298
  "rstrip": false,
1299
+ "normalized": false,
1300
+ "special": true
1301
  },
1302
  {
1303
  "id": 144,
 
1304
  "content": ":-[",
1305
  "single_word": false,
1306
  "lstrip": false,
1307
  "rstrip": false,
1308
+ "normalized": false,
1309
+ "special": true
1310
  },
1311
  {
1312
  "id": 145,
 
1313
  "content": ":-a",
1314
  "single_word": false,
1315
  "lstrip": false,
1316
  "rstrip": false,
1317
+ "normalized": false,
1318
+ "special": true
1319
  },
1320
  {
1321
  "id": 146,
 
1322
  "content": ":-{",
1323
  "single_word": false,
1324
  "lstrip": false,
1325
  "rstrip": false,
1326
+ "normalized": false,
1327
+ "special": true
1328
  },
1329
  {
1330
  "id": 147,
 
1331
  "content": ":-{}",
1332
  "single_word": false,
1333
  "lstrip": false,
1334
  "rstrip": false,
1335
+ "normalized": false,
1336
+ "special": true
1337
  },
1338
  {
1339
  "id": 148,
 
1340
  "content": ":^)",
1341
  "single_word": false,
1342
  "lstrip": false,
1343
  "rstrip": false,
1344
+ "normalized": false,
1345
+ "special": true
1346
  },
1347
  {
1348
  "id": 149,
 
1349
  "content": "<:-l",
1350
  "single_word": false,
1351
  "lstrip": false,
1352
  "rstrip": false,
1353
+ "normalized": false,
1354
+ "special": true
1355
  },
1356
  {
1357
  "id": 150,
 
1358
  "content": ":=)",
1359
  "single_word": false,
1360
  "lstrip": false,
1361
  "rstrip": false,
1362
+ "normalized": false,
1363
+ "special": true
1364
  },
1365
  {
1366
  "id": 151,
 
1367
  "content": ">:->",
1368
  "single_word": false,
1369
  "lstrip": false,
1370
  "rstrip": false,
1371
+ "normalized": false,
1372
+ "special": true
1373
  },
1374
  {
1375
  "id": 152,
 
1376
  "content": ">:-l",
1377
  "single_word": false,
1378
  "lstrip": false,
1379
  "rstrip": false,
1380
+ "normalized": false,
1381
+ "special": true
1382
  },
1383
  {
1384
  "id": 153,
 
1385
  "content": "@:-)",
1386
  "single_word": false,
1387
  "lstrip": false,
1388
  "rstrip": false,
1389
+ "normalized": false,
1390
+ "special": true
1391
  },
1392
  {
1393
  "id": 154,
 
1394
  "content": "@:-}",
1395
  "single_word": false,
1396
  "lstrip": false,
1397
  "rstrip": false,
1398
+ "normalized": false,
1399
+ "special": true
1400
  },
1401
  {
1402
  "id": 155,
 
1403
  "content": "C=:-)",
1404
  "single_word": false,
1405
  "lstrip": false,
1406
  "rstrip": false,
1407
+ "normalized": false,
1408
+ "special": true
1409
  },
1410
  {
1411
  "id": 156,
 
1412
  "content": "X:-)",
1413
  "single_word": false,
1414
  "lstrip": false,
1415
  "rstrip": false,
1416
+ "normalized": false,
1417
+ "special": true
1418
  },
1419
  {
1420
  "id": 157,
 
1421
  "content": "[:-)",
1422
  "single_word": false,
1423
  "lstrip": false,
1424
  "rstrip": false,
1425
+ "normalized": false,
1426
+ "special": true
1427
  },
1428
  {
1429
  "id": 158,
 
1430
  "content": "[:]",
1431
  "single_word": false,
1432
  "lstrip": false,
1433
  "rstrip": false,
1434
+ "normalized": false,
1435
+ "special": true
1436
  },
1437
  {
1438
  "id": 159,
 
1439
  "content": "{:-)",
1440
  "single_word": false,
1441
  "lstrip": false,
1442
  "rstrip": false,
1443
+ "normalized": false,
1444
+ "special": true
1445
  },
1446
  {
1447
  "id": 160,
 
1448
  "content": "l^o",
1449
  "single_word": false,
1450
  "lstrip": false,
1451
  "rstrip": false,
1452
+ "normalized": false,
1453
+ "special": true
1454
  },
1455
  {
1456
  "id": 161,
 
1457
  "content": "}:^#)",
1458
  "single_word": false,
1459
  "lstrip": false,
1460
  "rstrip": false,
1461
+ "normalized": false,
1462
+ "special": true
1463
  },
1464
  {
1465
  "id": 162,
 
1466
  "content": ":-(=)",
1467
  "single_word": false,
1468
  "lstrip": false,
1469
  "rstrip": false,
1470
+ "normalized": false,
1471
+ "special": true
1472
  },
1473
  {
1474
  "id": 163,
 
1475
  "content": "O-)",
1476
  "single_word": false,
1477
  "lstrip": false,
1478
  "rstrip": false,
1479
+ "normalized": false,
1480
+ "special": true
1481
  },
1482
  {
1483
  "id": 164,
 
1484
  "content": ":-3",
1485
  "single_word": false,
1486
  "lstrip": false,
1487
  "rstrip": false,
1488
+ "normalized": false,
1489
+ "special": true
1490
  },
1491
  {
1492
  "id": 165,
 
1493
  "content": ": =",
1494
  "single_word": false,
1495
  "lstrip": false,
1496
  "rstrip": false,
1497
+ "normalized": false,
1498
+ "special": true
1499
  },
1500
  {
1501
  "id": 166,
 
1502
  "content": ":-\"",
1503
  "single_word": false,
1504
  "lstrip": false,
1505
  "rstrip": false,
1506
+ "normalized": false,
1507
+ "special": true
1508
  },
1509
  {
1510
  "id": 167,
 
1511
  "content": "P-(",
1512
  "single_word": false,
1513
  "lstrip": false,
1514
  "rstrip": false,
1515
+ "normalized": false,
1516
+ "special": true
1517
  },
1518
  {
1519
  "id": 168,
 
1520
  "content": "?-(",
1521
  "single_word": false,
1522
  "lstrip": false,
1523
  "rstrip": false,
1524
+ "normalized": false,
1525
+ "special": true
1526
  },
1527
  {
1528
  "id": 169,
 
1529
  "content": "d:-)",
1530
  "single_word": false,
1531
  "lstrip": false,
1532
  "rstrip": false,
1533
+ "normalized": false,
1534
+ "special": true
1535
  },
1536
  {
1537
  "id": 170,
 
1538
  "content": ":8)",
1539
  "single_word": false,
1540
  "lstrip": false,
1541
  "rstrip": false,
1542
+ "normalized": false,
1543
+ "special": true
1544
  },
1545
  {
1546
  "id": 171,
 
1547
  "content": ":-7",
1548
  "single_word": false,
1549
  "lstrip": false,
1550
  "rstrip": false,
1551
+ "normalized": false,
1552
+ "special": true
1553
  },
1554
  {
1555
  "id": 172,
 
1556
  "content": "):-)",
1557
  "single_word": false,
1558
  "lstrip": false,
1559
  "rstrip": false,
1560
+ "normalized": false,
1561
+ "special": true
1562
  },
1563
  {
1564
  "id": 173,
 
1565
  "content": ":/\\)",
1566
  "single_word": false,
1567
  "lstrip": false,
1568
  "rstrip": false,
1569
+ "normalized": false,
1570
+ "special": true
1571
  },
1572
  {
1573
  "id": 174,
 
1574
  "content": "8(:-)",
1575
  "single_word": false,
1576
  "lstrip": false,
1577
  "rstrip": false,
1578
+ "normalized": false,
1579
+ "special": true
1580
  },
1581
  {
1582
  "id": 175,
 
1583
  "content": "([(",
1584
  "single_word": false,
1585
  "lstrip": false,
1586
  "rstrip": false,
1587
+ "normalized": false,
1588
+ "special": true
1589
  },
1590
  {
1591
  "id": 176,
 
1592
  "content": ":-(*)",
1593
  "single_word": false,
1594
  "lstrip": false,
1595
  "rstrip": false,
1596
+ "normalized": false,
1597
+ "special": true
1598
  },
1599
  {
1600
  "id": 177,
 
1601
  "content": "&-l",
1602
  "single_word": false,
1603
  "lstrip": false,
1604
  "rstrip": false,
1605
+ "normalized": false,
1606
+ "special": true
1607
  },
1608
  {
1609
  "id": 178,
 
1610
  "content": ":-e",
1611
  "single_word": false,
1612
  "lstrip": false,
1613
  "rstrip": false,
1614
+ "normalized": false,
1615
+ "special": true
1616
  },
1617
  {
1618
  "id": 179,
 
1619
  "content": ":(",
1620
  "single_word": false,
1621
  "lstrip": false,
1622
  "rstrip": false,
1623
+ "normalized": false,
1624
+ "special": true
1625
  },
1626
  {
1627
  "id": 180,
 
1628
  "content": ":,(",
1629
  "single_word": false,
1630
  "lstrip": false,
1631
  "rstrip": false,
1632
+ "normalized": false,
1633
+ "special": true
1634
  },
1635
  {
1636
  "id": 181,
 
1637
  "content": ":-(",
1638
  "single_word": false,
1639
  "lstrip": false,
1640
  "rstrip": false,
1641
+ "normalized": false,
1642
+ "special": true
1643
  },
1644
  {
1645
  "id": 182,
 
1646
  "content": ":-P",
1647
  "single_word": false,
1648
  "lstrip": false,
1649
  "rstrip": false,
1650
+ "normalized": false,
1651
+ "special": true
1652
  },
1653
  {
1654
  "id": 183,
 
1655
  "content": ":-S",
1656
  "single_word": false,
1657
  "lstrip": false,
1658
  "rstrip": false,
1659
+ "normalized": false,
1660
+ "special": true
1661
  },
1662
  {
1663
  "id": 184,
 
1664
  "content": ":-C",
1665
  "single_word": false,
1666
  "lstrip": false,
1667
  "rstrip": false,
1668
+ "normalized": false,
1669
+ "special": true
1670
  },
1671
  {
1672
  "id": 185,
 
1673
  "content": ":-r",
1674
  "single_word": false,
1675
  "lstrip": false,
1676
  "rstrip": false,
1677
+ "normalized": false,
1678
+ "special": true
1679
  },
1680
  {
1681
  "id": 186,
 
1682
  "content": ":-t",
1683
  "single_word": false,
1684
  "lstrip": false,
1685
  "rstrip": false,
1686
+ "normalized": false,
1687
+ "special": true
1688
  },
1689
  {
1690
  "id": 187,
 
1691
  "content": ":-W",
1692
  "single_word": false,
1693
  "lstrip": false,
1694
  "rstrip": false,
1695
+ "normalized": false,
1696
+ "special": true
1697
  },
1698
  {
1699
  "id": 188,
 
1700
  "content": "X-(",
1701
  "single_word": false,
1702
  "lstrip": false,
1703
  "rstrip": false,
1704
+ "normalized": false,
1705
+ "special": true
1706
  },
1707
  {
1708
  "id": 189,
 
1709
  "content": "l-O",
1710
  "single_word": false,
1711
  "lstrip": false,
1712
  "rstrip": false,
1713
+ "normalized": false,
1714
+ "special": true
1715
  },
1716
  {
1717
  "id": 190,
 
1718
  "content": "l:-O",
1719
  "single_word": false,
1720
  "lstrip": false,
1721
  "rstrip": false,
1722
+ "normalized": false,
1723
+ "special": true
1724
  },
1725
  {
1726
  "id": 191,
 
1727
  "content": "$-)",
1728
  "single_word": false,
1729
  "lstrip": false,
1730
  "rstrip": false,
1731
+ "normalized": false,
1732
+ "special": true
1733
  },
1734
  {
1735
  "id": 192,
 
1736
  "content": ":-!",
1737
  "single_word": false,
1738
  "lstrip": false,
1739
  "rstrip": false,
1740
+ "normalized": false,
1741
+ "special": true
1742
  },
1743
  {
1744
  "id": 193,
 
1745
  "content": ":----}",
1746
  "single_word": false,
1747
  "lstrip": false,
1748
  "rstrip": false,
1749
+ "normalized": false,
1750
+ "special": true
1751
  },
1752
  {
1753
  "id": 194,
 
1754
  "content": "=:-)",
1755
  "single_word": false,
1756
  "lstrip": false,
1757
  "rstrip": false,
1758
+ "normalized": false,
1759
+ "special": true
1760
  },
1761
  {
1762
  "id": 195,
 
1763
  "content": "=:-(",
1764
  "single_word": false,
1765
  "lstrip": false,
1766
  "rstrip": false,
1767
+ "normalized": false,
1768
+ "special": true
1769
  },
1770
  {
1771
  "id": 196,
 
1772
  "content": "3:[",
1773
  "single_word": false,
1774
  "lstrip": false,
1775
  "rstrip": false,
1776
+ "normalized": false,
1777
+ "special": true
1778
  },
1779
  {
1780
  "id": 197,
 
1781
  "content": "8<:-)",
1782
  "single_word": false,
1783
  "lstrip": false,
1784
  "rstrip": false,
1785
+ "normalized": false,
1786
+ "special": true
1787
  },
1788
  {
1789
  "id": 198,
 
1790
  "content": ":#)",
1791
  "single_word": false,
1792
  "lstrip": false,
1793
  "rstrip": false,
1794
+ "normalized": false,
1795
+ "special": true
1796
  },
1797
  {
1798
  "id": 199,
 
1799
  "content": "8-#",
1800
  "single_word": false,
1801
  "lstrip": false,
1802
  "rstrip": false,
1803
+ "normalized": false,
1804
+ "special": true
1805
  },
1806
  {
1807
  "id": 200,
 
1808
  "content": "B-)",
1809
  "single_word": false,
1810
  "lstrip": false,
1811
  "rstrip": false,
1812
+ "normalized": false,
1813
+ "special": true
1814
  },
1815
  {
1816
  "id": 201,
 
1817
  "content": "8-)",
1818
  "single_word": false,
1819
  "lstrip": false,
1820
  "rstrip": false,
1821
+ "normalized": false,
1822
+ "special": true
1823
  },
1824
  {
1825
  "id": 202,
 
1826
  "content": "|-(",
1827
  "single_word": false,
1828
  "lstrip": false,
1829
  "rstrip": false,
1830
+ "normalized": false,
1831
+ "special": true
1832
  },
1833
  {
1834
  "id": 203,
 
1835
  "content": "H-)",
1836
  "single_word": false,
1837
  "lstrip": false,
1838
  "rstrip": false,
1839
+ "normalized": false,
1840
+ "special": true
1841
  },
1842
  {
1843
  "id": 204,
 
1844
  "content": "]-I",
1845
  "single_word": false,
1846
  "lstrip": false,
1847
  "rstrip": false,
1848
+ "normalized": false,
1849
+ "special": true
1850
  },
1851
  {
1852
  "id": 205,
 
1853
  "content": "V^J",
1854
  "single_word": false,
1855
  "lstrip": false,
1856
  "rstrip": false,
1857
+ "normalized": false,
1858
+ "special": true
1859
  },
1860
  {
1861
  "id": 206,
 
1862
  "content": "+-(",
1863
  "single_word": false,
1864
  "lstrip": false,
1865
  "rstrip": false,
1866
+ "normalized": false,
1867
+ "special": true
1868
  },
1869
  {
1870
  "id": 207,
 
1871
  "content": "~:-P",
1872
  "single_word": false,
1873
  "lstrip": false,
1874
  "rstrip": false,
1875
+ "normalized": false,
1876
+ "special": true
1877
  },
1878
  {
1879
  "id": 208,
 
1880
  "content": "`'",
1881
  "single_word": false,
1882
  "lstrip": false,
1883
  "rstrip": false,
1884
+ "normalized": false,
1885
+ "special": true
1886
  },
1887
  {
1888
  "id": 209,
 
1889
  "content": "L-P",
1890
  "single_word": false,
1891
  "lstrip": false,
1892
  "rstrip": false,
1893
+ "normalized": false,
1894
+ "special": true
1895
  },
1896
  {
1897
  "id": 210,
 
1898
  "content": "BI",
1899
  "single_word": false,
1900
  "lstrip": false,
1901
  "rstrip": false,
1902
+ "normalized": false,
1903
+ "special": true
1904
  },
1905
  {
1906
  "id": 211,
 
1907
  "content": "O |",
1908
  "single_word": false,
1909
  "lstrip": false,
1910
  "rstrip": false,
1911
+ "normalized": false,
1912
+ "special": true
1913
  },
1914
  {
1915
  "id": 212,
 
1916
  "content": "^^",
1917
  "single_word": false,
1918
  "lstrip": false,
1919
  "rstrip": false,
1920
+ "normalized": false,
1921
+ "special": true
1922
  },
1923
  {
1924
  "id": 213,
 
1925
  "content": "ㅜㅜ",
1926
  "single_word": false,
1927
  "lstrip": false,
1928
  "rstrip": false,
1929
+ "normalized": false,
1930
+ "special": true
1931
  },
1932
  {
1933
  "id": 214,
 
1934
  "content": "ㅠㅠ",
1935
  "single_word": false,
1936
  "lstrip": false,
1937
  "rstrip": false,
1938
+ "normalized": false,
1939
+ "special": true
1940
  },
1941
  {
1942
  "id": 215,
 
1943
  "content": "ㅡㅡ",
1944
  "single_word": false,
1945
  "lstrip": false,
1946
  "rstrip": false,
1947
+ "normalized": false,
1948
+ "special": true
1949
  },
1950
  {
1951
  "id": 216,
 
1952
  "content": "😀",
1953
  "single_word": false,
1954
  "lstrip": false,
1955
  "rstrip": false,
1956
+ "normalized": false,
1957
+ "special": true
1958
  },
1959
  {
1960
  "id": 217,
 
1961
  "content": "😃",
1962
  "single_word": false,
1963
  "lstrip": false,
1964
  "rstrip": false,
1965
+ "normalized": false,
1966
+ "special": true
1967
  },
1968
  {
1969
  "id": 218,
 
1970
  "content": "😄",
1971
  "single_word": false,
1972
  "lstrip": false,
1973
  "rstrip": false,
1974
+ "normalized": false,
1975
+ "special": true
1976
  },
1977
  {
1978
  "id": 219,
 
1979
  "content": "😁",
1980
  "single_word": false,
1981
  "lstrip": false,
1982
  "rstrip": false,
1983
+ "normalized": false,
1984
+ "special": true
1985
  },
1986
  {
1987
  "id": 220,
 
1988
  "content": "😆",
1989
  "single_word": false,
1990
  "lstrip": false,
1991
  "rstrip": false,
1992
+ "normalized": false,
1993
+ "special": true
1994
  },
1995
  {
1996
  "id": 221,
 
1997
  "content": "😅",
1998
  "single_word": false,
1999
  "lstrip": false,
2000
  "rstrip": false,
2001
+ "normalized": false,
2002
+ "special": true
2003
  },
2004
  {
2005
  "id": 222,
 
2006
  "content": "🤣",
2007
  "single_word": false,
2008
  "lstrip": false,
2009
  "rstrip": false,
2010
+ "normalized": false,
2011
+ "special": true
2012
  },
2013
  {
2014
  "id": 223,
 
2015
  "content": "😂",
2016
  "single_word": false,
2017
  "lstrip": false,
2018
  "rstrip": false,
2019
+ "normalized": false,
2020
+ "special": true
2021
  },
2022
  {
2023
  "id": 224,
 
2024
  "content": "🙂",
2025
  "single_word": false,
2026
  "lstrip": false,
2027
  "rstrip": false,
2028
+ "normalized": false,
2029
+ "special": true
2030
  },
2031
  {
2032
  "id": 225,
 
2033
  "content": "🙃",
2034
  "single_word": false,
2035
  "lstrip": false,
2036
  "rstrip": false,
2037
+ "normalized": false,
2038
+ "special": true
2039
  },
2040
  {
2041
  "id": 226,
 
2042
  "content": "😉",
2043
  "single_word": false,
2044
  "lstrip": false,
2045
  "rstrip": false,
2046
+ "normalized": false,
2047
+ "special": true
2048
  },
2049
  {
2050
  "id": 227,
 
2051
  "content": "😊",
2052
  "single_word": false,
2053
  "lstrip": false,
2054
  "rstrip": false,
2055
+ "normalized": false,
2056
+ "special": true
2057
  },
2058
  {
2059
  "id": 228,
 
2060
  "content": "😇",
2061
  "single_word": false,
2062
  "lstrip": false,
2063
  "rstrip": false,
2064
+ "normalized": false,
2065
+ "special": true
2066
  }
2067
  ],
2068
  "normalizer": {
2069
  "type": "Sequence",
2070
  "normalizers": [
2071
+ {
2072
+ "type": "NFKC"
2073
+ },
2074
  {
2075
  "type": "BertNormalizer",
2076
  "clean_text": false,
2083
  "pre_tokenizer": {
2084
  "type": "Metaspace",
2085
  "replacement": "▁",
2086
+ "add_prefix_space": true
2087
  },
2088
  "post_processor": {
2089
  "type": "RobertaProcessing",
2100
  "model": {
2101
  "type": "BPE",
2102
  "dropout": null,
2103
+ "unk_token": "<unk>",
2104
  "continuing_subword_prefix": null,
2105
  "end_of_word_suffix": null,
2106
  "fuse_unk": false,
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": false, "unk_token": "<unk>", "pad_token": "<pad>", "bos_token": "<s>", "eos_token": "</s>"}
1
+ {"special_tokens_map_file": "/root/.cache/huggingface/transformers/a87d2ed77831bb40ce806a97c04126addf5ecc82b3e23ecf916b2a4acdb9c29a.c23d5e62137984cf842a885705037b25b156747d145406702932d5f5d5e7c88e", "name_or_path": "gogamza/kobart-base-v2", "tokenizer_class": "PreTrainedTokenizerFast"}
vocab.json DELETED
The diff for this file is too large to render. See raw diff