tdooms commited on
Commit
e62e286
1 Parent(s): 6928cb8

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1081 -1034
  2. tokenizer_config.json +2 -10
tokenizer.json CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 1,
17
- "content": "[CLS]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -23,16 +23,7 @@
23
  },
24
  {
25
  "id": 2,
26
- "content": "[SEP]",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- },
33
- {
34
- "id": 3,
35
- "content": "[PAD]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -70,7 +61,63 @@
70
  }
71
  ]
72
  },
73
- "post_processor": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  "decoder": null,
75
  "model": {
76
  "type": "WordPiece",
@@ -79,1029 +126,1029 @@
79
  "max_input_chars_per_word": 100,
80
  "vocab": {
81
  "[UNK]": 0,
82
- "[CLS]": 1,
83
- "[SEP]": 2,
84
- "[PAD]": 3,
85
- "!": 4,
86
- "\"": 5,
87
- "#": 6,
88
- "$": 7,
89
- "%": 8,
90
- "&": 9,
91
- "'": 10,
92
- "(": 11,
93
- ")": 12,
94
- "*": 13,
95
- "+": 14,
96
- ",": 15,
97
- "-": 16,
98
- ".": 17,
99
- "/": 18,
100
- "0": 19,
101
- "1": 20,
102
- "2": 21,
103
- "3": 22,
104
- "4": 23,
105
- "5": 24,
106
- "6": 25,
107
- "7": 26,
108
- "8": 27,
109
- "9": 28,
110
- ":": 29,
111
- ";": 30,
112
- "<": 31,
113
- "=": 32,
114
- ">": 33,
115
- "?": 34,
116
- "@": 35,
117
- "[": 36,
118
- "\\": 37,
119
- "]": 38,
120
- "_": 39,
121
- "`": 40,
122
- "a": 41,
123
- "b": 42,
124
- "c": 43,
125
- "d": 44,
126
- "e": 45,
127
- "f": 46,
128
- "g": 47,
129
- "h": 48,
130
- "i": 49,
131
- "j": 50,
132
- "k": 51,
133
- "l": 52,
134
- "m": 53,
135
- "n": 54,
136
- "o": 55,
137
- "p": 56,
138
- "q": 57,
139
- "r": 58,
140
- "s": 59,
141
- "t": 60,
142
- "u": 61,
143
- "v": 62,
144
- "w": 63,
145
- "x": 64,
146
- "y": 65,
147
- "z": 66,
148
- "{": 67,
149
- "|": 68,
150
- "}": 69,
151
- "~": 70,
152
- "##a": 71,
153
- "##n": 72,
154
- "##d": 73,
155
- "##e": 74,
156
- "##r": 75,
157
- "##x": 76,
158
- "##p": 77,
159
- "##c": 78,
160
- "##t": 79,
161
- "##i": 80,
162
- "##o": 81,
163
- "##l": 82,
164
- "##s": 83,
165
- "##h": 84,
166
- "##u": 85,
167
- "##y": 86,
168
- "##w": 87,
169
- "##v": 88,
170
- "##m": 89,
171
- "##b": 90,
172
- "##g": 91,
173
- "##f": 92,
174
- "##k": 93,
175
  "##j": 94,
176
- "##z": 95,
177
- "##q": 96,
178
- "##he": 97,
179
- "the": 98,
180
- "##nd": 99,
181
- "##ed": 100,
182
- "and": 101,
183
- "to": 102,
184
- "##er": 103,
185
- "wa": 104,
186
- "##ou": 105,
187
- "##in": 106,
188
- "he": 107,
189
- "##re": 108,
190
- "was": 109,
191
- "sa": 110,
192
- "##ing": 111,
193
- "##om": 112,
194
- "she": 113,
195
- "##ar": 114,
196
- "##il": 115,
197
- "##it": 116,
198
- "##ay": 117,
199
- "it": 118,
200
- "##id": 119,
201
- "##at": 120,
202
- "they": 121,
203
- "ha": 122,
204
- "##en": 123,
205
- "##is": 124,
206
- "##an": 125,
207
- "th": 126,
208
- "on": 127,
209
- "##or": 128,
210
- "##im": 129,
211
- "##on": 130,
212
- "##ut": 131,
213
- "her": 132,
214
- "##ll": 133,
215
- "##le": 134,
216
- "##et": 135,
217
- "##ot": 136,
218
- "##ir": 137,
219
- "##es": 138,
220
- "in": 139,
221
- "##ow": 140,
222
- "you": 141,
223
- "##ck": 142,
224
- "##ld": 143,
225
- "##oo": 144,
226
- "said": 145,
227
- "be": 146,
228
- "##ily": 147,
229
- "st": 148,
230
- "##ig": 149,
231
- "tim": 150,
232
- "so": 151,
233
- "##ce": 152,
234
- "##pp": 153,
235
- "wit": 154,
236
- "his": 155,
237
- "with": 156,
238
- "mom": 157,
239
- "##ve": 158,
240
- "lily": 159,
241
- "of": 160,
242
- "##ked": 161,
243
- "fr": 162,
244
- "that": 163,
245
- "pl": 164,
246
- "##ery": 165,
247
- "##am": 166,
248
- "##ad": 167,
249
- "##ke": 168,
250
- "##nt": 169,
251
- "but": 170,
252
- "day": 171,
253
- "up": 172,
254
- "##ie": 173,
255
- "play": 174,
256
- "had": 175,
257
- "##el": 176,
258
- "wh": 177,
259
- "for": 178,
260
- "##st": 179,
261
- "##un": 180,
262
- "##ould": 181,
263
- "an": 182,
264
- "##my": 183,
265
- "##ent": 184,
266
- "li": 185,
267
- "##ra": 186,
268
- "##ch": 187,
269
- "do": 188,
270
- "happ": 189,
271
- "one": 190,
272
- "sh": 191,
273
- "want": 192,
274
- "##itt": 193,
275
- "##her": 194,
276
- "not": 195,
277
- "there": 196,
278
- "##ly": 197,
279
- "##ome": 198,
280
- "very": 199,
281
- "##se": 200,
282
- "##ound": 201,
283
- "as": 202,
284
- "ba": 203,
285
- "litt": 204,
286
- "little": 205,
287
- "ne": 206,
288
- "time": 207,
289
- "##ht": 208,
290
- "##al": 209,
291
- "ma": 210,
292
- "sm": 211,
293
- "happy": 212,
294
- "is": 213,
295
- "big": 214,
296
- "loo": 215,
297
- "saw": 216,
298
- "##iend": 217,
299
- "friend": 218,
300
- "##ry": 219,
301
- "re": 220,
302
- "bo": 221,
303
- "##ur": 222,
304
- "##ter": 223,
305
- "##ug": 224,
306
- "##ere": 225,
307
- "##ved": 226,
308
- "lo": 227,
309
- "were": 228,
310
- "once": 229,
311
- "##ore": 230,
312
- "se": 231,
313
- "ev": 232,
314
- "go": 233,
315
- "sp": 234,
316
- "him": 235,
317
- "too": 236,
318
- "ca": 237,
319
- "##ide": 238,
320
- "we": 239,
321
- "are": 240,
322
- "tom": 241,
323
- "at": 242,
324
- "can": 243,
325
- "##irl": 244,
326
- "##ard": 245,
327
- "wanted": 246,
328
- "whe": 247,
329
- "ben": 248,
330
- "##ill": 249,
331
- "girl": 250,
332
- "upon": 251,
333
- "##ec": 252,
334
- "their": 253,
335
- "out": 254,
336
- "them": 255,
337
- "##ys": 256,
338
- "##fu": 257,
339
- "##way": 258,
340
- "did": 259,
341
- "smil": 260,
342
- "##ind": 261,
343
- "could": 262,
344
- "have": 263,
345
- "no": 264,
346
- "##ri": 265,
347
- "##ted": 266,
348
- "##ver": 267,
349
- "##ain": 268,
350
- "ex": 269,
351
- "##hed": 270,
352
- "all": 271,
353
- "ar": 272,
354
- "went": 273,
355
- "hel": 274,
356
- "su": 275,
357
- "##ic": 276,
358
- "when": 277,
359
- "ta": 278,
360
- "help": 279,
361
- "##ful": 280,
362
- "##ood": 281,
363
- "friends": 282,
364
- "##hing": 283,
365
- "nam": 284,
366
- "##ight": 285,
367
- "what": 286,
368
- "kn": 287,
369
- "le": 288,
370
- "##um": 289,
371
- "back": 290,
372
- "##ark": 291,
373
- "##one": 292,
374
- "cl": 293,
375
- "from": 294,
376
- "fun": 295,
377
- "al": 296,
378
- "then": 297,
379
- "##all": 298,
380
- "timmy": 299,
381
- "ro": 300,
382
- "star": 301,
383
- "every": 302,
384
- "sc": 303,
385
- "smiled": 304,
386
- "named": 305,
387
- "##oug": 306,
388
- "loved": 307,
389
- "##side": 308,
390
- "man": 309,
391
- "some": 310,
392
- "##elt": 311,
393
- "asked": 312,
394
- "see": 313,
395
- "me": 314,
396
- "##ick": 315,
397
- "like": 316,
398
- "fe": 317,
399
- "##op": 318,
400
- "felt": 319,
401
- "br": 320,
402
- "looked": 321,
403
- "around": 322,
404
- "look": 323,
405
- "##ep": 324,
406
- "##ame": 325,
407
- "##omet": 326,
408
- "would": 327,
409
- "bir": 328,
410
- "somet": 329,
411
- "##get": 330,
412
- "fa": 331,
413
- "##ong": 332,
414
- "boy": 333,
415
- "##ss": 334,
416
- "bird": 335,
417
- "pr": 336,
418
- "##dd": 337,
419
- "jo": 338,
420
- "##est": 339,
421
- "##ings": 340,
422
- "ag": 341,
423
- "wor": 342,
424
- "mommy": 343,
425
- "make": 344,
426
- "##ade": 345,
427
- "than": 346,
428
- "car": 347,
429
- "##own": 348,
430
- "tre": 349,
431
- "ran": 350,
432
- "##gether": 351,
433
- "together": 352,
434
- "la": 353,
435
- "away": 354,
436
- "dad": 355,
437
- "says": 356,
438
- "started": 357,
439
- "##ice": 358,
440
- "##oud": 359,
441
- "something": 360,
442
- "##ared": 361,
443
- "made": 362,
444
- "co": 363,
445
- "fl": 364,
446
- "##ther": 365,
447
- "##ited": 366,
448
- "park": 367,
449
- "sad": 368,
450
- "good": 369,
451
- "##ack": 370,
452
- "other": 371,
453
- "ch": 372,
454
- "exc": 373,
455
- "new": 374,
456
- "put": 375,
457
- "who": 376,
458
- "##out": 377,
459
- "let": 378,
460
- "hug": 379,
461
- "mu": 380,
462
- "##ble": 381,
463
- "again": 382,
464
- "home": 383,
465
- "sam": 384,
466
- "found": 385,
467
- "dec": 386,
468
- "##pped": 387,
469
- "##ried": 388,
470
- "wal": 389,
471
- "##ure": 390,
472
- "get": 391,
473
- "##ach": 392,
474
- "playing": 393,
475
- "##ought": 394,
476
- "gra": 395,
477
- "##na": 396,
478
- "##pl": 397,
479
- "sw": 398,
480
- "##ous": 399,
481
- "things": 400,
482
- "bl": 401,
483
- "excited": 402,
484
- "your": 403,
485
- "liked": 404,
486
- "got": 405,
487
- "##ny": 406,
488
- "##uck": 407,
489
- "##king": 408,
490
- "##ge": 409,
491
- "##ided": 410,
492
- "decided": 411,
493
- "came": 412,
494
- "my": 413,
495
- "dog": 414,
496
- "this": 415,
497
- "scared": 416,
498
- "##ust": 417,
499
- "down": 418,
500
- "bec": 419,
501
- "##ouse": 420,
502
- "ab": 421,
503
- "find": 422,
504
- "pa": 423,
505
- "care": 424,
506
- "will": 425,
507
- "po": 426,
508
- "sara": 427,
509
- "gr": 428,
510
- "feel": 429,
511
- "anna": 430,
512
- "##ell": 431,
513
- "max": 432,
514
- "##as": 433,
515
- "##ist": 434,
516
- "bu": 435,
517
- "##arn": 436,
518
- "##ave": 437,
519
- "##nder": 438,
520
- "##ways": 439,
521
- "##ess": 440,
522
- "always": 441,
523
- "mo": 442,
524
- "took": 443,
525
- "about": 444,
526
- "##ers": 445,
527
- "lot": 446,
528
- "##ook": 447,
529
- "toys": 448,
530
- "kne": 449,
531
- "didn": 450,
532
- "##ged": 451,
533
- "##ise": 452,
534
- "##bb": 453,
535
- "old": 454,
536
- "outside": 455,
537
- "tree": 456,
538
- "ho": 457,
539
- "##ite": 458,
540
- "how": 459,
541
- "##ally": 460,
542
- "ball": 461,
543
- "thought": 462,
544
- "##ant": 463,
545
- "sor": 464,
546
- "af": 465,
547
- "more": 466,
548
- "##ma": 467,
549
- "##eci": 468,
550
- "##ened": 469,
551
- "##ched": 470,
552
- "learn": 471,
553
- "##ret": 472,
554
- "pu": 473,
555
- "tw": 474,
556
- "know": 475,
557
- "cat": 476,
558
- "##to": 477,
559
- "take": 478,
560
- "don": 479,
561
- "mi": 480,
562
- "laug": 481,
563
- "pe": 482,
564
- "speci": 483,
565
- "special": 484,
566
- "sudd": 485,
567
- "sudden": 486,
568
- "inside": 487,
569
- "knew": 488,
570
- "##ty": 489,
571
- "sorry": 490,
572
- "##ive": 491,
573
- "##ro": 492,
574
- "any": 493,
575
- "jack": 494,
576
- "##ff": 495,
577
- "suddenly": 496,
578
- "just": 497,
579
- "toy": 498,
580
- "##ue": 499,
581
- "after": 500,
582
- "tr": 501,
583
- "if": 502,
584
- "show": 503,
585
- "##ink": 504,
586
- "##lly": 505,
587
- "or": 506,
588
- "run": 507,
589
- "ra": 508,
590
- "sl": 509,
591
- "much": 510,
592
- "##ish": 511,
593
- "hand": 512,
594
- "yes": 513,
595
- "house": 514,
596
- "sun": 515,
597
- "op": 516,
598
- "sk": 517,
599
- "clo": 518,
600
- "into": 519,
601
- "fin": 520,
602
- "tried": 521,
603
- "en": 522,
604
- "water": 523,
605
- "##ate": 524,
606
- "told": 525,
607
- "each": 526,
608
- "##ea": 527,
609
- "over": 528,
610
- "proud": 529,
611
- "##ump": 530,
612
- "gave": 531,
613
- "heard": 532,
614
- "never": 533,
615
- "ok": 534,
616
- "##dy": 535,
617
- "##by": 536,
618
- "thank": 537,
619
- "##use": 538,
620
- "eat": 539,
621
- "room": 540,
622
- "expl": 541,
623
- "##other": 542,
624
- "pick": 543,
625
- "pret": 544,
626
- "qu": 545,
627
- "##lled": 546,
628
- "gre": 547,
629
- "come": 548,
630
- "couldn": 549,
631
- "played": 550,
632
- "##ion": 551,
633
- "mia": 552,
634
- "sha": 553,
635
- "##ause": 554,
636
- "because": 555,
637
- "wat": 556,
638
- "hugged": 557,
639
- "##ious": 558,
640
- "off": 559,
641
- "bear": 560,
642
- "now": 561,
643
- "com": 562,
644
- "nice": 563,
645
- "fo": 564,
646
- "##oth": 565,
647
- "box": 566,
648
- "##our": 567,
649
- "str": 568,
650
- "need": 569,
651
- "bet": 570,
652
- "many": 571,
653
- "##ile": 572,
654
- "##fe": 573,
655
- "##ft": 574,
656
- "small": 575,
657
- "##eep": 576,
658
- "long": 577,
659
- "##ving": 578,
660
- "##kes": 579,
661
- "##sed": 580,
662
- "##gry": 581,
663
- "anim": 582,
664
- "animal": 583,
665
- "try": 584,
666
- "end": 585,
667
- "##ough": 586,
668
- "unt": 587,
669
- "##cy": 588,
670
- "##ild": 589,
671
- "until": 590,
672
- "even": 591,
673
- "##urt": 592,
674
- "##ort": 593,
675
- "##elf": 594,
676
- "soon": 595,
677
- "##iz": 596,
678
- "kind": 597,
679
- "love": 598,
680
- "learned": 599,
681
- "bea": 600,
682
- "everyone": 601,
683
- "by": 602,
684
- "ad": 603,
685
- "better": 604,
686
- "flow": 605,
687
- "spot": 606,
688
- "##ine": 607,
689
- "best": 608,
690
- "##urp": 609,
691
- "say": 610,
692
- "##mp": 611,
693
- "##ady": 612,
694
- "##ream": 613,
695
- "cle": 614,
696
- "##urn": 615,
697
- "##ace": 616,
698
- "##ves": 617,
699
- "fi": 618,
700
- "gard": 619,
701
- "garden": 620,
702
- "fast": 621,
703
- "its": 622,
704
- "che": 623,
705
- "careful": 624,
706
- "##ber": 625,
707
- "beaut": 626,
708
- "##ies": 627,
709
- "bra": 628,
710
- "thanked": 629,
711
- "sky": 630,
712
- "laughed": 631,
713
- "jump": 632,
714
- "gl": 633,
715
- "loud": 634,
716
- "sn": 635,
717
- "list": 636,
718
- "ow": 637,
719
- "##ear": 638,
720
- "##ct": 639,
721
- "##iny": 640,
722
- "wo": 641,
723
- "beauti": 642,
724
- "##sh": 643,
725
- "##lew": 644,
726
- "lots": 645,
727
- "beautiful": 646,
728
- "hard": 647,
729
- "still": 648,
730
- "animals": 649,
731
- "fam": 650,
732
- "joh": 651,
733
- "under": 652,
734
- "john": 653,
735
- "lu": 654,
736
- "mum": 655,
737
- "stay": 656,
738
- "hurt": 657,
739
- "##ning": 658,
740
- "both": 659,
741
- "dan": 660,
742
- "##ree": 661,
743
- "##self": 662,
744
- "way": 663,
745
- "rem": 664,
746
- "two": 665,
747
- "##hes": 666,
748
- "bad": 667,
749
- "safe": 668,
750
- "col": 669,
751
- "##ool": 670,
752
- "##be": 671,
753
- "di": 672,
754
- "red": 673,
755
- "book": 674,
756
- "imp": 675,
757
- "tow": 676,
758
- "##ople": 677,
759
- "##ane": 678,
760
- "lived": 679,
761
- "people": 680,
762
- "okay": 681,
763
- "##em": 682,
764
- "lucy": 683,
765
- "walked": 684,
766
- "should": 685,
767
- "brave": 686,
768
- "surp": 687,
769
- "surpr": 688,
770
- "##ase": 689,
771
- "##ock": 690,
772
- "##ished": 691,
773
- "angry": 692,
774
- "family": 693,
775
- "adv": 694,
776
- "##ress": 695,
777
- "##igh": 696,
778
- "flew": 697,
779
- "called": 698,
780
- "stor": 699,
781
- "##ept": 700,
782
- "##ip": 701,
783
- "##eet": 702,
784
- "sure": 703,
785
- "##led": 704,
786
- "fore": 705,
787
- "kept": 706,
788
- "fly": 707,
789
- "share": 708,
790
- "##ect": 709,
791
- "##fore": 710,
792
- "before": 711,
793
- "##ger": 712,
794
- "##art": 713,
795
- "while": 714,
796
- "##xt": 715,
797
- "##ised": 716,
798
- "##dded": 717,
799
- "pic": 718,
800
- "keep": 719,
801
- "door": 720,
802
- "going": 721,
803
- "pretty": 722,
804
- "rock": 723,
805
- "clean": 724,
806
- "dra": 725,
807
- "next": 726,
808
- "why": 727,
809
- "may": 728,
810
- "advent": 729,
811
- "##ied": 730,
812
- "##ary": 731,
813
- "give": 732,
814
- "noise": 733,
815
- "con": 734,
816
- "opened": 735,
817
- "shiny": 736,
818
- "far": 737,
819
- "un": 738,
820
- "doll": 739,
821
- "wind": 740,
822
- "id": 741,
823
- "cry": 742,
824
- "grand": 743,
825
- "real": 744,
826
- "##illy": 745,
827
- "##end": 746,
828
- "sto": 747,
829
- "##so": 748,
830
- "turn": 749,
831
- "explore": 750,
832
- "ground": 751,
833
- "also": 752,
834
- "##les": 753,
835
- "ey": 754,
836
- "##ner": 755,
837
- "idea": 756,
838
- "color": 757,
839
- "##ap": 758,
840
- "where": 759,
841
- "nodded": 760,
842
- "war": 761,
843
- "feeling": 762,
844
- "blue": 763,
845
- "bob": 764,
846
- "picked": 765,
847
- "##imb": 766,
848
- "walking": 767,
849
- "clos": 768,
850
- "##thing": 769,
851
- "climb": 770,
852
- "thr": 771,
853
- "##ting": 772,
854
- "has": 773,
855
- "ple": 774,
856
- "wait": 775,
857
- "bed": 776,
858
- "smile": 777,
859
- "adventure": 778,
860
- "##oy": 779,
861
- "being": 780,
862
- "##th": 781,
863
- "maybe": 782,
864
- "finally": 783,
865
- "looking": 784,
866
- "da": 785,
867
- "##iced": 786,
868
- "##ture": 787,
869
- "diff": 788,
870
- "wr": 789,
871
- "##and": 790,
872
- "food": 791,
873
- "listen": 792,
874
- "##joy": 793,
875
- "picture": 794,
876
- "think": 795,
877
- "remem": 796,
878
- "del": 797,
879
- "truck": 798,
880
- "tra": 799,
881
- "eyes": 800,
882
- "##ught": 801,
883
- "bro": 802,
884
- "gi": 803,
885
- "repl": 804,
886
- "stopped": 805,
887
- "here": 806,
888
- "great": 807,
889
- "bre": 808,
890
- "ru": 809,
891
- "vo": 810,
892
- "walk": 811,
893
- "##qu": 812,
894
- "year": 813,
895
- "enjoy": 814,
896
- "wonder": 815,
897
- "remember": 816,
898
- "##able": 817,
899
- "hands": 818,
900
- "sue": 819,
901
- "quick": 820,
902
- "##per": 821,
903
- "ever": 822,
904
- "cur": 823,
905
- "head": 824,
906
- "flowers": 825,
907
- "import": 826,
908
- "forest": 827,
909
- "##og": 828,
910
- "ac": 829,
911
- "noticed": 830,
912
- "near": 831,
913
- "##bbit": 832,
914
- "app": 833,
915
- "dis": 834,
916
- "##ized": 835,
917
- "watch": 836,
918
- "##irst": 837,
919
- "rabbit": 838,
920
- "fish": 839,
921
- "##llow": 840,
922
- "important": 841,
923
- "##age": 842,
924
- "ama": 843,
925
- "us": 844,
926
- "rain": 845,
927
- "sound": 846,
928
- "slide": 847,
929
- "bun": 848,
930
- "amaz": 849,
931
- "replied": 850,
932
- "stop": 851,
933
- "work": 852,
934
- "follow": 853,
935
- "mor": 854,
936
- "showed": 855,
937
- "tal": 856,
938
- "mean": 857,
939
- "tou": 858,
940
- "##ces": 859,
941
- "##gan": 860,
942
- "right": 861,
943
- "sarah": 862,
944
- "ask": 863,
945
- "##bbed": 864,
946
- "differ": 865,
947
- "mag": 866,
948
- "goodby": 867,
949
- "our": 868,
950
- "bright": 869,
951
- "##day": 870,
952
- "watched": 871,
953
- "goodbye": 872,
954
- "hop": 873,
955
- "please": 874,
956
- "use": 875,
957
- "strong": 876,
958
- "am": 877,
959
- "stick": 878,
960
- "quickly": 879,
961
- "daddy": 880,
962
- "been": 881,
963
- "voice": 882,
964
- "yell": 883,
965
- "##ath": 884,
966
- "jane": 885,
967
- "different": 886,
968
- "child": 887,
969
- "boat": 888,
970
- "##llo": 889,
971
- "##co": 890,
972
- "##che": 891,
973
- "hello": 892,
974
- "first": 893,
975
- "became": 894,
976
- "face": 895,
977
- "place": 896,
978
- "##ange": 897,
979
- "does": 898,
980
- "high": 899,
981
- "##ng": 900,
982
- "store": 901,
983
- "closer": 902,
984
- "curious": 903,
985
- "##ak": 904,
986
- "warm": 905,
987
- "sand": 906,
988
- "dress": 907,
989
- "bel": 908,
990
- "##ummy": 909,
991
- "joe": 910,
992
- "cook": 911,
993
- "tell": 912,
994
- "##ila": 913,
995
- "forg": 914,
996
- "em": 915,
997
- "three": 916,
998
- "fav": 917,
999
- "grandma": 918,
1000
- "##oon": 919,
1001
- "##ount": 920,
1002
- "open": 921,
1003
- "lila": 922,
1004
- "##leep": 923,
1005
- "night": 924,
1006
- "cake": 925,
1007
- "block": 926,
1008
- "##more": 927,
1009
- "pie": 928,
1010
- "anymore": 929,
1011
- "bunny": 930,
1012
- "butter": 931,
1013
- "mon": 932,
1014
- "##iss": 933,
1015
- "##ached": 934,
1016
- "lea": 935,
1017
- "sweet": 936,
1018
- "##ired": 937,
1019
- "kid": 938,
1020
- "##ull": 939,
1021
- "only": 940,
1022
- "flo": 941,
1023
- "kit": 942,
1024
- "pain": 943,
1025
- "grabbed": 944,
1026
- "duck": 945,
1027
- "##isy": 946,
1028
- "fell": 947,
1029
- "birds": 948,
1030
- "fire": 949,
1031
- "cont": 950,
1032
- "##ered": 951,
1033
- "jumped": 952,
1034
- "glad": 953,
1035
- "pet": 954,
1036
- "##here": 955,
1037
- "bit": 956,
1038
- "per": 957,
1039
- "##chen": 958,
1040
- "kitchen": 959,
1041
- "cra": 960,
1042
- "helped": 961,
1043
- "sees": 962,
1044
- "dr": 963,
1045
- "##outed": 964,
1046
- "hear": 965,
1047
- "sing": 966,
1048
- "##ul": 967,
1049
- "prin": 968,
1050
- "happened": 969,
1051
- "squ": 970,
1052
- "yummy": 971,
1053
- "grass": 972,
1054
- "tri": 973,
1055
- "ready": 974,
1056
- "tommy": 975,
1057
- "story": 976,
1058
- "shouted": 977,
1059
- "beh": 978,
1060
- "##nts": 979,
1061
- "really": 980,
1062
- "brother": 981,
1063
- "cr": 982,
1064
- "reached": 983,
1065
- "realized": 984,
1066
- "lady": 985,
1067
- "##ey": 986,
1068
- "draw": 987,
1069
- "having": 988,
1070
- "##ins": 989,
1071
- "through": 990,
1072
- "favor": 991,
1073
- "mess": 992,
1074
- "cre": 993,
1075
- "hat": 994,
1076
- "favorite": 995,
1077
- "game": 996,
1078
- "less": 997,
1079
- "ate": 998,
1080
- "underst": 999,
1081
- "soft": 1000,
1082
- "pare": 1001,
1083
- "##zy": 1002,
1084
- "thing": 1003,
1085
- "##ket": 1004,
1086
- "##ather": 1005,
1087
- "##imes": 1006,
1088
- "cu": 1007,
1089
- "magic": 1008,
1090
- "pretend": 1009,
1091
- "began": 1010,
1092
- "##where": 1011,
1093
- "world": 1012,
1094
- "looks": 1013,
1095
- "##ken": 1014,
1096
- "butterf": 1015,
1097
- "himself": 1016,
1098
- "kids": 1017,
1099
- "##fully": 1018,
1100
- "making": 1019,
1101
- "done": 1020,
1102
- "wow": 1021,
1103
- "cut": 1022,
1104
- "sometimes": 1023
1105
  }
1106
  }
1107
  }
 
14
  },
15
  {
16
  "id": 1,
17
+ "content": "[BOS]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
23
  },
24
  {
25
  "id": 2,
26
+ "content": "[EOS]",
 
 
 
 
 
 
 
 
 
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
61
  }
62
  ]
63
  },
64
+ "post_processor": {
65
+ "type": "TemplateProcessing",
66
+ "single": [
67
+ {
68
+ "SpecialToken": {
69
+ "id": "[BOS]",
70
+ "type_id": 0
71
+ }
72
+ },
73
+ {
74
+ "Sequence": {
75
+ "id": "A",
76
+ "type_id": 0
77
+ }
78
+ },
79
+ {
80
+ "SpecialToken": {
81
+ "id": "[EOS]",
82
+ "type_id": 0
83
+ }
84
+ }
85
+ ],
86
+ "pair": [
87
+ {
88
+ "Sequence": {
89
+ "id": "A",
90
+ "type_id": 0
91
+ }
92
+ },
93
+ {
94
+ "Sequence": {
95
+ "id": "B",
96
+ "type_id": 1
97
+ }
98
+ }
99
+ ],
100
+ "special_tokens": {
101
+ "[BOS]": {
102
+ "id": "[BOS]",
103
+ "ids": [
104
+ 1
105
+ ],
106
+ "tokens": [
107
+ "[BOS]"
108
+ ]
109
+ },
110
+ "[EOS]": {
111
+ "id": "[EOS]",
112
+ "ids": [
113
+ 2
114
+ ],
115
+ "tokens": [
116
+ "[EOS]"
117
+ ]
118
+ }
119
+ }
120
+ },
121
  "decoder": null,
122
  "model": {
123
  "type": "WordPiece",
 
126
  "max_input_chars_per_word": 100,
127
  "vocab": {
128
  "[UNK]": 0,
129
+ "[BOS]": 1,
130
+ "[EOS]": 2,
131
+ "!": 3,
132
+ "\"": 4,
133
+ "#": 5,
134
+ "$": 6,
135
+ "%": 7,
136
+ "&": 8,
137
+ "'": 9,
138
+ "(": 10,
139
+ ")": 11,
140
+ "*": 12,
141
+ "+": 13,
142
+ ",": 14,
143
+ "-": 15,
144
+ ".": 16,
145
+ "/": 17,
146
+ "0": 18,
147
+ "1": 19,
148
+ "2": 20,
149
+ "3": 21,
150
+ "4": 22,
151
+ "5": 23,
152
+ "6": 24,
153
+ "7": 25,
154
+ "8": 26,
155
+ "9": 27,
156
+ ":": 28,
157
+ ";": 29,
158
+ "<": 30,
159
+ "=": 31,
160
+ ">": 32,
161
+ "?": 33,
162
+ "@": 34,
163
+ "[": 35,
164
+ "\\": 36,
165
+ "]": 37,
166
+ "_": 38,
167
+ "`": 39,
168
+ "a": 40,
169
+ "b": 41,
170
+ "c": 42,
171
+ "d": 43,
172
+ "e": 44,
173
+ "f": 45,
174
+ "g": 46,
175
+ "h": 47,
176
+ "i": 48,
177
+ "j": 49,
178
+ "k": 50,
179
+ "l": 51,
180
+ "m": 52,
181
+ "n": 53,
182
+ "o": 54,
183
+ "p": 55,
184
+ "q": 56,
185
+ "r": 57,
186
+ "s": 58,
187
+ "t": 59,
188
+ "u": 60,
189
+ "v": 61,
190
+ "w": 62,
191
+ "x": 63,
192
+ "y": 64,
193
+ "z": 65,
194
+ "{": 66,
195
+ "|": 67,
196
+ "}": 68,
197
+ "~": 69,
198
+ "##e": 70,
199
+ "##n": 71,
200
+ "##t": 72,
201
+ "##l": 73,
202
+ "##a": 74,
203
+ "##m": 75,
204
+ "##s": 76,
205
+ "##u": 77,
206
+ "##k": 78,
207
+ "##o": 79,
208
+ "##r": 80,
209
+ "##d": 81,
210
+ "##f": 82,
211
+ "##b": 83,
212
+ "##y": 84,
213
+ "##i": 85,
214
+ "##p": 86,
215
+ "##v": 87,
216
+ "##g": 88,
217
+ "##h": 89,
218
+ "##c": 90,
219
+ "##w": 91,
220
+ "##z": 92,
221
+ "##q": 93,
222
  "##j": 94,
223
+ "##x": 95,
224
+ "##he": 96,
225
+ "the": 97,
226
+ "##nd": 98,
227
+ "##ed": 99,
228
+ "and": 100,
229
+ "to": 101,
230
+ "##er": 102,
231
+ "wa": 103,
232
+ "##ou": 104,
233
+ "##in": 105,
234
+ "he": 106,
235
+ "##re": 107,
236
+ "was": 108,
237
+ "sa": 109,
238
+ "##ing": 110,
239
+ "##om": 111,
240
+ "she": 112,
241
+ "##ar": 113,
242
+ "##il": 114,
243
+ "##it": 115,
244
+ "##ay": 116,
245
+ "it": 117,
246
+ "##id": 118,
247
+ "##at": 119,
248
+ "they": 120,
249
+ "ha": 121,
250
+ "##en": 122,
251
+ "##is": 123,
252
+ "##an": 124,
253
+ "th": 125,
254
+ "on": 126,
255
+ "##or": 127,
256
+ "##im": 128,
257
+ "##on": 129,
258
+ "##ut": 130,
259
+ "her": 131,
260
+ "##ll": 132,
261
+ "##le": 133,
262
+ "##et": 134,
263
+ "##ot": 135,
264
+ "##ir": 136,
265
+ "##es": 137,
266
+ "in": 138,
267
+ "##ow": 139,
268
+ "you": 140,
269
+ "##ck": 141,
270
+ "##ld": 142,
271
+ "##oo": 143,
272
+ "said": 144,
273
+ "be": 145,
274
+ "##ily": 146,
275
+ "st": 147,
276
+ "##ig": 148,
277
+ "tim": 149,
278
+ "so": 150,
279
+ "##ce": 151,
280
+ "##pp": 152,
281
+ "wit": 153,
282
+ "his": 154,
283
+ "with": 155,
284
+ "mom": 156,
285
+ "##ve": 157,
286
+ "lily": 158,
287
+ "of": 159,
288
+ "##ked": 160,
289
+ "fr": 161,
290
+ "that": 162,
291
+ "pl": 163,
292
+ "##ery": 164,
293
+ "##am": 165,
294
+ "##ad": 166,
295
+ "##ke": 167,
296
+ "##nt": 168,
297
+ "but": 169,
298
+ "day": 170,
299
+ "up": 171,
300
+ "##ie": 172,
301
+ "play": 173,
302
+ "had": 174,
303
+ "##el": 175,
304
+ "wh": 176,
305
+ "for": 177,
306
+ "##st": 178,
307
+ "##un": 179,
308
+ "##ould": 180,
309
+ "an": 181,
310
+ "##my": 182,
311
+ "##ent": 183,
312
+ "li": 184,
313
+ "##ra": 185,
314
+ "##ch": 186,
315
+ "do": 187,
316
+ "happ": 188,
317
+ "one": 189,
318
+ "sh": 190,
319
+ "want": 191,
320
+ "##itt": 192,
321
+ "##her": 193,
322
+ "not": 194,
323
+ "there": 195,
324
+ "##ly": 196,
325
+ "##ome": 197,
326
+ "very": 198,
327
+ "##se": 199,
328
+ "##ound": 200,
329
+ "as": 201,
330
+ "ba": 202,
331
+ "litt": 203,
332
+ "little": 204,
333
+ "ne": 205,
334
+ "time": 206,
335
+ "##ht": 207,
336
+ "##al": 208,
337
+ "ma": 209,
338
+ "sm": 210,
339
+ "happy": 211,
340
+ "is": 212,
341
+ "big": 213,
342
+ "loo": 214,
343
+ "saw": 215,
344
+ "##iend": 216,
345
+ "friend": 217,
346
+ "##ry": 218,
347
+ "re": 219,
348
+ "bo": 220,
349
+ "##ur": 221,
350
+ "##ter": 222,
351
+ "##ug": 223,
352
+ "##ere": 224,
353
+ "##ved": 225,
354
+ "lo": 226,
355
+ "were": 227,
356
+ "once": 228,
357
+ "##ore": 229,
358
+ "se": 230,
359
+ "ev": 231,
360
+ "go": 232,
361
+ "sp": 233,
362
+ "him": 234,
363
+ "too": 235,
364
+ "ca": 236,
365
+ "##ide": 237,
366
+ "we": 238,
367
+ "are": 239,
368
+ "tom": 240,
369
+ "at": 241,
370
+ "can": 242,
371
+ "##irl": 243,
372
+ "##ard": 244,
373
+ "wanted": 245,
374
+ "whe": 246,
375
+ "ben": 247,
376
+ "##ill": 248,
377
+ "girl": 249,
378
+ "upon": 250,
379
+ "##ec": 251,
380
+ "their": 252,
381
+ "out": 253,
382
+ "them": 254,
383
+ "##ys": 255,
384
+ "##fu": 256,
385
+ "##way": 257,
386
+ "did": 258,
387
+ "smil": 259,
388
+ "##ind": 260,
389
+ "could": 261,
390
+ "have": 262,
391
+ "no": 263,
392
+ "##ri": 264,
393
+ "##ted": 265,
394
+ "##ver": 266,
395
+ "##ain": 267,
396
+ "ex": 268,
397
+ "##hed": 269,
398
+ "all": 270,
399
+ "ar": 271,
400
+ "went": 272,
401
+ "hel": 273,
402
+ "su": 274,
403
+ "##ic": 275,
404
+ "when": 276,
405
+ "ta": 277,
406
+ "help": 278,
407
+ "##ful": 279,
408
+ "##ood": 280,
409
+ "friends": 281,
410
+ "##hing": 282,
411
+ "nam": 283,
412
+ "##ight": 284,
413
+ "what": 285,
414
+ "kn": 286,
415
+ "le": 287,
416
+ "##um": 288,
417
+ "back": 289,
418
+ "##ark": 290,
419
+ "##one": 291,
420
+ "cl": 292,
421
+ "from": 293,
422
+ "fun": 294,
423
+ "al": 295,
424
+ "then": 296,
425
+ "##all": 297,
426
+ "timmy": 298,
427
+ "ro": 299,
428
+ "star": 300,
429
+ "every": 301,
430
+ "sc": 302,
431
+ "smiled": 303,
432
+ "named": 304,
433
+ "##oug": 305,
434
+ "loved": 306,
435
+ "##side": 307,
436
+ "man": 308,
437
+ "some": 309,
438
+ "##elt": 310,
439
+ "asked": 311,
440
+ "see": 312,
441
+ "me": 313,
442
+ "##ick": 314,
443
+ "like": 315,
444
+ "fe": 316,
445
+ "##op": 317,
446
+ "felt": 318,
447
+ "br": 319,
448
+ "looked": 320,
449
+ "around": 321,
450
+ "look": 322,
451
+ "##ep": 323,
452
+ "##ame": 324,
453
+ "##omet": 325,
454
+ "would": 326,
455
+ "bir": 327,
456
+ "somet": 328,
457
+ "##get": 329,
458
+ "fa": 330,
459
+ "##ong": 331,
460
+ "boy": 332,
461
+ "##ss": 333,
462
+ "bird": 334,
463
+ "pr": 335,
464
+ "##dd": 336,
465
+ "jo": 337,
466
+ "##est": 338,
467
+ "##ings": 339,
468
+ "ag": 340,
469
+ "wor": 341,
470
+ "mommy": 342,
471
+ "make": 343,
472
+ "##ade": 344,
473
+ "than": 345,
474
+ "car": 346,
475
+ "##own": 347,
476
+ "tre": 348,
477
+ "ran": 349,
478
+ "##gether": 350,
479
+ "together": 351,
480
+ "la": 352,
481
+ "away": 353,
482
+ "dad": 354,
483
+ "says": 355,
484
+ "started": 356,
485
+ "##ice": 357,
486
+ "##oud": 358,
487
+ "something": 359,
488
+ "##ared": 360,
489
+ "made": 361,
490
+ "co": 362,
491
+ "fl": 363,
492
+ "##ther": 364,
493
+ "##ited": 365,
494
+ "park": 366,
495
+ "sad": 367,
496
+ "good": 368,
497
+ "##ack": 369,
498
+ "other": 370,
499
+ "ch": 371,
500
+ "exc": 372,
501
+ "new": 373,
502
+ "put": 374,
503
+ "who": 375,
504
+ "##out": 376,
505
+ "let": 377,
506
+ "hug": 378,
507
+ "mu": 379,
508
+ "##ble": 380,
509
+ "again": 381,
510
+ "home": 382,
511
+ "sam": 383,
512
+ "found": 384,
513
+ "dec": 385,
514
+ "##pped": 386,
515
+ "##ried": 387,
516
+ "wal": 388,
517
+ "##ure": 389,
518
+ "get": 390,
519
+ "##ach": 391,
520
+ "playing": 392,
521
+ "##ought": 393,
522
+ "gra": 394,
523
+ "##na": 395,
524
+ "##pl": 396,
525
+ "sw": 397,
526
+ "##ous": 398,
527
+ "things": 399,
528
+ "bl": 400,
529
+ "excited": 401,
530
+ "your": 402,
531
+ "liked": 403,
532
+ "got": 404,
533
+ "##ny": 405,
534
+ "##uck": 406,
535
+ "##king": 407,
536
+ "##ge": 408,
537
+ "##ided": 409,
538
+ "decided": 410,
539
+ "came": 411,
540
+ "my": 412,
541
+ "dog": 413,
542
+ "this": 414,
543
+ "scared": 415,
544
+ "##ust": 416,
545
+ "down": 417,
546
+ "bec": 418,
547
+ "##ouse": 419,
548
+ "ab": 420,
549
+ "find": 421,
550
+ "pa": 422,
551
+ "care": 423,
552
+ "will": 424,
553
+ "po": 425,
554
+ "sara": 426,
555
+ "gr": 427,
556
+ "feel": 428,
557
+ "anna": 429,
558
+ "##ell": 430,
559
+ "max": 431,
560
+ "##as": 432,
561
+ "##ist": 433,
562
+ "bu": 434,
563
+ "##arn": 435,
564
+ "##ave": 436,
565
+ "##nder": 437,
566
+ "##ways": 438,
567
+ "##ess": 439,
568
+ "always": 440,
569
+ "mo": 441,
570
+ "took": 442,
571
+ "about": 443,
572
+ "##ers": 444,
573
+ "lot": 445,
574
+ "##ook": 446,
575
+ "toys": 447,
576
+ "kne": 448,
577
+ "didn": 449,
578
+ "##ged": 450,
579
+ "##ise": 451,
580
+ "##bb": 452,
581
+ "old": 453,
582
+ "outside": 454,
583
+ "tree": 455,
584
+ "ho": 456,
585
+ "##ite": 457,
586
+ "how": 458,
587
+ "##ally": 459,
588
+ "ball": 460,
589
+ "thought": 461,
590
+ "##ant": 462,
591
+ "sor": 463,
592
+ "af": 464,
593
+ "more": 465,
594
+ "##ma": 466,
595
+ "##eci": 467,
596
+ "##ened": 468,
597
+ "##ched": 469,
598
+ "learn": 470,
599
+ "##ret": 471,
600
+ "pu": 472,
601
+ "tw": 473,
602
+ "know": 474,
603
+ "cat": 475,
604
+ "##to": 476,
605
+ "take": 477,
606
+ "don": 478,
607
+ "mi": 479,
608
+ "laug": 480,
609
+ "pe": 481,
610
+ "speci": 482,
611
+ "special": 483,
612
+ "sudd": 484,
613
+ "sudden": 485,
614
+ "inside": 486,
615
+ "knew": 487,
616
+ "##ty": 488,
617
+ "sorry": 489,
618
+ "##ive": 490,
619
+ "##ro": 491,
620
+ "any": 492,
621
+ "jack": 493,
622
+ "##ff": 494,
623
+ "suddenly": 495,
624
+ "just": 496,
625
+ "toy": 497,
626
+ "##ue": 498,
627
+ "after": 499,
628
+ "tr": 500,
629
+ "if": 501,
630
+ "show": 502,
631
+ "##ink": 503,
632
+ "##lly": 504,
633
+ "or": 505,
634
+ "run": 506,
635
+ "ra": 507,
636
+ "sl": 508,
637
+ "much": 509,
638
+ "##ish": 510,
639
+ "hand": 511,
640
+ "yes": 512,
641
+ "house": 513,
642
+ "sun": 514,
643
+ "op": 515,
644
+ "sk": 516,
645
+ "clo": 517,
646
+ "into": 518,
647
+ "fin": 519,
648
+ "tried": 520,
649
+ "en": 521,
650
+ "water": 522,
651
+ "##ate": 523,
652
+ "told": 524,
653
+ "each": 525,
654
+ "##ea": 526,
655
+ "over": 527,
656
+ "proud": 528,
657
+ "##ump": 529,
658
+ "gave": 530,
659
+ "heard": 531,
660
+ "never": 532,
661
+ "ok": 533,
662
+ "##dy": 534,
663
+ "##by": 535,
664
+ "thank": 536,
665
+ "##use": 537,
666
+ "eat": 538,
667
+ "room": 539,
668
+ "expl": 540,
669
+ "##other": 541,
670
+ "pick": 542,
671
+ "pret": 543,
672
+ "qu": 544,
673
+ "##lled": 545,
674
+ "gre": 546,
675
+ "come": 547,
676
+ "couldn": 548,
677
+ "played": 549,
678
+ "##ion": 550,
679
+ "mia": 551,
680
+ "sha": 552,
681
+ "##ause": 553,
682
+ "because": 554,
683
+ "wat": 555,
684
+ "hugged": 556,
685
+ "##ious": 557,
686
+ "off": 558,
687
+ "bear": 559,
688
+ "now": 560,
689
+ "com": 561,
690
+ "nice": 562,
691
+ "fo": 563,
692
+ "##oth": 564,
693
+ "box": 565,
694
+ "##our": 566,
695
+ "str": 567,
696
+ "need": 568,
697
+ "bet": 569,
698
+ "many": 570,
699
+ "##ile": 571,
700
+ "##fe": 572,
701
+ "##ft": 573,
702
+ "small": 574,
703
+ "##eep": 575,
704
+ "long": 576,
705
+ "##ving": 577,
706
+ "##kes": 578,
707
+ "##sed": 579,
708
+ "##gry": 580,
709
+ "anim": 581,
710
+ "animal": 582,
711
+ "try": 583,
712
+ "end": 584,
713
+ "##ough": 585,
714
+ "unt": 586,
715
+ "##cy": 587,
716
+ "##ild": 588,
717
+ "until": 589,
718
+ "even": 590,
719
+ "##urt": 591,
720
+ "##ort": 592,
721
+ "##elf": 593,
722
+ "soon": 594,
723
+ "##iz": 595,
724
+ "kind": 596,
725
+ "love": 597,
726
+ "learned": 598,
727
+ "bea": 599,
728
+ "everyone": 600,
729
+ "by": 601,
730
+ "ad": 602,
731
+ "better": 603,
732
+ "flow": 604,
733
+ "spot": 605,
734
+ "##ine": 606,
735
+ "best": 607,
736
+ "##urp": 608,
737
+ "say": 609,
738
+ "##mp": 610,
739
+ "##ady": 611,
740
+ "##ream": 612,
741
+ "cle": 613,
742
+ "##urn": 614,
743
+ "##ace": 615,
744
+ "##ves": 616,
745
+ "fi": 617,
746
+ "gard": 618,
747
+ "garden": 619,
748
+ "fast": 620,
749
+ "its": 621,
750
+ "che": 622,
751
+ "careful": 623,
752
+ "##ber": 624,
753
+ "beaut": 625,
754
+ "##ies": 626,
755
+ "bra": 627,
756
+ "thanked": 628,
757
+ "sky": 629,
758
+ "laughed": 630,
759
+ "jump": 631,
760
+ "gl": 632,
761
+ "loud": 633,
762
+ "sn": 634,
763
+ "list": 635,
764
+ "ow": 636,
765
+ "##ear": 637,
766
+ "##ct": 638,
767
+ "##iny": 639,
768
+ "wo": 640,
769
+ "beauti": 641,
770
+ "##sh": 642,
771
+ "##lew": 643,
772
+ "lots": 644,
773
+ "beautiful": 645,
774
+ "hard": 646,
775
+ "still": 647,
776
+ "animals": 648,
777
+ "fam": 649,
778
+ "joh": 650,
779
+ "under": 651,
780
+ "john": 652,
781
+ "lu": 653,
782
+ "mum": 654,
783
+ "stay": 655,
784
+ "hurt": 656,
785
+ "##ning": 657,
786
+ "both": 658,
787
+ "dan": 659,
788
+ "##ree": 660,
789
+ "##self": 661,
790
+ "way": 662,
791
+ "rem": 663,
792
+ "two": 664,
793
+ "##hes": 665,
794
+ "bad": 666,
795
+ "safe": 667,
796
+ "col": 668,
797
+ "##ool": 669,
798
+ "##be": 670,
799
+ "di": 671,
800
+ "red": 672,
801
+ "book": 673,
802
+ "imp": 674,
803
+ "tow": 675,
804
+ "##ople": 676,
805
+ "##ane": 677,
806
+ "lived": 678,
807
+ "people": 679,
808
+ "okay": 680,
809
+ "##em": 681,
810
+ "lucy": 682,
811
+ "walked": 683,
812
+ "should": 684,
813
+ "brave": 685,
814
+ "surp": 686,
815
+ "surpr": 687,
816
+ "##ase": 688,
817
+ "##ock": 689,
818
+ "##ished": 690,
819
+ "angry": 691,
820
+ "family": 692,
821
+ "adv": 693,
822
+ "##ress": 694,
823
+ "##igh": 695,
824
+ "flew": 696,
825
+ "called": 697,
826
+ "stor": 698,
827
+ "##ept": 699,
828
+ "##ip": 700,
829
+ "##eet": 701,
830
+ "sure": 702,
831
+ "##led": 703,
832
+ "fore": 704,
833
+ "kept": 705,
834
+ "fly": 706,
835
+ "share": 707,
836
+ "##ect": 708,
837
+ "##fore": 709,
838
+ "before": 710,
839
+ "##ger": 711,
840
+ "##art": 712,
841
+ "while": 713,
842
+ "##xt": 714,
843
+ "##ised": 715,
844
+ "##dded": 716,
845
+ "pic": 717,
846
+ "keep": 718,
847
+ "door": 719,
848
+ "going": 720,
849
+ "pretty": 721,
850
+ "rock": 722,
851
+ "clean": 723,
852
+ "dra": 724,
853
+ "next": 725,
854
+ "why": 726,
855
+ "may": 727,
856
+ "advent": 728,
857
+ "##ied": 729,
858
+ "##ary": 730,
859
+ "give": 731,
860
+ "noise": 732,
861
+ "con": 733,
862
+ "opened": 734,
863
+ "shiny": 735,
864
+ "far": 736,
865
+ "un": 737,
866
+ "doll": 738,
867
+ "wind": 739,
868
+ "id": 740,
869
+ "cry": 741,
870
+ "grand": 742,
871
+ "real": 743,
872
+ "##illy": 744,
873
+ "##end": 745,
874
+ "sto": 746,
875
+ "##so": 747,
876
+ "turn": 748,
877
+ "explore": 749,
878
+ "ground": 750,
879
+ "also": 751,
880
+ "##les": 752,
881
+ "ey": 753,
882
+ "##ner": 754,
883
+ "idea": 755,
884
+ "color": 756,
885
+ "##ap": 757,
886
+ "where": 758,
887
+ "nodded": 759,
888
+ "war": 760,
889
+ "feeling": 761,
890
+ "blue": 762,
891
+ "bob": 763,
892
+ "picked": 764,
893
+ "##imb": 765,
894
+ "walking": 766,
895
+ "clos": 767,
896
+ "##thing": 768,
897
+ "climb": 769,
898
+ "thr": 770,
899
+ "##ting": 771,
900
+ "has": 772,
901
+ "ple": 773,
902
+ "wait": 774,
903
+ "bed": 775,
904
+ "smile": 776,
905
+ "adventure": 777,
906
+ "##oy": 778,
907
+ "being": 779,
908
+ "##th": 780,
909
+ "maybe": 781,
910
+ "finally": 782,
911
+ "looking": 783,
912
+ "da": 784,
913
+ "##iced": 785,
914
+ "##ture": 786,
915
+ "diff": 787,
916
+ "wr": 788,
917
+ "##and": 789,
918
+ "food": 790,
919
+ "listen": 791,
920
+ "##joy": 792,
921
+ "picture": 793,
922
+ "think": 794,
923
+ "remem": 795,
924
+ "del": 796,
925
+ "truck": 797,
926
+ "tra": 798,
927
+ "eyes": 799,
928
+ "##ught": 800,
929
+ "bro": 801,
930
+ "gi": 802,
931
+ "repl": 803,
932
+ "stopped": 804,
933
+ "here": 805,
934
+ "great": 806,
935
+ "bre": 807,
936
+ "ru": 808,
937
+ "vo": 809,
938
+ "walk": 810,
939
+ "##qu": 811,
940
+ "year": 812,
941
+ "enjoy": 813,
942
+ "wonder": 814,
943
+ "remember": 815,
944
+ "##able": 816,
945
+ "hands": 817,
946
+ "sue": 818,
947
+ "quick": 819,
948
+ "##per": 820,
949
+ "ever": 821,
950
+ "cur": 822,
951
+ "head": 823,
952
+ "flowers": 824,
953
+ "import": 825,
954
+ "forest": 826,
955
+ "##og": 827,
956
+ "ac": 828,
957
+ "noticed": 829,
958
+ "near": 830,
959
+ "##bbit": 831,
960
+ "app": 832,
961
+ "dis": 833,
962
+ "##ized": 834,
963
+ "watch": 835,
964
+ "##irst": 836,
965
+ "rabbit": 837,
966
+ "fish": 838,
967
+ "##llow": 839,
968
+ "important": 840,
969
+ "##age": 841,
970
+ "ama": 842,
971
+ "us": 843,
972
+ "rain": 844,
973
+ "sound": 845,
974
+ "slide": 846,
975
+ "bun": 847,
976
+ "amaz": 848,
977
+ "replied": 849,
978
+ "stop": 850,
979
+ "work": 851,
980
+ "follow": 852,
981
+ "mor": 853,
982
+ "showed": 854,
983
+ "tal": 855,
984
+ "mean": 856,
985
+ "tou": 857,
986
+ "##ces": 858,
987
+ "##gan": 859,
988
+ "right": 860,
989
+ "sarah": 861,
990
+ "ask": 862,
991
+ "##bbed": 863,
992
+ "differ": 864,
993
+ "mag": 865,
994
+ "goodby": 866,
995
+ "our": 867,
996
+ "bright": 868,
997
+ "##day": 869,
998
+ "watched": 870,
999
+ "goodbye": 871,
1000
+ "hop": 872,
1001
+ "please": 873,
1002
+ "use": 874,
1003
+ "strong": 875,
1004
+ "am": 876,
1005
+ "stick": 877,
1006
+ "quickly": 878,
1007
+ "daddy": 879,
1008
+ "been": 880,
1009
+ "voice": 881,
1010
+ "yell": 882,
1011
+ "##ath": 883,
1012
+ "jane": 884,
1013
+ "different": 885,
1014
+ "child": 886,
1015
+ "boat": 887,
1016
+ "##llo": 888,
1017
+ "##co": 889,
1018
+ "##che": 890,
1019
+ "hello": 891,
1020
+ "first": 892,
1021
+ "became": 893,
1022
+ "face": 894,
1023
+ "place": 895,
1024
+ "##ange": 896,
1025
+ "does": 897,
1026
+ "high": 898,
1027
+ "##ng": 899,
1028
+ "store": 900,
1029
+ "closer": 901,
1030
+ "curious": 902,
1031
+ "##ak": 903,
1032
+ "warm": 904,
1033
+ "sand": 905,
1034
+ "dress": 906,
1035
+ "bel": 907,
1036
+ "##ummy": 908,
1037
+ "joe": 909,
1038
+ "cook": 910,
1039
+ "tell": 911,
1040
+ "##ila": 912,
1041
+ "forg": 913,
1042
+ "em": 914,
1043
+ "three": 915,
1044
+ "fav": 916,
1045
+ "grandma": 917,
1046
+ "##oon": 918,
1047
+ "##ount": 919,
1048
+ "open": 920,
1049
+ "lila": 921,
1050
+ "##leep": 922,
1051
+ "night": 923,
1052
+ "cake": 924,
1053
+ "block": 925,
1054
+ "##more": 926,
1055
+ "pie": 927,
1056
+ "anymore": 928,
1057
+ "bunny": 929,
1058
+ "butter": 930,
1059
+ "mon": 931,
1060
+ "##iss": 932,
1061
+ "##ached": 933,
1062
+ "lea": 934,
1063
+ "sweet": 935,
1064
+ "##ired": 936,
1065
+ "kid": 937,
1066
+ "##ull": 938,
1067
+ "only": 939,
1068
+ "flo": 940,
1069
+ "kit": 941,
1070
+ "pain": 942,
1071
+ "grabbed": 943,
1072
+ "duck": 944,
1073
+ "##isy": 945,
1074
+ "fell": 946,
1075
+ "birds": 947,
1076
+ "fire": 948,
1077
+ "cont": 949,
1078
+ "##ered": 950,
1079
+ "jumped": 951,
1080
+ "glad": 952,
1081
+ "pet": 953,
1082
+ "##here": 954,
1083
+ "bit": 955,
1084
+ "per": 956,
1085
+ "##chen": 957,
1086
+ "kitchen": 958,
1087
+ "cra": 959,
1088
+ "helped": 960,
1089
+ "sees": 961,
1090
+ "dr": 962,
1091
+ "##outed": 963,
1092
+ "hear": 964,
1093
+ "sing": 965,
1094
+ "##ul": 966,
1095
+ "prin": 967,
1096
+ "happened": 968,
1097
+ "squ": 969,
1098
+ "yummy": 970,
1099
+ "grass": 971,
1100
+ "tri": 972,
1101
+ "ready": 973,
1102
+ "tommy": 974,
1103
+ "story": 975,
1104
+ "shouted": 976,
1105
+ "beh": 977,
1106
+ "##nts": 978,
1107
+ "really": 979,
1108
+ "brother": 980,
1109
+ "cr": 981,
1110
+ "reached": 982,
1111
+ "realized": 983,
1112
+ "lady": 984,
1113
+ "##ey": 985,
1114
+ "draw": 986,
1115
+ "having": 987,
1116
+ "##ins": 988,
1117
+ "through": 989,
1118
+ "favor": 990,
1119
+ "mess": 991,
1120
+ "cre": 992,
1121
+ "hat": 993,
1122
+ "favorite": 994,
1123
+ "game": 995,
1124
+ "less": 996,
1125
+ "ate": 997,
1126
+ "underst": 998,
1127
+ "soft": 999,
1128
+ "pare": 1000,
1129
+ "##zy": 1001,
1130
+ "thing": 1002,
1131
+ "##ket": 1003,
1132
+ "##ather": 1004,
1133
+ "##imes": 1005,
1134
+ "cu": 1006,
1135
+ "magic": 1007,
1136
+ "pretend": 1008,
1137
+ "began": 1009,
1138
+ "##where": 1010,
1139
+ "world": 1011,
1140
+ "looks": 1012,
1141
+ "##ken": 1013,
1142
+ "butterf": 1014,
1143
+ "himself": 1015,
1144
+ "kids": 1016,
1145
+ "##fully": 1017,
1146
+ "making": 1018,
1147
+ "done": 1019,
1148
+ "wow": 1020,
1149
+ "cut": 1021,
1150
+ "sometimes": 1022,
1151
+ "rest": 1023
1152
  }
1153
  }
1154
  }
tokenizer_config.json CHANGED
@@ -9,7 +9,7 @@
9
  "special": true
10
  },
11
  "1": {
12
- "content": "[CLS]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -17,15 +17,7 @@
17
  "special": true
18
  },
19
  "2": {
20
- "content": "[SEP]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "[PAD]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
 
9
  "special": true
10
  },
11
  "1": {
12
+ "content": "[BOS]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
 
17
  "special": true
18
  },
19
  "2": {
20
+ "content": "[EOS]",
 
 
 
 
 
 
 
 
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,