tdooms committed on
Commit
9bdfd4e
1 Parent(s): e62e286

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -1
  2. tokenizer.json +710 -725
  3. tokenizer_config.json +31 -31
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {}
 
1
+ {}
tokenizer.json CHANGED
@@ -75,12 +75,6 @@
75
  "id": "A",
76
  "type_id": 0
77
  }
78
- },
79
- {
80
- "SpecialToken": {
81
- "id": "[EOS]",
82
- "type_id": 0
83
- }
84
  }
85
  ],
86
  "pair": [
@@ -106,15 +100,6 @@
106
  "tokens": [
107
  "[BOS]"
108
  ]
109
- },
110
- "[EOS]": {
111
- "id": "[EOS]",
112
- "ids": [
113
- 2
114
- ],
115
- "tokens": [
116
- "[EOS]"
117
- ]
118
  }
119
  }
120
  },
@@ -195,32 +180,32 @@
195
  "|": 67,
196
  "}": 68,
197
  "~": 69,
198
- "##e": 70,
199
- "##n": 71,
200
- "##t": 72,
201
- "##l": 73,
202
- "##a": 74,
203
- "##m": 75,
204
- "##s": 76,
205
- "##u": 77,
206
- "##k": 78,
207
- "##o": 79,
208
- "##r": 80,
209
- "##d": 81,
210
- "##f": 82,
211
- "##b": 83,
212
  "##y": 84,
213
- "##i": 85,
214
- "##p": 86,
215
- "##v": 87,
216
- "##g": 88,
217
  "##h": 89,
218
- "##c": 90,
219
  "##w": 91,
220
- "##z": 92,
221
- "##q": 93,
222
- "##j": 94,
223
- "##x": 95,
224
  "##he": 96,
225
  "the": 97,
226
  "##nd": 98,
@@ -250,8 +235,8 @@
250
  "##en": 122,
251
  "##is": 123,
252
  "##an": 124,
253
- "th": 125,
254
- "on": 126,
255
  "##or": 127,
256
  "##im": 128,
257
  "##on": 129,
@@ -272,28 +257,28 @@
272
  "said": 144,
273
  "be": 145,
274
  "##ily": 146,
275
- "st": 147,
276
- "##ig": 148,
277
- "tim": 149,
278
  "so": 150,
279
  "##ce": 151,
280
  "##pp": 152,
281
- "wit": 153,
282
- "his": 154,
283
  "with": 155,
284
  "mom": 156,
285
  "##ve": 157,
286
  "lily": 158,
287
  "of": 159,
288
- "##ked": 160,
289
- "fr": 161,
290
- "that": 162,
291
- "pl": 163,
292
- "##ery": 164,
293
- "##am": 165,
294
  "##ad": 166,
295
- "##ke": 167,
296
- "##nt": 168,
297
  "but": 169,
298
  "day": 170,
299
  "up": 171,
@@ -303,57 +288,57 @@
303
  "##el": 175,
304
  "wh": 176,
305
  "for": 177,
306
- "##st": 178,
307
- "##un": 179,
308
- "##ould": 180,
309
- "an": 181,
310
- "##my": 182,
311
- "##ent": 183,
312
  "li": 184,
313
  "##ra": 185,
314
  "##ch": 186,
315
- "do": 187,
316
- "happ": 188,
317
- "one": 189,
318
- "sh": 190,
319
- "want": 191,
320
- "##itt": 192,
321
  "##her": 193,
322
- "not": 194,
323
- "there": 195,
324
- "##ly": 196,
325
  "##ome": 197,
326
- "very": 198,
327
- "##se": 199,
328
  "##ound": 200,
329
- "as": 201,
330
- "ba": 202,
331
- "litt": 203,
332
- "little": 204,
333
- "ne": 205,
334
- "time": 206,
335
  "##ht": 207,
336
  "##al": 208,
337
  "ma": 209,
338
- "sm": 210,
339
- "happy": 211,
340
- "is": 212,
341
- "big": 213,
342
- "loo": 214,
343
- "saw": 215,
344
- "##iend": 216,
345
- "friend": 217,
346
  "##ry": 218,
347
  "re": 219,
348
  "bo": 220,
349
  "##ur": 221,
350
  "##ter": 222,
351
- "##ug": 223,
352
- "##ere": 224,
353
- "##ved": 225,
354
  "lo": 226,
355
- "were": 227,
356
- "once": 228,
357
  "##ore": 229,
358
  "se": 230,
359
  "ev": 231,
@@ -361,794 +346,794 @@
361
  "sp": 233,
362
  "him": 234,
363
  "too": 235,
364
- "ca": 236,
365
- "##ide": 237,
366
  "we": 238,
367
- "are": 239,
368
- "tom": 240,
369
- "at": 241,
370
- "can": 242,
371
- "##irl": 243,
372
- "##ard": 244,
373
- "wanted": 245,
374
- "whe": 246,
375
- "ben": 247,
376
- "##ill": 248,
377
- "girl": 249,
378
- "upon": 250,
379
- "##ec": 251,
380
- "their": 252,
381
- "out": 253,
382
  "them": 254,
383
- "##ys": 255,
384
- "##fu": 256,
385
- "##way": 257,
386
  "did": 258,
387
- "smil": 259,
388
- "##ind": 260,
389
- "could": 261,
390
- "have": 262,
391
  "no": 263,
392
- "##ri": 264,
393
  "##ted": 265,
394
  "##ver": 266,
395
  "##ain": 267,
396
  "ex": 268,
397
- "##hed": 269,
398
- "all": 270,
399
- "ar": 271,
400
- "went": 272,
401
- "hel": 273,
402
  "su": 274,
403
- "##ic": 275,
404
- "when": 276,
405
- "ta": 277,
406
  "help": 278,
407
- "##ful": 279,
408
- "##ood": 280,
409
- "friends": 281,
410
- "##hing": 282,
411
- "nam": 283,
412
  "##ight": 284,
413
- "what": 285,
414
- "kn": 286,
415
  "le": 287,
416
  "##um": 288,
417
- "back": 289,
418
- "##ark": 290,
419
  "##one": 291,
420
  "cl": 292,
421
  "from": 293,
422
- "fun": 294,
423
- "al": 295,
424
- "then": 296,
425
- "##all": 297,
426
- "timmy": 298,
427
- "ro": 299,
428
- "star": 300,
429
  "every": 301,
430
- "sc": 302,
431
- "smiled": 303,
432
- "named": 304,
433
- "##oug": 305,
434
- "loved": 306,
435
  "##side": 307,
436
- "man": 308,
437
- "some": 309,
438
- "##elt": 310,
439
- "asked": 311,
440
- "see": 312,
441
- "me": 313,
442
- "##ick": 314,
443
  "like": 315,
444
  "fe": 316,
445
- "##op": 317,
446
- "felt": 318,
447
  "br": 319,
448
  "looked": 320,
449
  "around": 321,
450
- "look": 322,
451
  "##ep": 323,
452
- "##ame": 324,
453
- "##omet": 325,
454
- "would": 326,
455
- "bir": 327,
456
- "somet": 328,
457
- "##get": 329,
458
- "fa": 330,
459
- "##ong": 331,
460
- "boy": 332,
461
- "##ss": 333,
462
- "bird": 334,
463
  "pr": 335,
464
- "##dd": 336,
465
- "jo": 337,
466
  "##est": 338,
467
  "##ings": 339,
468
  "ag": 340,
469
- "wor": 341,
470
- "mommy": 342,
471
- "make": 343,
472
- "##ade": 344,
473
  "than": 345,
474
- "car": 346,
475
- "##own": 347,
476
- "tre": 348,
477
- "ran": 349,
478
- "##gether": 350,
479
- "together": 351,
480
- "la": 352,
481
- "away": 353,
482
  "dad": 354,
483
- "says": 355,
484
- "started": 356,
485
- "##ice": 357,
486
- "##oud": 358,
487
- "something": 359,
488
- "##ared": 360,
489
- "made": 361,
490
  "co": 362,
491
  "fl": 363,
492
- "##ther": 364,
493
- "##ited": 365,
494
- "park": 366,
495
- "sad": 367,
496
  "good": 368,
497
  "##ack": 369,
498
- "other": 370,
499
- "ch": 371,
500
- "exc": 372,
501
- "new": 373,
502
  "put": 374,
503
  "who": 375,
504
  "##out": 376,
505
  "let": 377,
506
- "hug": 378,
507
- "mu": 379,
508
- "##ble": 380,
509
- "again": 381,
510
- "home": 382,
511
- "sam": 383,
512
- "found": 384,
513
  "dec": 385,
514
- "##pped": 386,
515
- "##ried": 387,
516
- "wal": 388,
517
  "##ure": 389,
518
  "get": 390,
519
- "##ach": 391,
520
- "playing": 392,
521
- "##ought": 393,
522
- "gra": 394,
523
- "##na": 395,
524
- "##pl": 396,
525
- "sw": 397,
526
  "##ous": 398,
527
- "things": 399,
528
- "bl": 400,
529
- "excited": 401,
530
- "your": 402,
531
- "liked": 403,
532
- "got": 404,
533
- "##ny": 405,
534
- "##uck": 406,
535
- "##king": 407,
536
  "##ge": 408,
537
  "##ided": 409,
538
  "decided": 410,
539
  "came": 411,
540
  "my": 412,
541
- "dog": 413,
542
- "this": 414,
543
  "scared": 415,
544
  "##ust": 416,
545
  "down": 417,
546
- "bec": 418,
547
  "##ouse": 419,
548
  "ab": 420,
549
  "find": 421,
550
- "pa": 422,
551
- "care": 423,
552
- "will": 424,
553
- "po": 425,
554
- "sara": 426,
555
- "gr": 427,
556
- "feel": 428,
557
- "anna": 429,
558
  "##ell": 430,
559
- "max": 431,
560
- "##as": 432,
561
- "##ist": 433,
562
- "bu": 434,
563
- "##arn": 435,
564
  "##ave": 436,
565
- "##nder": 437,
566
- "##ways": 438,
567
- "##ess": 439,
568
- "always": 440,
569
- "mo": 441,
570
- "took": 442,
571
  "about": 443,
572
- "##ers": 444,
573
- "lot": 445,
574
- "##ook": 446,
575
  "toys": 447,
576
- "kne": 448,
577
- "didn": 449,
578
- "##ged": 450,
579
- "##ise": 451,
580
- "##bb": 452,
581
- "old": 453,
582
- "outside": 454,
583
- "tree": 455,
584
- "ho": 456,
585
- "##ite": 457,
586
  "how": 458,
587
- "##ally": 459,
588
- "ball": 460,
589
  "thought": 461,
590
- "##ant": 462,
591
- "sor": 463,
592
- "af": 464,
593
- "more": 465,
594
- "##ma": 466,
595
- "##eci": 467,
596
- "##ened": 468,
597
- "##ched": 469,
598
  "learn": 470,
599
  "##ret": 471,
600
- "pu": 472,
601
- "tw": 473,
602
- "know": 474,
603
- "cat": 475,
604
  "##to": 476,
605
  "take": 477,
606
- "don": 478,
607
- "mi": 479,
608
  "laug": 480,
609
- "pe": 481,
610
  "speci": 482,
611
- "special": 483,
612
- "sudd": 484,
613
  "sudden": 485,
614
- "inside": 486,
615
- "knew": 487,
616
- "##ty": 488,
617
- "sorry": 489,
618
- "##ive": 490,
619
- "##ro": 491,
620
- "any": 492,
621
- "jack": 493,
622
- "##ff": 494,
623
- "suddenly": 495,
624
- "just": 496,
625
- "toy": 497,
626
- "##ue": 498,
627
- "after": 499,
628
- "tr": 500,
629
- "if": 501,
630
  "show": 502,
631
- "##ink": 503,
632
- "##lly": 504,
633
- "or": 505,
634
- "run": 506,
635
- "ra": 507,
636
- "sl": 508,
637
- "much": 509,
638
  "##ish": 510,
639
  "hand": 511,
640
- "yes": 512,
641
- "house": 513,
642
- "sun": 514,
643
  "op": 515,
644
  "sk": 516,
645
  "clo": 517,
646
- "into": 518,
647
  "fin": 519,
648
- "tried": 520,
649
- "en": 521,
650
- "water": 522,
651
  "##ate": 523,
652
- "told": 524,
653
- "each": 525,
654
  "##ea": 526,
655
- "over": 527,
656
- "proud": 528,
657
- "##ump": 529,
658
- "gave": 530,
659
- "heard": 531,
660
  "never": 532,
661
- "ok": 533,
662
- "##dy": 534,
663
- "##by": 535,
664
- "thank": 536,
665
- "##use": 537,
666
- "eat": 538,
667
- "room": 539,
668
- "expl": 540,
669
- "##other": 541,
670
- "pick": 542,
671
- "pret": 543,
672
- "qu": 544,
673
- "##lled": 545,
674
- "gre": 546,
675
- "come": 547,
676
- "couldn": 548,
677
- "played": 549,
678
- "##ion": 550,
679
- "mia": 551,
680
- "sha": 552,
681
- "##ause": 553,
682
- "because": 554,
683
- "wat": 555,
684
- "hugged": 556,
685
  "##ious": 557,
686
  "off": 558,
687
- "bear": 559,
688
  "now": 560,
689
  "com": 561,
690
- "nice": 562,
691
  "fo": 563,
692
- "##oth": 564,
693
- "box": 565,
694
- "##our": 566,
695
- "str": 567,
696
- "need": 568,
697
- "bet": 569,
698
- "many": 570,
699
- "##ile": 571,
700
- "##fe": 572,
701
  "##ft": 573,
702
  "small": 574,
703
- "##eep": 575,
704
- "long": 576,
705
  "##ving": 577,
706
- "##kes": 578,
707
- "##sed": 579,
708
- "##gry": 580,
709
- "anim": 581,
710
- "animal": 582,
711
  "try": 583,
712
- "end": 584,
713
- "##ough": 585,
714
- "unt": 586,
715
- "##cy": 587,
716
- "##ild": 588,
717
- "until": 589,
718
- "even": 590,
719
- "##urt": 591,
720
- "##ort": 592,
721
- "##elf": 593,
722
- "soon": 594,
723
- "##iz": 595,
724
- "kind": 596,
725
- "love": 597,
726
- "learned": 598,
727
- "bea": 599,
728
- "everyone": 600,
729
- "by": 601,
730
  "ad": 602,
731
- "better": 603,
732
- "flow": 604,
733
  "spot": 605,
734
- "##ine": 606,
735
  "best": 607,
736
- "##urp": 608,
737
- "say": 609,
738
- "##mp": 610,
739
- "##ady": 611,
740
  "##ream": 612,
741
- "cle": 613,
742
- "##urn": 614,
743
- "##ace": 615,
744
- "##ves": 616,
745
- "fi": 617,
746
  "gard": 618,
747
  "garden": 619,
748
- "fast": 620,
749
- "its": 621,
750
- "che": 622,
751
  "careful": 623,
752
- "##ber": 624,
753
- "beaut": 625,
754
  "##ies": 626,
755
  "bra": 627,
756
- "thanked": 628,
757
- "sky": 629,
758
  "laughed": 630,
759
  "jump": 631,
760
  "gl": 632,
761
  "loud": 633,
762
- "sn": 634,
763
- "list": 635,
764
- "ow": 636,
765
- "##ear": 637,
766
- "##ct": 638,
767
- "##iny": 639,
768
- "wo": 640,
769
- "beauti": 641,
770
- "##sh": 642,
771
- "##lew": 643,
772
- "lots": 644,
773
- "beautiful": 645,
774
  "hard": 646,
775
- "still": 647,
776
- "animals": 648,
777
- "fam": 649,
778
- "joh": 650,
779
- "under": 651,
780
- "john": 652,
781
- "lu": 653,
782
- "mum": 654,
783
- "stay": 655,
784
- "hurt": 656,
785
- "##ning": 657,
786
  "both": 658,
787
  "dan": 659,
788
- "##ree": 660,
789
- "##self": 661,
790
  "way": 662,
791
- "rem": 663,
792
- "two": 664,
793
- "##hes": 665,
794
- "bad": 666,
795
- "safe": 667,
796
  "col": 668,
797
- "##ool": 669,
798
- "##be": 670,
799
  "di": 671,
800
- "red": 672,
801
- "book": 673,
802
- "imp": 674,
803
- "tow": 675,
804
- "##ople": 676,
805
- "##ane": 677,
806
- "lived": 678,
807
  "people": 679,
808
- "okay": 680,
809
- "##em": 681,
810
- "lucy": 682,
811
- "walked": 683,
812
- "should": 684,
813
- "brave": 685,
814
- "surp": 686,
815
- "surpr": 687,
816
- "##ase": 688,
817
- "##ock": 689,
818
- "##ished": 690,
819
- "angry": 691,
820
- "family": 692,
821
- "adv": 693,
822
  "##ress": 694,
823
- "##igh": 695,
824
- "flew": 696,
825
- "called": 697,
826
- "stor": 698,
827
- "##ept": 699,
828
  "##ip": 700,
829
- "##eet": 701,
830
- "sure": 702,
831
- "##led": 703,
832
- "fore": 704,
833
- "kept": 705,
834
- "fly": 706,
835
- "share": 707,
836
- "##ect": 708,
837
- "##fore": 709,
838
- "before": 710,
839
- "##ger": 711,
840
- "##art": 712,
841
- "while": 713,
842
- "##xt": 714,
843
- "##ised": 715,
844
- "##dded": 716,
845
- "pic": 717,
846
  "keep": 718,
847
- "door": 719,
848
- "going": 720,
849
- "pretty": 721,
850
- "rock": 722,
851
  "clean": 723,
852
- "dra": 724,
853
  "next": 725,
854
- "why": 726,
855
- "may": 727,
856
- "advent": 728,
857
- "##ied": 729,
858
  "##ary": 730,
859
- "give": 731,
860
- "noise": 732,
861
- "con": 733,
862
- "opened": 734,
863
- "shiny": 735,
864
- "far": 736,
865
- "un": 737,
866
- "doll": 738,
867
  "wind": 739,
868
- "id": 740,
869
  "cry": 741,
870
- "grand": 742,
871
- "real": 743,
872
- "##illy": 744,
873
- "##end": 745,
874
- "sto": 746,
875
- "##so": 747,
876
- "turn": 748,
877
  "explore": 749,
878
- "ground": 750,
879
- "also": 751,
880
  "##les": 752,
881
- "ey": 753,
882
- "##ner": 754,
883
  "idea": 755,
884
  "color": 756,
885
- "##ap": 757,
886
- "where": 758,
887
- "nodded": 759,
888
- "war": 760,
889
- "feeling": 761,
890
- "blue": 762,
891
- "bob": 763,
892
- "picked": 764,
893
- "##imb": 765,
894
  "walking": 766,
895
- "clos": 767,
896
  "##thing": 768,
897
- "climb": 769,
898
- "thr": 770,
899
- "##ting": 771,
900
- "has": 772,
901
  "ple": 773,
902
  "wait": 774,
903
- "bed": 775,
904
- "smile": 776,
905
- "adventure": 777,
906
  "##oy": 778,
907
- "being": 779,
908
  "##th": 780,
909
- "maybe": 781,
910
- "finally": 782,
911
  "looking": 783,
912
  "da": 784,
913
- "##iced": 785,
914
  "##ture": 786,
915
  "diff": 787,
916
  "wr": 788,
917
- "##and": 789,
918
- "food": 790,
919
- "listen": 791,
920
- "##joy": 792,
921
  "picture": 793,
922
- "think": 794,
923
- "remem": 795,
924
  "del": 796,
925
- "truck": 797,
926
- "tra": 798,
927
- "eyes": 799,
928
  "##ught": 800,
929
- "bro": 801,
930
- "gi": 802,
931
- "repl": 803,
932
- "stopped": 804,
933
- "here": 805,
934
- "great": 806,
935
- "bre": 807,
936
- "ru": 808,
937
- "vo": 809,
938
- "walk": 810,
939
- "##qu": 811,
940
- "year": 812,
941
- "enjoy": 813,
942
- "wonder": 814,
943
- "remember": 815,
944
- "##able": 816,
945
- "hands": 817,
946
- "sue": 818,
947
- "quick": 819,
948
- "##per": 820,
949
- "ever": 821,
950
- "cur": 822,
951
- "head": 823,
952
  "flowers": 824,
953
- "import": 825,
954
- "forest": 826,
955
- "##og": 827,
956
- "ac": 828,
957
- "noticed": 829,
958
- "near": 830,
959
- "##bbit": 831,
960
- "app": 832,
961
- "dis": 833,
962
- "##ized": 834,
963
- "watch": 835,
964
- "##irst": 836,
965
- "rabbit": 837,
966
  "fish": 838,
967
- "##llow": 839,
968
- "important": 840,
969
- "##age": 841,
970
- "ama": 842,
971
- "us": 843,
972
- "rain": 844,
973
- "sound": 845,
974
- "slide": 846,
975
- "bun": 847,
976
  "amaz": 848,
977
- "replied": 849,
978
- "stop": 850,
979
  "work": 851,
980
- "follow": 852,
981
- "mor": 853,
982
- "showed": 854,
983
- "tal": 855,
984
- "mean": 856,
985
- "tou": 857,
986
- "##ces": 858,
987
- "##gan": 859,
988
- "right": 860,
989
- "sarah": 861,
990
- "ask": 862,
991
- "##bbed": 863,
992
- "differ": 864,
993
- "mag": 865,
994
- "goodby": 866,
995
- "our": 867,
996
- "bright": 868,
997
  "##day": 869,
998
- "watched": 870,
999
  "goodbye": 871,
1000
- "hop": 872,
1001
- "please": 873,
1002
  "use": 874,
1003
- "strong": 875,
1004
- "am": 876,
1005
- "stick": 877,
1006
- "quickly": 878,
1007
- "daddy": 879,
1008
- "been": 880,
1009
  "voice": 881,
1010
- "yell": 882,
1011
  "##ath": 883,
1012
- "jane": 884,
1013
  "different": 885,
1014
- "child": 886,
1015
- "boat": 887,
1016
- "##llo": 888,
1017
- "##co": 889,
1018
- "##che": 890,
1019
- "hello": 891,
1020
- "first": 892,
1021
- "became": 893,
1022
- "face": 894,
1023
- "place": 895,
1024
- "##ange": 896,
1025
- "does": 897,
1026
- "high": 898,
1027
  "##ng": 899,
1028
- "store": 900,
1029
- "closer": 901,
1030
- "curious": 902,
1031
- "##ak": 903,
1032
- "warm": 904,
1033
- "sand": 905,
1034
- "dress": 906,
1035
- "bel": 907,
1036
- "##ummy": 908,
1037
- "joe": 909,
1038
- "cook": 910,
1039
- "tell": 911,
1040
- "##ila": 912,
1041
- "forg": 913,
1042
- "em": 914,
1043
- "three": 915,
1044
- "fav": 916,
1045
  "grandma": 917,
1046
  "##oon": 918,
1047
- "##ount": 919,
1048
- "open": 920,
1049
- "lila": 921,
1050
- "##leep": 922,
1051
- "night": 923,
1052
- "cake": 924,
1053
- "block": 925,
1054
- "##more": 926,
1055
- "pie": 927,
1056
- "anymore": 928,
1057
- "bunny": 929,
1058
- "butter": 930,
1059
- "mon": 931,
1060
- "##iss": 932,
1061
- "##ached": 933,
1062
- "lea": 934,
1063
- "sweet": 935,
1064
- "##ired": 936,
1065
- "kid": 937,
1066
- "##ull": 938,
1067
- "only": 939,
1068
- "flo": 940,
1069
- "kit": 941,
1070
- "pain": 942,
1071
- "grabbed": 943,
1072
- "duck": 944,
1073
- "##isy": 945,
1074
- "fell": 946,
1075
- "birds": 947,
1076
- "fire": 948,
1077
- "cont": 949,
1078
- "##ered": 950,
1079
- "jumped": 951,
1080
- "glad": 952,
1081
- "pet": 953,
1082
- "##here": 954,
1083
- "bit": 955,
1084
- "per": 956,
1085
- "##chen": 957,
1086
- "kitchen": 958,
1087
- "cra": 959,
1088
- "helped": 960,
1089
- "sees": 961,
1090
- "dr": 962,
1091
- "##outed": 963,
1092
- "hear": 964,
1093
- "sing": 965,
1094
  "##ul": 966,
1095
- "prin": 967,
1096
  "happened": 968,
1097
- "squ": 969,
1098
- "yummy": 970,
1099
- "grass": 971,
1100
- "tri": 972,
1101
  "ready": 973,
1102
  "tommy": 974,
1103
- "story": 975,
1104
- "shouted": 976,
1105
- "beh": 977,
1106
- "##nts": 978,
1107
- "really": 979,
1108
  "brother": 980,
1109
- "cr": 981,
1110
- "reached": 982,
1111
- "realized": 983,
1112
- "lady": 984,
1113
- "##ey": 985,
1114
- "draw": 986,
1115
- "having": 987,
1116
- "##ins": 988,
1117
- "through": 989,
1118
- "favor": 990,
1119
- "mess": 991,
1120
- "cre": 992,
1121
- "hat": 993,
1122
- "favorite": 994,
1123
- "game": 995,
1124
- "less": 996,
1125
- "ate": 997,
1126
  "underst": 998,
1127
- "soft": 999,
1128
- "pare": 1000,
1129
- "##zy": 1001,
1130
- "thing": 1002,
1131
- "##ket": 1003,
1132
- "##ather": 1004,
1133
- "##imes": 1005,
1134
- "cu": 1006,
1135
  "magic": 1007,
1136
- "pretend": 1008,
1137
- "began": 1009,
1138
- "##where": 1010,
1139
- "world": 1011,
1140
- "looks": 1012,
1141
- "##ken": 1013,
1142
- "butterf": 1014,
1143
- "himself": 1015,
1144
- "kids": 1016,
1145
- "##fully": 1017,
1146
- "making": 1018,
1147
- "done": 1019,
1148
- "wow": 1020,
1149
  "cut": 1021,
1150
- "sometimes": 1022,
1151
- "rest": 1023
1152
  }
1153
  }
1154
  }
 
75
  "id": "A",
76
  "type_id": 0
77
  }
 
 
 
 
 
 
78
  }
79
  ],
80
  "pair": [
 
100
  "tokens": [
101
  "[BOS]"
102
  ]
 
 
 
 
 
 
 
 
 
103
  }
104
  }
105
  },
 
180
  "|": 67,
181
  "}": 68,
182
  "~": 69,
183
+ "##a": 70,
184
+ "##l": 71,
185
+ "##n": 72,
186
+ "##c": 73,
187
+ "##i": 74,
188
+ "##x": 75,
189
+ "##m": 76,
190
+ "##t": 77,
191
+ "##o": 78,
192
+ "##s": 79,
193
+ "##e": 80,
194
+ "##r": 81,
195
+ "##v": 82,
196
+ "##g": 83,
197
  "##y": 84,
198
+ "##k": 85,
199
+ "##d": 86,
200
+ "##b": 87,
201
+ "##p": 88,
202
  "##h": 89,
203
+ "##u": 90,
204
  "##w": 91,
205
+ "##f": 92,
206
+ "##j": 93,
207
+ "##z": 94,
208
+ "##q": 95,
209
  "##he": 96,
210
  "the": 97,
211
  "##nd": 98,
 
235
  "##en": 122,
236
  "##is": 123,
237
  "##an": 124,
238
+ "on": 125,
239
+ "th": 126,
240
  "##or": 127,
241
  "##im": 128,
242
  "##on": 129,
 
257
  "said": 144,
258
  "be": 145,
259
  "##ily": 146,
260
+ "tim": 147,
261
+ "st": 148,
262
+ "##ig": 149,
263
  "so": 150,
264
  "##ce": 151,
265
  "##pp": 152,
266
+ "his": 153,
267
+ "wit": 154,
268
  "with": 155,
269
  "mom": 156,
270
  "##ve": 157,
271
  "lily": 158,
272
  "of": 159,
273
+ "fr": 160,
274
+ "that": 161,
275
+ "##ked": 162,
276
+ "##am": 163,
277
+ "pl": 164,
278
+ "##ery": 165,
279
  "##ad": 166,
280
+ "##nt": 167,
281
+ "##ke": 168,
282
  "but": 169,
283
  "day": 170,
284
  "up": 171,
 
288
  "##el": 175,
289
  "wh": 176,
290
  "for": 177,
291
+ "##my": 178,
292
+ "##st": 179,
293
+ "##un": 180,
294
+ "##ould": 181,
295
+ "##ent": 182,
296
+ "an": 183,
297
  "li": 184,
298
  "##ra": 185,
299
  "##ch": 186,
300
+ "happ": 187,
301
+ "one": 188,
302
+ "##itt": 189,
303
+ "do": 190,
304
+ "sh": 191,
305
+ "want": 192,
306
  "##her": 193,
307
+ "there": 194,
308
+ "##ly": 195,
309
+ "very": 196,
310
  "##ome": 197,
311
+ "##se": 198,
312
+ "not": 199,
313
  "##ound": 200,
314
+ "litt": 201,
315
+ "little": 202,
316
+ "as": 203,
317
+ "ba": 204,
318
+ "time": 205,
319
+ "ne": 206,
320
  "##ht": 207,
321
  "##al": 208,
322
  "ma": 209,
323
+ "happy": 210,
324
+ "big": 211,
325
+ "sm": 212,
326
+ "is": 213,
327
+ "saw": 214,
328
+ "##iend": 215,
329
+ "friend": 216,
330
+ "loo": 217,
331
  "##ry": 218,
332
  "re": 219,
333
  "bo": 220,
334
  "##ur": 221,
335
  "##ter": 222,
336
+ "##ved": 223,
337
+ "##ug": 224,
338
+ "once": 225,
339
  "lo": 226,
340
+ "##ere": 227,
341
+ "were": 228,
342
  "##ore": 229,
343
  "se": 230,
344
  "ev": 231,
 
346
  "sp": 233,
347
  "him": 234,
348
  "too": 235,
349
+ "##ide": 236,
350
+ "ca": 237,
351
  "we": 238,
352
+ "at": 239,
353
+ "##irl": 240,
354
+ "tom": 241,
355
+ "are": 242,
356
+ "upon": 243,
357
+ "can": 244,
358
+ "whe": 245,
359
+ "girl": 246,
360
+ "wanted": 247,
361
+ "##ard": 248,
362
+ "##ec": 249,
363
+ "##ill": 250,
364
+ "out": 251,
365
+ "ben": 252,
366
+ "their": 253,
367
  "them": 254,
368
+ "##fu": 255,
369
+ "##way": 256,
370
+ "##ys": 257,
371
  "did": 258,
372
+ "##ind": 259,
373
+ "could": 260,
374
+ "smil": 261,
375
+ "##ri": 262,
376
  "no": 263,
377
+ "have": 264,
378
  "##ted": 265,
379
  "##ver": 266,
380
  "##ain": 267,
381
  "ex": 268,
382
+ "all": 269,
383
+ "##hed": 270,
384
+ "went": 271,
385
+ "hel": 272,
386
+ "ar": 273,
387
  "su": 274,
388
+ "when": 275,
389
+ "nam": 276,
390
+ "##ic": 277,
391
  "help": 278,
392
+ "ta": 279,
393
+ "friends": 280,
394
+ "##ful": 281,
395
+ "##ood": 282,
396
+ "##hing": 283,
397
  "##ight": 284,
398
+ "kn": 285,
399
+ "what": 286,
400
  "le": 287,
401
  "##um": 288,
402
+ "##ark": 289,
403
+ "back": 290,
404
  "##one": 291,
405
  "cl": 292,
406
  "from": 293,
407
+ "timmy": 294,
408
+ "fun": 295,
409
+ "al": 296,
410
+ "then": 297,
411
+ "named": 298,
412
+ "##all": 299,
413
+ "ro": 300,
414
  "every": 301,
415
+ "star": 302,
416
+ "sc": 303,
417
+ "loved": 304,
418
+ "smiled": 305,
419
+ "##oug": 306,
420
  "##side": 307,
421
+ "asked": 308,
422
+ "##elt": 309,
423
+ "man": 310,
424
+ "some": 311,
425
+ "##ick": 312,
426
+ "see": 313,
427
+ "me": 314,
428
  "like": 315,
429
  "fe": 316,
430
+ "felt": 317,
431
+ "##op": 318,
432
  "br": 319,
433
  "looked": 320,
434
  "around": 321,
435
+ "##ame": 322,
436
  "##ep": 323,
437
+ "bir": 324,
438
+ "look": 325,
439
+ "##omet": 326,
440
+ "would": 327,
441
+ "##get": 328,
442
+ "somet": 329,
443
+ "boy": 330,
444
+ "fa": 331,
445
+ "bird": 332,
446
+ "##ong": 333,
447
+ "##ss": 334,
448
  "pr": 335,
449
+ "mommy": 336,
450
+ "##dd": 337,
451
  "##est": 338,
452
  "##ings": 339,
453
  "ag": 340,
454
+ "jo": 341,
455
+ "wor": 342,
456
+ "##ade": 343,
457
+ "car": 344,
458
  "than": 345,
459
+ "make": 346,
460
+ "##gether": 347,
461
+ "together": 348,
462
+ "tre": 349,
463
+ "##own": 350,
464
+ "ran": 351,
465
+ "away": 352,
466
+ "la": 353,
467
  "dad": 354,
468
+ "started": 355,
469
+ "##ice": 356,
470
+ "##oud": 357,
471
+ "##ared": 358,
472
+ "made": 359,
473
+ "says": 360,
474
+ "something": 361,
475
  "co": 362,
476
  "fl": 363,
477
+ "##ited": 364,
478
+ "park": 365,
479
+ "sad": 366,
480
+ "##ther": 367,
481
  "good": 368,
482
  "##ack": 369,
483
+ "exc": 370,
484
+ "new": 371,
485
+ "ch": 372,
486
+ "other": 373,
487
  "put": 374,
488
  "who": 375,
489
  "##out": 376,
490
  "let": 377,
491
+ "mu": 378,
492
+ "##ble": 379,
493
+ "again": 380,
494
+ "home": 381,
495
+ "hug": 382,
496
+ "found": 383,
497
+ "sam": 384,
498
  "dec": 385,
499
+ "##ried": 386,
500
+ "wal": 387,
501
+ "##pped": 388,
502
  "##ure": 389,
503
  "get": 390,
504
+ "playing": 391,
505
+ "##ought": 392,
506
+ "##ach": 393,
507
+ "##pl": 394,
508
+ "gra": 395,
509
+ "sw": 396,
510
+ "things": 397,
511
  "##ous": 398,
512
+ "excited": 399,
513
+ "##na": 400,
514
+ "got": 401,
515
+ "bl": 402,
516
+ "##ny": 403,
517
+ "##king": 404,
518
+ "##uck": 405,
519
+ "liked": 406,
520
+ "your": 407,
521
  "##ge": 408,
522
  "##ided": 409,
523
  "decided": 410,
524
  "came": 411,
525
  "my": 412,
526
+ "bec": 413,
527
+ "dog": 414,
528
  "scared": 415,
529
  "##ust": 416,
530
  "down": 417,
531
+ "this": 418,
532
  "##ouse": 419,
533
  "ab": 420,
534
  "find": 421,
535
+ "care": 422,
536
+ "pa": 423,
537
+ "gr": 424,
538
+ "feel": 425,
539
+ "po": 426,
540
+ "will": 427,
541
+ "max": 428,
542
+ "sara": 429,
543
  "##ell": 430,
544
+ "bu": 431,
545
+ "##ist": 432,
546
+ "##arn": 433,
547
+ "##ways": 434,
548
+ "##as": 435,
549
  "##ave": 436,
550
+ "always": 437,
551
+ "anna": 438,
552
+ "##nder": 439,
553
+ "didn": 440,
554
+ "##ess": 441,
555
+ "mo": 442,
556
  "about": 443,
557
+ "took": 444,
558
+ "kne": 445,
559
+ "lot": 446,
560
  "toys": 447,
561
+ "outside": 448,
562
+ "##ers": 449,
563
+ "##ook": 450,
564
+ "tree": 451,
565
+ "##ally": 452,
566
+ "af": 453,
567
+ "##ant": 454,
568
+ "##ise": 455,
569
+ "##bb": 456,
570
+ "##ged": 457,
571
  "how": 458,
572
+ "old": 459,
573
+ "##ite": 460,
574
  "thought": 461,
575
+ "ball": 462,
576
+ "ho": 463,
577
+ "more": 464,
578
+ "##ma": 465,
579
+ "##eci": 466,
580
+ "##ened": 467,
581
+ "##ched": 468,
582
+ "sor": 469,
583
  "learn": 470,
584
  "##ret": 471,
585
+ "tw": 472,
586
+ "pu": 473,
587
+ "cat": 474,
588
+ "know": 475,
589
  "##to": 476,
590
  "take": 477,
591
+ "pe": 478,
592
+ "don": 479,
593
  "laug": 480,
594
+ "knew": 481,
595
  "speci": 482,
596
+ "sudd": 483,
597
+ "special": 484,
598
  "sudden": 485,
599
+ "##ty": 486,
600
+ "mi": 487,
601
+ "inside": 488,
602
+ "##ive": 489,
603
+ "any": 490,
604
+ "toy": 491,
605
+ "jack": 492,
606
+ "suddenly": 493,
607
+ "##ro": 494,
608
+ "sorry": 495,
609
+ "after": 496,
610
+ "##ff": 497,
611
+ "just": 498,
612
+ "##ue": 499,
613
+ "if": 500,
614
+ "tr": 501,
615
  "show": 502,
616
+ "##lly": 503,
617
+ "##ink": 504,
618
+ "much": 505,
619
+ "ra": 506,
620
+ "or": 507,
621
+ "run": 508,
622
+ "sl": 509,
623
  "##ish": 510,
624
  "hand": 511,
625
+ "house": 512,
626
+ "sun": 513,
627
+ "yes": 514,
628
  "op": 515,
629
  "sk": 516,
630
  "clo": 517,
631
+ "tried": 518,
632
  "fin": 519,
633
+ "told": 520,
634
+ "into": 521,
635
+ "en": 522,
636
  "##ate": 523,
637
+ "water": 524,
638
+ "over": 525,
639
  "##ea": 526,
640
+ "proud": 527,
641
+ "##ump": 528,
642
+ "##dy": 529,
643
+ "##use": 530,
644
+ "gave": 531,
645
  "never": 532,
646
+ "each": 533,
647
+ "heard": 534,
648
+ "eat": 535,
649
+ "##by": 536,
650
+ "ok": 537,
651
+ "expl": 538,
652
+ "played": 539,
653
+ "couldn": 540,
654
+ "room": 541,
655
+ "thank": 542,
656
+ "##ause": 543,
657
+ "pick": 544,
658
+ "pret": 545,
659
+ "because": 546,
660
+ "##other": 547,
661
+ "qu": 548,
662
+ "gre": 549,
663
+ "##lled": 550,
664
+ "##ion": 551,
665
+ "come": 552,
666
+ "sha": 553,
667
+ "wat": 554,
668
+ "bear": 555,
669
+ "mia": 556,
670
  "##ious": 557,
671
  "off": 558,
672
+ "hugged": 559,
673
  "now": 560,
674
  "com": 561,
675
+ "##oth": 562,
676
  "fo": 563,
677
+ "bet": 564,
678
+ "need": 565,
679
+ "nice": 566,
680
+ "##our": 567,
681
+ "box": 568,
682
+ "str": 569,
683
+ "##ile": 570,
684
+ "##fe": 571,
685
+ "many": 572,
686
  "##ft": 573,
687
  "small": 574,
688
+ "long": 575,
689
+ "##eep": 576,
690
  "##ving": 577,
691
+ "##sed": 578,
692
+ "end": 579,
693
+ "anim": 580,
694
+ "animal": 581,
695
+ "##ough": 582,
696
  "try": 583,
697
+ "unt": 584,
698
+ "##gry": 585,
699
+ "##cy": 586,
700
+ "##kes": 587,
701
+ "even": 588,
702
+ "##ort": 589,
703
+ "until": 590,
704
+ "##ild": 591,
705
+ "##urt": 592,
706
+ "##iz": 593,
707
+ "##elf": 594,
708
+ "learned": 595,
709
+ "soon": 596,
710
+ "kind": 597,
711
+ "bea": 598,
712
+ "everyone": 599,
713
+ "by": 600,
714
+ "better": 601,
715
  "ad": 602,
716
+ "flow": 603,
717
+ "love": 604,
718
  "spot": 605,
719
+ "##mp": 606,
720
  "best": 607,
721
+ "##ine": 608,
722
+ "cle": 609,
723
+ "##ady": 610,
724
+ "##urp": 611,
725
  "##ream": 612,
726
+ "##urn": 613,
727
+ "##ace": 614,
728
+ "fi": 615,
729
+ "say": 616,
730
+ "##ber": 617,
731
  "gard": 618,
732
  "garden": 619,
733
+ "##ves": 620,
734
+ "fast": 621,
735
+ "its": 622,
736
  "careful": 623,
737
+ "beaut": 624,
738
+ "che": 625,
739
  "##ies": 626,
740
  "bra": 627,
741
+ "sky": 628,
742
+ "thanked": 629,
743
  "laughed": 630,
744
  "jump": 631,
745
  "gl": 632,
746
  "loud": 633,
747
+ "ow": 634,
748
+ "sn": 635,
749
+ "list": 636,
750
+ "##ct": 637,
751
+ "##iny": 638,
752
+ "##ear": 639,
753
+ "lots": 640,
754
+ "##lew": 641,
755
+ "beauti": 642,
756
+ "wo": 643,
757
+ "beautiful": 644,
758
+ "##sh": 645,
759
  "hard": 646,
760
+ "fam": 647,
761
+ "still": 648,
762
+ "animals": 649,
763
+ "lu": 650,
764
+ "joh": 651,
765
+ "under": 652,
766
+ "john": 653,
767
+ "stay": 654,
768
+ "hurt": 655,
769
+ "##ning": 656,
770
+ "mum": 657,
771
  "both": 658,
772
  "dan": 659,
773
+ "##self": 660,
774
+ "rem": 661,
775
  "way": 662,
776
+ "##ree": 663,
777
+ "safe": 664,
778
+ "two": 665,
779
+ "##ool": 666,
780
+ "bad": 667,
781
  "col": 668,
782
+ "##hes": 669,
783
+ "imp": 670,
784
  "di": 671,
785
+ "lived": 672,
786
+ "red": 673,
787
+ "tow": 674,
788
+ "##ople": 675,
789
+ "##be": 676,
790
+ "book": 677,
791
+ "##em": 678,
792
  "people": 679,
793
+ "##ane": 680,
794
+ "walked": 681,
795
+ "okay": 682,
796
+ "lucy": 683,
797
+ "surp": 684,
798
+ "surpr": 685,
799
+ "brave": 686,
800
+ "family": 687,
801
+ "should": 688,
802
+ "##ase": 689,
803
+ "adv": 690,
804
+ "flew": 691,
805
+ "##ished": 692,
806
+ "##igh": 693,
807
  "##ress": 694,
808
+ "##ock": 695,
809
+ "stor": 696,
810
+ "##ept": 697,
811
+ "called": 698,
812
+ "##eet": 699,
813
  "##ip": 700,
814
+ "fore": 701,
815
+ "angry": 702,
816
+ "sure": 703,
817
+ "fly": 704,
818
+ "while": 705,
819
+ "kept": 706,
820
+ "##fore": 707,
821
+ "##led": 708,
822
+ "before": 709,
823
+ "##ect": 710,
824
+ "##xt": 711,
825
+ "##ger": 712,
826
+ "share": 713,
827
+ "##ised": 714,
828
+ "##art": 715,
829
+ "pic": 716,
830
+ "pretty": 717,
831
  "keep": 718,
832
+ "going": 719,
833
+ "rock": 720,
834
+ "door": 721,
835
+ "##dded": 722,
836
  "clean": 723,
837
+ "##ied": 724,
838
  "next": 725,
839
+ "dra": 726,
840
+ "advent": 727,
841
+ "con": 728,
842
+ "why": 729,
843
  "##ary": 730,
844
+ "un": 731,
845
+ "##illy": 732,
846
+ "far": 733,
847
+ "real": 734,
848
+ "id": 735,
849
+ "shiny": 736,
850
+ "give": 737,
851
+ "noise": 738,
852
  "wind": 739,
853
+ "opened": 740,
854
  "cry": 741,
855
+ "may": 742,
856
+ "grand": 743,
857
+ "##end": 744,
858
+ "sto": 745,
859
+ "doll": 746,
860
+ "ground": 747,
861
+ "##ner": 748,
862
  "explore": 749,
863
+ "turn": 750,
864
+ "##so": 751,
865
  "##les": 752,
866
+ "also": 753,
867
+ "ey": 754,
868
  "idea": 755,
869
  "color": 756,
870
+ "war": 757,
871
+ "feeling": 758,
872
+ "where": 759,
873
+ "##ap": 760,
874
+ "bob": 761,
875
+ "picked": 762,
876
+ "blue": 763,
877
+ "##imb": 764,
878
+ "nodded": 765,
879
  "walking": 766,
880
+ "climb": 767,
881
  "##thing": 768,
882
+ "clos": 769,
883
+ "##ting": 770,
884
+ "thr": 771,
885
+ "bed": 772,
886
  "ple": 773,
887
  "wait": 774,
888
+ "adventure": 775,
889
+ "being": 776,
890
+ "smile": 777,
891
  "##oy": 778,
892
+ "finally": 779,
893
  "##th": 780,
894
+ "##iced": 781,
895
+ "has": 782,
896
  "looking": 783,
897
  "da": 784,
898
+ "food": 785,
899
  "##ture": 786,
900
  "diff": 787,
901
  "wr": 788,
902
+ "remem": 789,
903
+ "##and": 790,
904
+ "repl": 791,
905
+ "maybe": 792,
906
  "picture": 793,
907
+ "##joy": 794,
908
+ "listen": 795,
909
  "del": 796,
910
+ "tra": 797,
911
+ "bro": 798,
912
+ "great": 799,
913
  "##ught": 800,
914
+ "truck": 801,
915
+ "think": 802,
916
+ "stopped": 803,
917
+ "eyes": 804,
918
+ "walk": 805,
919
+ "##qu": 806,
920
+ "gi": 807,
921
+ "remember": 808,
922
+ "ru": 809,
923
+ "bre": 810,
924
+ "enjoy": 811,
925
+ "sue": 812,
926
+ "##able": 813,
927
+ "here": 814,
928
+ "import": 815,
929
+ "vo": 816,
930
+ "year": 817,
931
+ "forest": 818,
932
+ "ever": 819,
933
+ "quick": 820,
934
+ "wonder": 821,
935
+ "ac": 822,
936
+ "##ized": 823,
937
  "flowers": 824,
938
+ "##og": 825,
939
+ "hands": 826,
940
+ "##bbit": 827,
941
+ "##per": 828,
942
+ "app": 829,
943
+ "noticed": 830,
944
+ "near": 831,
945
+ "cur": 832,
946
+ "head": 833,
947
+ "important": 834,
948
+ "rabbit": 835,
949
+ "dis": 836,
950
+ "watch": 837,
951
  "fish": 838,
952
+ "replied": 839,
953
+ "bun": 840,
954
+ "##irst": 841,
955
+ "##age": 842,
956
+ "rain": 843,
957
+ "ama": 844,
958
+ "##llow": 845,
959
+ "sound": 846,
960
+ "showed": 847,
961
  "amaz": 848,
962
+ "mor": 849,
963
+ "us": 850,
964
  "work": 851,
965
+ "slide": 852,
966
+ "tal": 853,
967
+ "follow": 854,
968
+ "##gan": 855,
969
+ "sarah": 856,
970
+ "stop": 857,
971
+ "right": 858,
972
+ "##ces": 859,
973
+ "mag": 860,
974
+ "tou": 861,
975
+ "mean": 862,
976
+ "differ": 863,
977
+ "goodby": 864,
978
+ "##bbed": 865,
979
+ "watched": 866,
980
+ "bright": 867,
981
+ "daddy": 868,
982
  "##day": 869,
983
+ "ask": 870,
984
  "goodbye": 871,
985
+ "strong": 872,
986
+ "our": 873,
987
  "use": 874,
988
+ "please": 875,
989
+ "quickly": 876,
990
+ "hop": 877,
991
+ "am": 878,
992
+ "been": 879,
993
+ "stick": 880,
994
  "voice": 881,
995
+ "became": 882,
996
  "##ath": 883,
997
+ "yell": 884,
998
  "different": 885,
999
+ "boat": 886,
1000
+ "jane": 887,
1001
+ "##co": 888,
1002
+ "child": 889,
1003
+ "store": 890,
1004
+ "##che": 891,
1005
+ "##llo": 892,
1006
+ "high": 893,
1007
+ "place": 894,
1008
+ "hello": 895,
1009
+ "first": 896,
1010
+ "face": 897,
1011
+ "##ange": 898,
1012
  "##ng": 899,
1013
+ "##ummy": 900,
1014
+ "warm": 901,
1015
+ "##ak": 902,
1016
+ "closer": 903,
1017
+ "dress": 904,
1018
+ "curious": 905,
1019
+ "sand": 906,
1020
+ "cook": 907,
1021
+ "fav": 908,
1022
+ "bel": 909,
1023
+ "does": 910,
1024
+ "forg": 911,
1025
+ "em": 912,
1026
+ "joe": 913,
1027
+ "tell": 914,
1028
+ "##ount": 915,
1029
+ "three": 916,
1030
  "grandma": 917,
1031
  "##oon": 918,
1032
+ "##leep": 919,
1033
+ "bunny": 920,
1034
+ "night": 921,
1035
+ "butter": 922,
1036
+ "open": 923,
1037
+ "##more": 924,
1038
+ "anymore": 925,
1039
+ "pie": 926,
1040
+ "mon": 927,
1041
+ "cake": 928,
1042
+ "##ila": 929,
1043
+ "##ired": 930,
1044
+ "lea": 931,
1045
+ "##ull": 932,
1046
+ "##iss": 933,
1047
+ "sweet": 934,
1048
+ "##ached": 935,
1049
+ "block": 936,
1050
+ "pain": 937,
1051
+ "lila": 938,
1052
+ "kid": 939,
1053
+ "kit": 940,
1054
+ "duck": 941,
1055
+ "flo": 942,
1056
+ "only": 943,
1057
+ "fell": 944,
1058
+ "cont": 945,
1059
+ "grabbed": 946,
1060
+ "##isy": 947,
1061
+ "birds": 948,
1062
+ "##ered": 949,
1063
+ "helped": 950,
1064
+ "##here": 951,
1065
+ "jumped": 952,
1066
+ "cra": 953,
1067
+ "per": 954,
1068
+ "fire": 955,
1069
+ "pet": 956,
1070
+ "bit": 957,
1071
+ "glad": 958,
1072
+ "##chen": 959,
1073
+ "kitchen": 960,
1074
+ "dr": 961,
1075
+ "sing": 962,
1076
+ "yummy": 963,
1077
+ "squ": 964,
1078
+ "prin": 965,
1079
  "##ul": 966,
1080
+ "##outed": 967,
1081
  "happened": 968,
1082
+ "hear": 969,
1083
+ "grass": 970,
1084
+ "story": 971,
1085
+ "realized": 972,
1086
  "ready": 973,
1087
  "tommy": 974,
1088
+ "tri": 975,
1089
+ "##nts": 976,
1090
+ "sees": 977,
1091
+ "really": 978,
1092
+ "beh": 979,
1093
  "brother": 980,
1094
+ "favor": 981,
1095
+ "shouted": 982,
1096
+ "##ey": 983,
1097
+ "draw": 984,
1098
+ "cr": 985,
1099
+ "favorite": 986,
1100
+ "lady": 987,
1101
+ "having": 988,
1102
+ "reached": 989,
1103
+ "through": 990,
1104
+ "ate": 991,
1105
+ "game": 992,
1106
+ "cre": 993,
1107
+ "##zy": 994,
1108
+ "mess": 995,
1109
+ "soft": 996,
1110
+ "pare": 997,
1111
  "underst": 998,
1112
+ "##ins": 999,
1113
+ "hat": 1000,
1114
+ "##imes": 1001,
1115
+ "less": 1002,
1116
+ "##ather": 1003,
1117
+ "butterf": 1004,
1118
+ "thing": 1005,
1119
+ "##ket": 1006,
1120
  "magic": 1007,
1121
+ "began": 1008,
1122
+ "##where": 1009,
1123
+ "world": 1010,
1124
+ "cu": 1011,
1125
+ "##ken": 1012,
1126
+ "himself": 1013,
1127
+ "rest": 1014,
1128
+ "##fully": 1015,
1129
+ "##ppy": 1016,
1130
+ "sometimes": 1017,
1131
+ "pretend": 1018,
1132
+ "mouse": 1019,
1133
+ "making": 1020,
1134
  "cut": 1021,
1135
+ "ted": 1022,
1136
+ "done": 1023
1137
  }
1138
  }
1139
  }
tokenizer_config.json CHANGED
@@ -1,31 +1,31 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[UNK]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "[BOS]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "[EOS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- }
27
- },
28
- "clean_up_tokenization_spaces": true,
29
- "model_max_length": 1000000000000000019884624838656,
30
- "tokenizer_class": "PreTrainedTokenizerFast"
31
- }
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[BOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "model_max_length": 1000000000000000019884624838656,
30
+ "tokenizer_class": "PreTrainedTokenizerFast"
31
+ }