{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFD" }, { "type": "StripAccents" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "Punctuation", "behavior": "Isolated" }, { "type": "Whitespace" }, { "type": "Digits", "individual_digits": true } ] }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[BOS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "[BOS]": { "id": "[BOS]", "ids": [ 1 ], "tokens": [ "[BOS]" ] } } }, "decoder": null, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[UNK]": 0, "[BOS]": 1, "[EOS]": 2, "!": 3, "\"": 4, "#": 5, "$": 6, "%": 7, "&": 8, "'": 9, "(": 10, ")": 11, "*": 12, "+": 13, ",": 14, "-": 15, ".": 16, "/": 17, "0": 18, "1": 19, "2": 20, "3": 21, "4": 22, "5": 23, "6": 24, "7": 25, "8": 26, "9": 27, ":": 28, ";": 29, "<": 30, "=": 31, ">": 32, "?": 33, "@": 34, "[": 35, "\\": 36, "]": 37, "_": 38, "`": 39, "a": 40, "b": 41, "c": 42, "d": 43, "e": 44, "f": 45, "g": 46, "h": 47, "i": 48, "j": 49, "k": 50, "l": 51, "m": 52, "n": 53, "o": 54, "p": 55, "q": 56, "r": 57, "s": 58, "t": 59, "u": 60, "v": 61, "w": 62, "x": 63, "y": 64, "z": 65, "{": 66, "|": 67, "}": 68, "~": 69, "##a": 70, "##l": 71, "##n": 72, "##c": 73, "##i": 74, "##x": 75, "##m": 76, "##t": 77, "##o": 78, "##s": 79, "##e": 80, "##r": 81, "##v": 82, "##g": 83, "##y": 84, "##k": 85, "##d": 86, "##b": 87, "##p": 88, "##h": 89, "##u": 90, "##w": 91, "##f": 92, "##j": 93, "##z": 94, "##q": 95, "##he": 96, "the": 97, "##nd": 98, "##ed": 99, "and": 100, "to": 101, "##er": 102, "wa": 103, "##ou": 104, "##in": 105, "he": 106, "##re": 107, "was": 108, "sa": 109, "##ing": 110, "##om": 111, "she": 112, "##ar": 113, "##il": 114, "##it": 115, "##ay": 116, "it": 117, "##id": 118, "##at": 119, "they": 120, "ha": 121, "##en": 122, "##is": 123, "##an": 124, "on": 125, "th": 126, "##or": 127, "##im": 128, "##on": 129, "##ut": 130, "her": 131, "##ll": 132, "##le": 133, "##et": 134, "##ot": 135, "##ir": 136, "##es": 137, "in": 138, "##ow": 139, "you": 140, "##ck": 141, "##ld": 142, "##oo": 143, "said": 144, "be": 145, "##ily": 146, "tim": 147, "st": 148, "##ig": 149, "so": 150, "##ce": 151, "##pp": 152, "his": 153, "wit": 154, "with": 155, "mom": 156, "##ve": 157, "lily": 158, "of": 159, "fr": 160, "that": 161, "##ked": 162, "##am": 163, "pl": 164, "##ery": 165, "##ad": 166, "##nt": 167, "##ke": 168, "but": 169, "day": 170, "up": 171, "##ie": 172, "play": 173, "had": 174, "##el": 175, "wh": 176, "for": 177, "##my": 178, "##st": 179, "##un": 180, "##ould": 181, "##ent": 182, "an": 183, "li": 184, "##ra": 185, "##ch": 186, "happ": 187, "one": 188, "##itt": 189, "do": 190, "sh": 191, "want": 192, "##her": 193, "there": 194, "##ly": 195, "very": 196, "##ome": 197, "##se": 198, "not": 199, "##ound": 200, "litt": 201, "little": 202, "as": 203, "ba": 204, "time": 205, "ne": 206, "##ht": 207, "##al": 208, "ma": 209, "happy": 210, "big": 211, "sm": 212, "is": 213, "saw": 214, "##iend": 215, "friend": 216, "loo": 217, "##ry": 218, "re": 219, "bo": 220, "##ur": 221, "##ter": 222, "##ved": 223, "##ug": 224, "once": 225, "lo": 226, "##ere": 227, "were": 228, "##ore": 229, "se": 230, "ev": 231, "go": 232, "sp": 233, "him": 234, "too": 235, "##ide": 236, "ca": 237, "we": 238, "at": 239, "##irl": 240, "tom": 241, "are": 242, "upon": 243, "can": 244, "whe": 245, "girl": 246, "wanted": 247, "##ard": 248, "##ec": 249, "##ill": 250, "out": 251, "ben": 252, "their": 253, "them": 254, "##fu": 255, "##way": 256, "##ys": 257, "did": 258, "##ind": 259, "could": 260, "smil": 261, "##ri": 262, "no": 263, "have": 264, "##ted": 265, "##ver": 266, "##ain": 267, "ex": 268, "all": 269, "##hed": 270, "went": 271, "hel": 272, "ar": 273, "su": 274, "when": 275, "nam": 276, "##ic": 277, "help": 278, "ta": 279, "friends": 280, "##ful": 281, "##ood": 282, "##hing": 283, "##ight": 284, "kn": 285, "what": 286, "le": 287, "##um": 288, "##ark": 289, "back": 290, "##one": 291, "cl": 292, "from": 293, "timmy": 294, "fun": 295, "al": 296, "then": 297, "named": 298, "##all": 299, "ro": 300, "every": 301, "star": 302, "sc": 303, "loved": 304, "smiled": 305, "##oug": 306, "##side": 307, "asked": 308, "##elt": 309, "man": 310, "some": 311, "##ick": 312, "see": 313, "me": 314, "like": 315, "fe": 316, "felt": 317, "##op": 318, "br": 319, "looked": 320, "around": 321, "##ame": 322, "##ep": 323, "bir": 324, "look": 325, "##omet": 326, "would": 327, "##get": 328, "somet": 329, "boy": 330, "fa": 331, "bird": 332, "##ong": 333, "##ss": 334, "pr": 335, "mommy": 336, "##dd": 337, "##est": 338, "##ings": 339, "ag": 340, "jo": 341, "wor": 342, "##ade": 343, "car": 344, "than": 345, "make": 346, "##gether": 347, "together": 348, "tre": 349, "##own": 350, "ran": 351, "away": 352, "la": 353, "dad": 354, "started": 355, "##ice": 356, "##oud": 357, "##ared": 358, "made": 359, "says": 360, "something": 361, "co": 362, "fl": 363, "##ited": 364, "park": 365, "sad": 366, "##ther": 367, "good": 368, "##ack": 369, "exc": 370, "new": 371, "ch": 372, "other": 373, "put": 374, "who": 375, "##out": 376, "let": 377, "mu": 378, "##ble": 379, "again": 380, "home": 381, "hug": 382, "found": 383, "sam": 384, "dec": 385, "##ried": 386, "wal": 387, "##pped": 388, "##ure": 389, "get": 390, "playing": 391, "##ought": 392, "##ach": 393, "##pl": 394, "gra": 395, "sw": 396, "things": 397, "##ous": 398, "excited": 399, "##na": 400, "got": 401, "bl": 402, "##ny": 403, "##king": 404, "##uck": 405, "liked": 406, "your": 407, "##ge": 408, "##ided": 409, "decided": 410, "came": 411, "my": 412, "bec": 413, "dog": 414, "scared": 415, "##ust": 416, "down": 417, "this": 418, "##ouse": 419, "ab": 420, "find": 421, "care": 422, "pa": 423, "gr": 424, "feel": 425, "po": 426, "will": 427, "max": 428, "sara": 429, "##ell": 430, "bu": 431, "##ist": 432, "##arn": 433, "##ways": 434, "##as": 435, "##ave": 436, "always": 437, "anna": 438, "##nder": 439, "didn": 440, "##ess": 441, "mo": 442, "about": 443, "took": 444, "kne": 445, "lot": 446, "toys": 447, "outside": 448, "##ers": 449, "##ook": 450, "tree": 451, "##ally": 452, "af": 453, "##ant": 454, "##ise": 455, "##bb": 456, "##ged": 457, "how": 458, "old": 459, "##ite": 460, "thought": 461, "ball": 462, "ho": 463, "more": 464, "##ma": 465, "##eci": 466, "##ened": 467, "##ched": 468, "sor": 469, "learn": 470, "##ret": 471, "tw": 472, "pu": 473, "cat": 474, "know": 475, "##to": 476, "take": 477, "pe": 478, "don": 479, "laug": 480, "knew": 481, "speci": 482, "sudd": 483, "special": 484, "sudden": 485, "##ty": 486, "mi": 487, "inside": 488, "##ive": 489, "any": 490, "toy": 491, "jack": 492, "suddenly": 493, "##ro": 494, "sorry": 495, "after": 496, "##ff": 497, "just": 498, "##ue": 499, "if": 500, "tr": 501, "show": 502, "##lly": 503, "##ink": 504, "much": 505, "ra": 506, "or": 507, "run": 508, "sl": 509, "##ish": 510, "hand": 511, "house": 512, "sun": 513, "yes": 514, "op": 515, "sk": 516, "clo": 517, "tried": 518, "fin": 519, "told": 520, "into": 521, "en": 522, "##ate": 523, "water": 524, "over": 525, "##ea": 526, "proud": 527, "##ump": 528, "##dy": 529, "##use": 530, "gave": 531, "never": 532, "each": 533, "heard": 534, "eat": 535, "##by": 536, "ok": 537, "expl": 538, "played": 539, "couldn": 540, "room": 541, "thank": 542, "##ause": 543, "pick": 544, "pret": 545, "because": 546, "##other": 547, "qu": 548, "gre": 549, "##lled": 550, "##ion": 551, "come": 552, "sha": 553, "wat": 554, "bear": 555, "mia": 556, "##ious": 557, "off": 558, "hugged": 559, "now": 560, "com": 561, "##oth": 562, "fo": 563, "bet": 564, "need": 565, "nice": 566, "##our": 567, "box": 568, "str": 569, "##ile": 570, "##fe": 571, "many": 572, "##ft": 573, "small": 574, "long": 575, "##eep": 576, "##ving": 577, "##sed": 578, "end": 579, "anim": 580, "animal": 581, "##ough": 582, "try": 583, "unt": 584, "##gry": 585, "##cy": 586, "##kes": 587, "even": 588, "##ort": 589, "until": 590, "##ild": 591, "##urt": 592, "##iz": 593, "##elf": 594, "learned": 595, "soon": 596, "kind": 597, "bea": 598, "everyone": 599, "by": 600, "better": 601, "ad": 602, "flow": 603, "love": 604, "spot": 605, "##mp": 606, "best": 607, "##ine": 608, "cle": 609, "##ady": 610, "##urp": 611, "##ream": 612, "##urn": 613, "##ace": 614, "fi": 615, "say": 616, "##ber": 617, "gard": 618, "garden": 619, "##ves": 620, "fast": 621, "its": 622, "careful": 623, "beaut": 624, "che": 625, "##ies": 626, "bra": 627, "sky": 628, "thanked": 629, "laughed": 630, "jump": 631, "gl": 632, "loud": 633, "ow": 634, "sn": 635, "list": 636, "##ct": 637, "##iny": 638, "##ear": 639, "lots": 640, "##lew": 641, "beauti": 642, "wo": 643, "beautiful": 644, "##sh": 645, "hard": 646, "fam": 647, "still": 648, "animals": 649, "lu": 650, "joh": 651, "under": 652, "john": 653, "stay": 654, "hurt": 655, "##ning": 656, "mum": 657, "both": 658, "dan": 659, "##self": 660, "rem": 661, "way": 662, "##ree": 663, "safe": 664, "two": 665, "##ool": 666, "bad": 667, "col": 668, "##hes": 669, "imp": 670, "di": 671, "lived": 672, "red": 673, "tow": 674, "##ople": 675, "##be": 676, "book": 677, "##em": 678, "people": 679, "##ane": 680, "walked": 681, "okay": 682, "lucy": 683, "surp": 684, "surpr": 685, "brave": 686, "family": 687, "should": 688, "##ase": 689, "adv": 690, "flew": 691, "##ished": 692, "##igh": 693, "##ress": 694, "##ock": 695, "stor": 696, "##ept": 697, "called": 698, "##eet": 699, "##ip": 700, "fore": 701, "angry": 702, "sure": 703, "fly": 704, "while": 705, "kept": 706, "##fore": 707, "##led": 708, "before": 709, "##ect": 710, "##xt": 711, "##ger": 712, "share": 713, "##ised": 714, "##art": 715, "pic": 716, "pretty": 717, "keep": 718, "going": 719, "rock": 720, "door": 721, "##dded": 722, "clean": 723, "##ied": 724, "next": 725, "dra": 726, "advent": 727, "con": 728, "why": 729, "##ary": 730, "un": 731, "##illy": 732, "far": 733, "real": 734, "id": 735, "shiny": 736, "give": 737, "noise": 738, "wind": 739, "opened": 740, "cry": 741, "may": 742, "grand": 743, "##end": 744, "sto": 745, "doll": 746, "ground": 747, "##ner": 748, "explore": 749, "turn": 750, "##so": 751, "##les": 752, "also": 753, "ey": 754, "idea": 755, "color": 756, "war": 757, "feeling": 758, "where": 759, "##ap": 760, "bob": 761, "picked": 762, "blue": 763, "##imb": 764, "nodded": 765, "walking": 766, "climb": 767, "##thing": 768, "clos": 769, "##ting": 770, "thr": 771, "bed": 772, "ple": 773, "wait": 774, "adventure": 775, "being": 776, "smile": 777, "##oy": 778, "finally": 779, "##th": 780, "##iced": 781, "has": 782, "looking": 783, "da": 784, "food": 785, "##ture": 786, "diff": 787, "wr": 788, "remem": 789, "##and": 790, "repl": 791, "maybe": 792, "picture": 793, "##joy": 794, "listen": 795, "del": 796, "tra": 797, "bro": 798, "great": 799, "##ught": 800, "truck": 801, "think": 802, "stopped": 803, "eyes": 804, "walk": 805, "##qu": 806, "gi": 807, "remember": 808, "ru": 809, "bre": 810, "enjoy": 811, "sue": 812, "##able": 813, "here": 814, "import": 815, "vo": 816, "year": 817, "forest": 818, "ever": 819, "quick": 820, "wonder": 821, "ac": 822, "##ized": 823, "flowers": 824, "##og": 825, "hands": 826, "##bbit": 827, "##per": 828, "app": 829, "noticed": 830, "near": 831, "cur": 832, "head": 833, "important": 834, "rabbit": 835, "dis": 836, "watch": 837, "fish": 838, "replied": 839, "bun": 840, "##irst": 841, "##age": 842, "rain": 843, "ama": 844, "##llow": 845, "sound": 846, "showed": 847, "amaz": 848, "mor": 849, "us": 850, "work": 851, "slide": 852, "tal": 853, "follow": 854, "##gan": 855, "sarah": 856, "stop": 857, "right": 858, "##ces": 859, "mag": 860, "tou": 861, "mean": 862, "differ": 863, "goodby": 864, "##bbed": 865, "watched": 866, "bright": 867, "daddy": 868, "##day": 869, "ask": 870, "goodbye": 871, "strong": 872, "our": 873, "use": 874, "please": 875, "quickly": 876, "hop": 877, "am": 878, "been": 879, "stick": 880, "voice": 881, "became": 882, "##ath": 883, "yell": 884, "different": 885, "boat": 886, "jane": 887, "##co": 888, "child": 889, "store": 890, "##che": 891, "##llo": 892, "high": 893, "place": 894, "hello": 895, "first": 896, "face": 897, "##ange": 898, "##ng": 899, "##ummy": 900, "warm": 901, "##ak": 902, "closer": 903, "dress": 904, "curious": 905, "sand": 906, "cook": 907, "fav": 908, "bel": 909, "does": 910, "forg": 911, "em": 912, "joe": 913, "tell": 914, "##ount": 915, "three": 916, "grandma": 917, "##oon": 918, "##leep": 919, "bunny": 920, "night": 921, "butter": 922, "open": 923, "##more": 924, "anymore": 925, "pie": 926, "mon": 927, "cake": 928, "##ila": 929, "##ired": 930, "lea": 931, "##ull": 932, "##iss": 933, "sweet": 934, "##ached": 935, "block": 936, "pain": 937, "lila": 938, "kid": 939, "kit": 940, "duck": 941, "flo": 942, "only": 943, "fell": 944, "cont": 945, "grabbed": 946, "##isy": 947, "birds": 948, "##ered": 949, "helped": 950, "##here": 951, "jumped": 952, "cra": 953, "per": 954, "fire": 955, "pet": 956, "bit": 957, "glad": 958, "##chen": 959, "kitchen": 960, "dr": 961, "sing": 962, "yummy": 963, "squ": 964, "prin": 965, "##ul": 966, "##outed": 967, "happened": 968, "hear": 969, "grass": 970, "story": 971, "realized": 972, "ready": 973, "tommy": 974, "tri": 975, "##nts": 976, "sees": 977, "really": 978, "beh": 979, "brother": 980, "favor": 981, "shouted": 982, "##ey": 983, "draw": 984, "cr": 985, "favorite": 986, "lady": 987, "having": 988, "reached": 989, "through": 990, "ate": 991, "game": 992, "cre": 993, "##zy": 994, "mess": 995, "soft": 996, "pare": 997, "underst": 998, "##ins": 999, "hat": 1000, "##imes": 1001, "less": 1002, "##ather": 1003, "butterf": 1004, "thing": 1005, "##ket": 1006, "magic": 1007, "began": 1008, "##where": 1009, "world": 1010, "cu": 1011, "##ken": 1012, "himself": 1013, "rest": 1014, "##fully": 1015, "##ppy": 1016, "sometimes": 1017, "pretend": 1018, "mouse": 1019, "making": 1020, "cut": 1021, "ted": 1022, "done": 1023 } } }