sijunhe's picture
Upload with huggingface_hub
00bd198
{"!": 2, "!</w>": 345, "\"": 3, "\"</w>": 344, "#": 4, "#</w>": 325, "$": 5, "$</w>": 348, "%": 6, "%</w>": 351, "&": 7, "&</w>": 352, "'": 8, "'</w>": 296, "(": 9, "(</w>": 318, ")": 10, ")</w>": 330, "*": 11, "*</w>": 327, "+": 12, "+</w>": 341, ",": 13, ",</w>": 279, ",@</w>": 754, "-": 14, "-</w>": 276, "-@</w>": 439, ".": 15, ".</w>": 253, ".@</w>": 695, "/": 16, "/</w>": 350, "0": 17, "00</w>": 647, "0</w>": 216, "1": 18, "1</w>": 222, "2": 19, "2</w>": 231, "3": 20, "3</w>": 243, "4": 21, "4</w>": 233, "5": 22, "5</w>": 240, "6": 23, "6</w>": 226, "7": 24, "7</w>": 215, "8": 25, "8</w>": 236, "9": 26, "9</w>": 242, ":": 27, ":</w>": 353, ";": 28, ";</w>": 317, "<": 29, "<</w>": 340, "<|endoftext|>": 1, "<|startoftext|>": 0, "=": 30, "=</w>": 342, ">": 31, "></w>": 300, "?": 32, "?</w>": 346, "@": 33, "@</w>": 320, "A": 34, "A</w>": 227, "B": 35, "B</w>": 258, "C": 36, "C</w>": 239, "D": 37, "D</w>": 255, "E": 38, "E</w>": 246, "F": 39, "F</w>": 213, "G": 40, "G</w>": 283, "H": 41, "H</w>": 219, "I": 42, "I</w>": 237, "J": 43, "J</w>": 251, "K": 44, "K</w>": 254, "L": 45, "L</w>": 218, "M": 46, "M</w>": 234, "N": 47, "N</w>": 238, "O": 48, "O</w>": 265, "P": 49, "P</w>": 245, "Q": 50, "Q</w>": 309, "R": 51, "R</w>": 264, "S": 52, "S</w>": 230, "T": 53, "T</w>": 235, "U": 54, "U</w>": 268, "V": 55, "V</w>": 248, "W": 56, "W</w>": 274, "X": 57, "X</w>": 263, "Y": 58, "Y</w>": 310, "Z": 59, "Z</w>": 207, "[": 60, "[</w>": 270, "\\": 61, "\\</w>": 338, "]": 62, "]</w>": 289, "^": 63, "^</w>": 331, "_": 64, "_</w>": 334, "`": 65, "`</w>": 347, "a": 66, "a</w>": 197, "ab": 555, "able</w>": 820, "ac": 420, "ace</w>": 806, "ach": 791, "ach</w>": 885, "ack</w>": 670, "act": 929, "ad": 508, "ad</w>": 860, "ade</w>": 771, "ag": 511, "age</w>": 710, "ain": 568, "ain</w>": 675, "ak": 577, "ake</w>": 882, "al": 397, "al</w>": 405, "all": 664, "all</w>": 658, "ally</w>": 588, "als</w>": 796, "am": 426, "am</w>": 817, "ame</w>": 552, "ames</w>": 976, "amp": 800, "an": 384, "an</w>": 425, "ance</w>": 751, "and": 609, "and</w>": 780, "ang": 816, "ans</w>": 844, "ant": 837, "ant</w>": 753, "any</w>": 766, "ap": 586, "ar": 376, "ar</w>": 579, "ard</w>": 649, "ards</w>": 982, "ary</w>": 611, "as": 416, "as</w>": 404, "ase</w>": 849, "ased</w>": 814, "ason</w>": 865, "ass": 792, "ast</w>": 661, "at": 372, "at</w>": 434, "ate</w>": 541, "ated</w>": 543, "ater</w>": 709, "ates</w>": 825, "ath": 730, "ating</w>": 922, "ation</w>": 497, "ational</w>": 933, "ations</w>": 744, "att": 903, "aus": 858, "av": 681, "ay": 684, "ay</w>": 523, "b": 67, "b</w>": 212, "ber</w>": 593, "c": 68, "c</w>": 224, "cc": 960, "ce</w>": 496, "ces</w>": 830, "ch": 520, "ch</w>": 603, "ct": 834, "d": 69, "d</w>": 196, "ded</w>": 665, "der</w>": 690, "ding</w>": 633, "ds</w>": 530, "duc": 671, "e": 70, "e</w>": 195, "ea": 471, "ear": 596, "ear</w>": 669, "ears</w>": 906, "eb": 852, "ec": 418, "ect": 838, "ect</w>": 964, "ed": 563, "ed</w>": 362, "ee": 941, "een</w>": 779, "ef": 840, "eg": 731, "el": 407, "el</w>": 610, "eld</w>": 973, "ell": 759, "ell</w>": 756, "ely</w>": 719, "em": 455, "ember</w>": 777, "ement</w>": 959, "emp": 975, "en": 375, "en</w>": 427, "ence</w>": 685, "ens": 880, "ent": 478, "ent</w>": 468, "ents</w>": 674, "ep": 545, "er": 364, "er</w>": 374, "eral</w>": 793, "ere</w>": 481, "ered</w>": 748, "eric": 921, "erm": 861, "ern": 887, "ern</w>": 977, "ers": 598, "ers</w>": 486, "ert": 986, "ery</w>": 805, "es": 402, "es</w>": 388, "ese</w>": 794, "ess": 678, "ess</w>": 693, "est": 606, "est</w>": 584, "et": 460, "et</w>": 594, "etw": 824, "etween</w>": 886, "ev": 493, "evel": 980, "ever</w>": 855, "ew": 687, "ew</w>": 612, "ex": 938, "ey</w>": 713, "f": 71, "f</w>": 209, "fer": 911, "ff": 587, "for": 728, "form": 901, "fter</w>": 634, "g": 72, "g</w>": 214, "ge</w>": 592, "h": 73, "h</w>": 203, "i": 74, "i</w>": 205, "ia</w>": 605, "ial</w>": 672, "ian</w>": 638, "ib": 726, "ic": 395, "ic</w>": 510, "ical</w>": 625, "ice</w>": 782, "ich</w>": 561, "ics</w>": 996, "id": 463, "id</w>": 613, "ide</w>": 739, "ie</w>": 974, "ied</w>": 812, "ies</w>": 516, "if": 524, "ig": 444, "igh": 537, "ight</w>": 680, "ik": 775, "ike</w>": 984, "il": 406, "il</w>": 714, "ile</w>": 721, "ill": 608, "ill</w>": 789, "ily</w>": 950, "im": 469, "im</w>": 767, "ime</w>": 691, "in": 358, "in</w>": 501, "ine</w>": 607, "ing": 557, "ing</w>": 383, "ings</w>": 815, "ion": 472, "ion</w>": 408, "ional</w>": 717, "ions</w>": 540, "ip": 733, "ip</w>": 818, "ir": 453, "ir</w>": 554, "is": 393, "is</w>": 441, "ish": 694, "ish</w>": 654, "ished</w>": 942, "ision</w>": 944, "iss": 876, "ist": 550, "ist</w>": 811, "it": 378, "it</w>": 746, "ite</w>": 760, "ited</w>": 809, "ition</w>": 797, "ity</w>": 542, "iv": 435, "ive</w>": 549, "ived</w>": 979, "iz": 722, "j": 75, "j</w>": 288, "k": 76, "k</w>": 210, "ked</w>": 810, "king</w>": 924, "ks</w>": 692, "l": 77, "l</w>": 201, "la": 467, "land</w>": 743, "ld</w>": 559, "le": 536, "le</w>": 465, "les</w>": 799, "lud": 718, "ly</w>": 433, "m": 78, "m</w>": 202, "ment</w>": 701, "mp": 651, "n": 79, "n</w>": 199, "nd</w>": 369, "ned</w>": 758, "ning</w>": 843, "o": 80, "o</w>": 198, "ob": 920, "oc": 534, "od": 575, "og": 604, "oh": 972, "oin": 831, "ol": 428, "oll": 703, "ollow": 928, "olog": 932, "om": 419, "om</w>": 883, "ome</w>": 663, "on": 382, "on</w>": 390, "ond</w>": 872, "one</w>": 835, "ong": 850, "ong</w>": 582, "oo": 517, "ood</w>": 927, "ook</w>": 897, "op": 531, "op</w>": 971, "or": 377, "or</w>": 424, "ore</w>": 571, "ors</w>": 917, "ort": 768, "ort</w>": 752, "ory</w>": 737, "os": 447, "ose</w>": 881, "ost</w>": 646, "ot": 600, "ot</w>": 879, "ou": 392, "oug": 659, "ough</w>": 798, "ould</w>": 640, "oun": 553, "ound</w>": 961, "our": 648, "our</w>": 772, "ous</w>": 712, "out</w>": 683, "outh</w>": 945, "ov": 515, "ow": 461, "ow</w>": 666, "own</w>": 657, "oy": 952, "p": 81, "p</w>": 217, "per": 715, "ph": 916, "pp": 518, "q": 82, "q</w>": 280, "qu": 546, "r": 83, "r</w>": 204, "ra": 457, "ran": 624, "re": 367, "ree</w>": 765, "ren": 790, "res": 572, "res</w>": 747, "ri": 487, "rib": 804, "ric": 745, "rit": 589, "ro": 385, "rom</w>": 498, "rop": 826, "roug": 803, "ru": 951, "ruc": 891, "ry</w>": 908, "s": 84, "s</w>": 206, "se</w>": 741, "sh": 795, "so</w>": 630, "sp": 992, "ss": 673, "st": 519, "st</w>": 528, "t": 85, "t</w>": 208, "te</w>": 954, "ted</w>": 489, "ter": 535, "ter</w>": 505, "th": 449, "th</w>": 488, "ther</w>": 576, "ting</w>": 676, "tion</w>": 570, "tr": 619, "ts</w>": 436, "tt": 720, "tur": 953, "ty</w>": 821, "u": 86, "u</w>": 229, "ub": 591, "ubl": 842, "uc": 490, "ud": 538, "ue</w>": 652, "ug": 560, "uil": 930, "ul": 494, "um": 532, "um</w>": 644, "un": 448, "und</w>": 828, "up": 833, "up</w>": 700, "ur": 413, "ure</w>": 635, "uring</w>": 702, "ury</w>": 957, "us": 438, "us</w>": 622, "ust</w>": 846, "ut": 529, "ut</w>": 527, "v": 87, "v</w>": 232, "ve</w>": 567, "vi": 866, "w": 88, "w</w>": 250, "way</w>": 970, "wn</w>": 999, "x": 89, "x</w>": 269, "y": 90, "y</w>": 211, "yp": 993, "z": 91, "z</w>": 228, "|": 92, "|</w>": 304, "}": 93, "}</w>": 336, "~": 94, "~</w>": 343, "¡": 95, "¡</w>": 220, "¢": 96, "¢</w>": 306, "£": 97, "£</w>": 323, "¤": 98, "¤</w>": 292, "¥": 99, "¥</w>": 339, "¦": 100, "¦</w>": 303, "§": 101, "§</w>": 275, "¨": 102, "¨</w>": 282, "©": 103, "©</w>": 259, "ª": 104, "ª</w>": 286, "«": 105, "«</w>": 266, "¬": 106, "¬</w>": 319, "®": 107, "®</w>": 329, "¯": 108, "¯</w>": 287, "°": 109, "°</w>": 298, "±": 110, "±</w>": 200, "²": 111, "²</w>": 284, "³": 112, "³</w>": 272, "´": 113, "´</w>": 307, "µ": 114, "µ</w>": 261, "¶": 115, "¶</w>": 301, "·": 116, "·</w>": 326, "¸": 117, "¸</w>": 257, "¹": 118, "¹</w>": 241, "º": 119, "º</w>": 260, "»": 120, "»</w>": 247, "¼": 121, "¼</w>": 305, "½": 122, "½</w>": 294, "¾": 123, "¾</w>": 316, "¿": 124, "¿</w>": 271, "Â": 125, "Ã": 126, "Ä": 127, "Å": 128, "Æ": 129, "Ç": 130, "È": 131, "É": 132, "Ê": 133, "Ë": 134, "Ì": 135, "Í": 136, "Î": 137, "Ï": 138, "Ð": 139, "Ñ": 140, "Ö": 141, "×": 142, "Ø": 143, "Ù": 144, "Ü": 145, "à": 146, "á": 147, "â": 148, "ã": 149, "ä": 150, "å": 151, "æ": 152, "ç": 153, "è": 154, "é": 155, "ë": 156, "ì": 157, "ï": 158, "Ċ": 159, "Ċ</w>": 349, "Ġ": 160, "Ġ\"</w>": 401, "Ġ'</w>": 431, "Ġ(</w>": 475, "Ġ)</w>": 474, "Ġ,</w>": 360, "Ġ.</w>": 365, "Ġ0": 847, "Ġ1": 411, "Ġ18": 769, "Ġ19": 492, "Ġ199": 893, "Ġ1</w>": 778, "Ġ2": 462, "Ġ20": 522, "Ġ200": 620, "Ġ201": 734, "Ġ2</w>": 813, "Ġ3": 735, "Ġ3</w>": 888, "Ġ4": 870, "Ġ5": 907, "Ġ5</w>": 990, "Ġ:</w>": 637, "Ġ;</w>": 615, "Ġ</w>": 333, "Ġ=</w>": 399, "Ġ@": 417, "Ġ@,@</w>": 755, "Ġ@-@</w>": 440, "Ġ@.@</w>": 696, "ĠA": 409, "ĠA</w>": 807, "ĠAl": 716, "ĠAm": 829, "ĠAmeric": 958, "ĠAn": 784, "ĠAr": 894, "ĠB": 432, "ĠC": 410, "ĠCh": 581, "ĠCom": 904, "ĠD": 464, "ĠE": 500, "ĠEn": 878, "ĠF": 470, "ĠG": 482, "ĠH": 445, "ĠHe</w>": 742, "ĠI": 442, "ĠI</w>": 827, "ĠIn": 704, "ĠIn</w>": 574, "ĠIt</w>": 774, "ĠJ": 491, "ĠK": 548, "ĠL": 484, "ĠM": 423, "ĠMar": 776, "ĠN": 483, "ĠO": 504, "ĠP": 450, "ĠPar": 967, "ĠR": 459, "ĠS": 403, "ĠSh": 750, "ĠSt": 590, "ĠT": 396, "ĠTh": 414, "ĠThe</w>": 437, "ĠThis</w>": 997, "ĠU": 585, "ĠUn": 773, "ĠV": 617, "ĠW": 479, "ĠWh": 853, "ĠY": 757, "Ġa": 356, "Ġa</w>": 394, "Ġab": 653, "Ġabout</w>": 899, "Ġac": 583, "Ġacc": 874, "Ġad": 656, "Ġafter</w>": 763, "Ġag": 725, "Ġal": 476, "Ġalb": 991, "Ġall</w>": 839, "Ġalso</w>": 641, "Ġan": 602, "Ġan</w>": 562, "Ġand</w>": 381, "Ġapp": 711, "Ġar": 507, "Ġare</w>": 601, "Ġas</w>": 454, "Ġass": 947, "Ġat</w>": 514, "Ġatt": 788, "Ġb": 371, "Ġbe": 499, "Ġbe</w>": 595, "Ġbec": 706, "Ġbeen</w>": 686, "Ġbeg": 915, "Ġbetween</w>": 914, "Ġbo": 819, "Ġbut</w>": 623, "Ġby</w>": 473, "Ġc": 368, "Ġcent": 823, "Ġch": 526, "Ġchar": 822, "Ġcl": 689, "Ġcom": 509, "Ġcomm": 707, "Ġcomp": 616, "Ġcon": 477, "Ġcons": 841, "Ġcont": 655, "Ġcre": 931, "Ġd": 387, "Ġde": 627, "Ġdec": 873, "Ġdef": 965, "Ġdes": 738, "Ġdi": 892, "Ġdis": 708, "Ġduring</w>": 864, "Ġe": 421, "Ġear": 854, "Ġel": 869, "Ġen": 662, "Ġev": 682, "Ġex": 539, "Ġexp": 867, "Ġf": 370, "Ġfe": 845, "Ġfil": 913, "Ġfin": 786, "Ġfir": 599, "Ġfirst</w>": 626, "Ġfl": 877, "Ġfor": 614, "Ġfor</w>": 458, "Ġform": 963, "Ġfrom</w>": 503, "Ġg": 430, "Ġgame</w>": 895, "Ġgen": 985, "Ġgro": 890, "Ġh": 380, "Ġha": 485, "Ġhad</w>": 566, "Ġhas</w>": 679, "Ġhave</w>": 667, "Ġhe</w>": 558, "Ġher</w>": 660, "Ġhim</w>": 896, "Ġhis</w>": 512, "Ġi": 366, "Ġimp": 909, "Ġin": 429, "Ġin</w>": 389, "Ġinc": 618, "Ġinclud": 761, "Ġind": 983, "Ġint": 628, "Ġinter": 832, "Ġinto</w>": 785, "Ġis</w>": 480, "Ġit</w>": 533, "Ġits</w>": 642, "Ġj": 723, "Ġk": 564, "Ġkn": 857, "Ġl": 398, "Ġlar": 962, "Ġlater</w>": 936, "Ġlea": 868, "Ġm": 386, "Ġmade</w>": 949, "Ġme": 968, "Ġmore</w>": 802, "Ġmost</w>": 910, "Ġmov": 956, "Ġmus": 966, "Ġn": 415, "Ġnew</w>": 989, "Ġno": 547, "Ġnor": 978, "Ġnot</w>": 632, "Ġnum": 926, "Ġo": 359, "Ġof</w>": 373, "Ġoff": 875, "Ġon": 551, "Ġon</w>": 456, "Ġone</w>": 677, "Ġonly</w>": 871, "Ġor": 699, "Ġor</w>": 645, "Ġother</w>": 787, "Ġout</w>": 925, "Ġov": 729, "Ġover</w>": 856, "Ġp": 379, "Ġpar": 636, "Ġper": 736, "Ġpl": 698, "Ġpla": 697, "Ġplay": 808, "Ġpos": 859, "Ġpr": 889, "Ġpre": 749, "Ġpres": 912, "Ġpro": 506, "Ġproduc": 934, "Ġqu": 955, "Ġr": 521, "Ġra": 863, "Ġre": 400, "Ġrec": 597, "Ġrecor": 919, "Ġreg": 943, "Ġrel": 900, "Ġrele": 946, "Ġrem": 848, "Ġrep": 762, "Ġres": 650, "Ġro": 629, "Ġs": 361, "Ġsa": 905, "Ġsc": 732, "Ġse": 569, "Ġseason</w>": 948, "Ġsec": 781, "Ġser": 740, "Ġsev": 884, "Ġsh": 513, "Ġshe</w>": 862, "Ġsp": 578, "Ġspec": 940, "Ġst": 446, "Ġstar": 939, "Ġsu": 770, "Ġsub": 969, "Ġsuc": 764, "Ġsuch</w>": 981, "Ġt": 354, "Ġth": 355, "Ġthan</w>": 918, "Ġthat</w>": 452, "Ġthe": 502, "Ġthe</w>": 357, "Ġtheir</w>": 621, "Ġthem</w>": 998, "Ġthey</w>": 727, "Ġthis</w>": 705, "Ġthree</w>": 902, "Ġthroug": 923, "Ġtime</w>": 783, "Ġto</w>": 391, "Ġtra": 836, "Ġtw": 639, "Ġtwo</w>": 688, "Ġun": 544, "Ġup</w>": 898, "Ġus": 668, "Ġused</w>": 988, "Ġv": 495, "Ġw": 363, "Ġwas</w>": 422, "Ġwere</w>": 525, "Ġwh": 443, "Ġwhen</w>": 851, "Ġwhere</w>": 995, "Ġwhich</w>": 573, "Ġwhile</w>": 935, "Ġwho</w>": 724, "Ġwit": 451, "Ġwith": 994, "Ġwith</w>": 466, "Ġwor": 643, "Ġwould</w>": 801, "Ġwrit": 937, "Ġy": 580, "Ġyear</w>": 987, "Ġâ": 556, "ĠâĢ": 565, "ĠâĢĵ</w>": 631, "ĠĊ</w>": 412, "Ģ": 161, "Ģ</w>": 223, "ģ": 162, "ģ</w>": 273, "Ĥ": 163, "Ĥ</w>": 262, "ĥ": 164, "ĥ</w>": 337, "Ħ": 165, "Ħ</w>": 278, "ħ": 166, "ħ</w>": 281, "Ĩ": 167, "Ĩ</w>": 308, "ĩ": 168, "ĩ</w>": 225, "Ī": 169, "Ī</w>": 221, "ī": 170, "ī</w>": 244, "Ĭ": 171, "Ĭ</w>": 315, "ĭ": 172, "ĭ</w>": 321, "Į": 173, "Į</w>": 324, "į": 174, "į</w>": 302, "İ": 175, "İ</w>": 249, "ı": 176, "ı</w>": 332, "IJ": 177, "IJ</w>": 295, "ij": 178, "ij</w>": 313, "Ĵ": 179, "Ĵ</w>": 328, "ĵ": 180, "ĵ</w>": 312, "Ķ": 181, "Ķ</w>": 256, "ķ": 182, "ķ</w>": 314, "ĸ": 183, "ĸ</w>": 277, "Ĺ": 184, "Ĺ</w>": 322, "ĺ": 185, "ĺ</w>": 285, "Ļ": 186, "Ļ</w>": 267, "ļ": 187, "ļ</w>": 290, "Ľ": 188, "Ľ</w>": 311, "ľ": 189, "ľ</w>": 299, "Ŀ": 190, "Ŀ</w>": 291, "ŀ": 191, "ŀ</w>": 293, "Ł": 192, "Ł</w>": 335, "ł": 193, "ł</w>": 252, "Ń": 194, "Ń</w>": 297}