tr_gpt2_tokenizer_v1k / vocab.json
mismayil's picture
Upload tokenizer
739b70e verified
{
"!": 1,
"\"": 2,
"#": 3,
"$": 4,
"%": 5,
"&": 6,
"'": 7,
"'d": 339,
"'t": 535,
"(": 8,
")": 9,
"),": 538,
"*": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"00": 455,
"1": 17,
"18": 865,
"19": 401,
"199": 831,
"2": 18,
"20": 327,
"200": 692,
"201": 705,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
":#": 914,
";": 27,
"<": 28,
"<|endoftext|>": 0,
"=": 29,
"=\"": 396,
">": 30,
"?": 31,
"@": 32,
"A": 33,
"B": 34,
"C": 35,
"D": 36,
"E": 37,
"F": 38,
"G": 39,
"H": 40,
"I": 41,
"J": 42,
"K": 43,
"L": 44,
"M": 45,
"N": 46,
"O": 47,
"P": 48,
"Q": 49,
"R": 50,
"S": 51,
"T": 52,
"U": 53,
"V": 54,
"W": 55,
"X": 56,
"Y": 57,
"Z": 58,
"[": 59,
"\\": 60,
"]": 61,
"^": 62,
"_": 63,
"`": 64,
"a": 65,
"ab": 362,
"ac": 380,
"ack": 717,
"ad": 361,
"ada": 670,
"ade": 759,
"af": 378,
"ag": 661,
"ah": 334,
"ak": 277,
"aki": 434,
"al": 268,
"alar": 432,
"aları": 770,
"all": 997,
"alı": 450,
"alık": 957,
"am": 296,
"ama": 936,
"aman": 706,
"amp": 738,
"ampiyon": 1007,
"an": 259,
"and": 892,
"anlar": 637,
"anlı": 826,
"ans": 487,
"ant": 1011,
"anı": 512,
"ap": 341,
"ar": 258,
"ara": 871,
"arak": 846,
"ard": 950,
"ark": 486,
"arı": 679,
"as": 286,
"ast": 958,
"asyon": 788,
"ası": 357,
"asında": 539,
"at": 297,
"atı": 864,
"av": 400,
"ay": 285,
"aya": 536,
"ayan": 592,
"ayn": 415,
"aynak": 469,
"ayı": 566,
"az": 336,
"azı": 968,
"aç": 541,
"aÄŁ": 345,
"aģı": 801,
"aÅŁ": 315,
"b": 66,
"back": 845,
"background": 858,
"bir": 919,
"bol": 440,
"bul": 874,
"c": 67,
"ca": 479,
"cak": 615,
"ce": 485,
"cen": 640,
"center": 666,
"ch": 561,
"ci": 580,
"col": 756,
"color": 842,
"cu": 391,
"cı": 586,
"d": 68,
"da": 293,
"daki": 610,
"dan": 349,
"de": 301,
"deki": 676,
"den": 404,
"der": 780,
"di": 353,
"dir": 422,
"du": 379,
"dur": 870,
"dü": 750,
"dül": 787,
"dür": 709,
"dı": 299,
"dır": 376,
"dıģı": 684,
"dÄ±ÅŁ": 593,
"e": 69,
"eb": 529,
"ec": 433,
"ed": 319,
"ede": 901,
"eden": 697,
"edi": 955,
"edir": 635,
"ef": 685,
"eh": 505,
"ek": 291,
"eki": 493,
"ekt": 463,
"ektedir": 687,
"el": 283,
"eler": 484,
"eleri": 665,
"eli": 735,
"em": 320,
"en": 266,
"eni": 558,
"ent": 920,
"ep": 703,
"er": 260,
"erek": 965,
"eri": 374,
"erik": 599,
"erk": 496,
"erkez": 730,
"ers": 502,
"es": 290,
"esi": 372,
"esinde": 776,
"esine": 708,
"est": 633,
"et": 287,
"ev": 348,
"ey": 343,
"eye": 773,
"ez": 403,
"eç": 442,
"eÅŁ": 500,
"f": 70,
"ft": 714,
"g": 71,
"ge": 930,
"gi": 701,
"gil": 590,
"gro": 835,
"ground": 854,
"h": 72,
"han": 802,
"he": 492,
"ht": 1012,
"hur": 959,
"i": 73,
"ia": 808,
"ib": 530,
"ibi": 654,
"ic": 418,
"id": 498,
"if": 598,
"ig": 429,
"ign": 503,
"ih": 452,
"ik": 300,
"il": 270,
"ildi": 648,
"ilen": 621,
"iler": 451,
"ileri": 840,
"ili": 768,
"ilir": 606,
"ilm": 389,
"ilmiÅŁtir": 948,
"im": 318,
"imi": 798,
"in": 264,
"inal": 1005,
"inde": 363,
"inden": 589,
"ine": 370,
"ing": 604,
"ini": 398,
"inin": 423,
"ion": 823,
"ip": 471,
"ir": 271,
"is": 302,
"isi": 443,
"ist": 412,
"istan": 777,
"it": 328,
"iv": 544,
"ivers": 803,
"iversit": 833,
"iy": 314,
"iyas": 891,
"iyat": 800,
"iye": 508,
"iyet": 647,
"iyle": 671,
"iyon": 699,
"iz": 355,
"iç": 973,
"iÄŁ": 646,
"iÅŁ": 377,
"iÅŁtir": 510,
"j": 74,
"k": 75,
"kaynak": 499,
"kaynakça": 511,
"ken": 639,
"ket": 889,
"ki": 483,
"kin": 749,
"km": 1009,
"l": 76,
"la": 352,
"ladı": 764,
"lam": 460,
"lan": 317,
"lar": 276,
"lara": 931,
"larak": 395,
"larda": 885,
"ları": 344,
"ların": 578,
"larında": 848,
"larını": 960,
"lat": 986,
"laÅŁ": 681,
"le": 310,
"left": 821,
"lem": 675,
"len": 466,
"ler": 295,
"leri": 365,
"lerin": 742,
"lerini": 964,
"let": 527,
"ley": 939,
"leÅŁ": 632,
"li": 337,
"lik": 445,
"liÄŁ": 1018,
"liÄŁi": 795,
"lu": 397,
"luk": 1023,
"lü": 737,
"lı": 329,
"lık": 528,
"m": 77,
"ma": 438,
"mak": 457,
"maktadır": 683,
"man": 435,
"mas": 704,
"ması": 571,
"maya": 995,
"me": 600,
"mek": 887,
"mektedir": 868,
"men": 628,
"met": 733,
"mi": 847,
"miÅŁ": 910,
"miÅŁtir": 799,
"mÄ±ÅŁ": 698,
"mÄ±ÅŁtır": 572,
"n": 78,
"na": 748,
"nda": 1016,
"nde": 974,
"ne": 645,
"ngil": 822,
"ngiliz": 1017,
"nin": 553,
"nın": 574,
"o": 79,
"ob": 757,
"oc": 975,
"od": 614,
"og": 657,
"oj": 816,
"ok": 402,
"ol": 306,
"oloj": 894,
"om": 382,
"on": 288,
"op": 428,
"or": 307,
"os": 416,
"ot": 514,
"ov": 534,
"ow": 839,
"oy": 579,
"oz": 785,
"oÄŁ": 388,
"p": 80,
"par": 918,
"por": 850,
"q": 81,
"r": 82,
"ra": 437,
"raf": 782,
"ram": 820,
"ran": 725,
"rans": 758,
"re": 568,
"ren": 745,
"ret": 961,
"ri": 672,
"ro": 375,
"rup": 956,
"rı": 663,
"rıca": 827,
"s": 83,
"sa": 888,
"se": 979,
"sel": 660,
"si": 945,
"son": 990,
"sp": 573,
"span": 664,
"st": 368,
"stan": 834,
"stanbul": 932,
"ster": 813,
"t": 84,
"ta": 394,
"tadır": 582,
"tal": 753,
"tan": 659,
"tar": 829,
"te": 490,
"ter": 430,
"th": 650,
"ti": 383,
"tr": 682,
"türk": 924,
"tı": 399,
"tılar": 576,
"tır": 674,
"u": 85,
"ub": 581,
"uh": 862,
"uk": 555,
"ul": 312,
"um": 356,
"umhur": 1008,
"umlu": 677,
"un": 292,
"una": 890,
"und": 673,
"unda": 617,
"unu": 643,
"unun": 766,
"up": 474,
"ur": 308,
"us": 338,
"ust": 792,
"usu": 715,
"ut": 360,
"utbol": 472,
"uv": 805,
"uy": 804,
"uz": 545,
"uÅŁ": 517,
"uÅŁt": 613,
"uÅŁtur": 649,
"v": 86,
"van": 976,
"w": 87,
"x": 88,
"y": 89,
"ya": 347,
"yan": 866,
"ye": 967,
"yi": 836,
"yla": 562,
"yle": 767,
"yon": 489,
"yıl": 851,
"z": 90,
"{": 91,
"|": 92,
"|-": 591,
"}": 93,
"~": 94,
"¡": 95,
"¢": 96,
"£": 97,
"¤": 98,
"¥": 99,
"¦": 100,
"§": 101,
"¨": 102,
"©": 103,
"ª": 104,
"«": 105,
"¬": 106,
"®": 107,
"¯": 108,
"°": 109,
"±": 110,
"²": 111,
"³": 112,
"´": 113,
"µ": 114,
"¶": 115,
"·": 116,
"¸": 117,
"¹": 118,
"º": 119,
"»": 120,
"¼": 121,
"½": 122,
"¾": 123,
"¿": 124,
"À": 125,
"Á": 126,
"Â": 127,
"Âł": 577,
"Ã": 128,
"â": 658,
"ç": 272,
"ça": 462,
"çe": 761,
"çek": 906,
"çi": 830,
"é": 927,
"î": 694,
"ö": 280,
"öl": 497,
"ön": 425,
"ör": 515,
"öy": 542,
"öz": 736,
"ü": 261,
"üc": 853,
"üf": 778,
"ük": 385,
"ül": 386,
"ült": 1015,
"üm": 390,
"ün": 322,
"ünde": 977,
"üp": 949,
"ür": 321,
"ürk": 458,
"üs": 688,
"üst": 991,
"üt": 722,
"üy": 468,
"üz": 364,
"üç": 588,
"Ã¼ÅŁ": 597,
"Ä": 129,
"ı": 257,
"ık": 373,
"ıl": 304,
"ılan": 651,
"ıldı": 754,
"ılı": 794,
"ım": 371,
"ın": 269,
"ına": 424,
"ında": 333,
"ındaki": 883,
"ından": 409,
"ını": 410,
"ının": 477,
"ır": 325,
"ıs": 413,
"ısı": 627,
"ız": 481,
"Ä±ÅŁ": 369,
"Ä±ÅŁt": 475,
"Ä±ÅŁtır": 509,
"ÄŁ": 275,
"ÄŁi": 427,
"ÄŁu": 559,
"ģı": 447,
"Å": 130,
"ÅŁ": 263,
"ÅŁt": 332,
"ÅŁtir": 431,
"Æ": 131,
"Ç": 132,
"È": 133,
"É": 134,
"Ê": 135,
"Ë": 136,
"Ì": 137,
"Ìĩ": 340,
"Í": 138,
"Î": 139,
"Ï": 140,
"Ð": 141,
"Ñ": 142,
"Ò": 143,
"Ó": 144,
"Ô": 145,
"Õ": 146,
"Ö": 147,
"×": 148,
"Ø": 149,
"Ù": 150,
"Ú": 151,
"Û": 152,
"Ü": 153,
"Ý": 154,
"Þ": 155,
"ß": 156,
"à": 157,
"á": 158,
"â": 159,
"âĢ": 387,
"âĢĻ": 516,
"ã": 160,
"ä": 161,
"å": 162,
"æ": 163,
"ç": 164,
"è": 165,
"é": 166,
"ê": 167,
"ë": 168,
"ì": 169,
"í": 170,
"î": 171,
"ï": 172,
"ð": 173,
"ñ": 174,
"ò": 175,
"ó": 176,
"ô": 177,
"õ": 178,
"ö": 179,
"÷": 180,
"ø": 181,
"ù": 182,
"ú": 183,
"û": 184,
"ü": 185,
"ý": 186,
"þ": 187,
"ÿ": 188,
"Ā": 189,
"ā": 190,
"Ă": 191,
"ă": 192,
"Ą": 193,
"ą": 194,
"Ć": 195,
"ć": 196,
"Ĉ": 197,
"ĉ": 198,
"Ċ": 199,
"ċ": 200,
"Č": 201,
"č": 202,
"Ď": 203,
"ď": 204,
"Đ": 205,
"đ": 206,
"Ē": 207,
"ē": 208,
"Ĕ": 209,
"ĕ": 210,
"Ė": 211,
"ė": 212,
"Ę": 213,
"ę": 214,
"Ě": 215,
"ě": 216,
"Ĝ": 217,
"ĝ": 218,
"Ğ": 219,
"ğ": 220,
"Ġ": 221,
"Ġ\"": 439,
"Ġ%": 980,
"Ġ(": 316,
"Ġ-": 419,
"Ġ1": 311,
"Ġ18": 690,
"Ġ19": 350,
"Ġ196": 902,
"Ġ197": 815,
"Ġ198": 751,
"Ġ199": 636,
"Ġ2": 461,
"Ġ20": 359,
"Ġ200": 518,
"Ġ201": 495,
"Ġ3": 563,
"Ġ4": 695,
"Ġ5": 771,
"Ġ6": 857,
"Ġ7": 942,
"Ġ8": 1013,
"Ġa": 313,
"Ġab": 873,
"Ġad": 504,
"Ġadı": 875,
"Ġak": 584,
"Ġal": 323,
"Ġalan": 818,
"Ġalb": 824,
"Ġalbüm": 912,
"Ġalign": 547,
"Ġalt": 760,
"Ġam": 524,
"Ġamerik": 781,
"Ġan": 366,
"Ġancak": 880,
"Ġand": 981,
"Ġar": 358,
"Ġarasında": 763,
"Ġas": 556,
"Ġat": 587,
"Ġav": 700,
"Ġay": 507,
"Ġayn": 877,
"Ġaynı": 929,
"Ġaz": 893,
"Ġaç": 668,
"ĠaÄŁ": 796,
"ĠaÅŁ": 1021,
"Ġb": 262,
"Ġbak": 728,
"Ġbas": 841,
"ĠbaÄŁ": 421,
"ĠbaÄŁlan": 567,
"ĠbaÄŁlantılar": 603,
"ĠbaÄŁlı": 693,
"ĠbaÅŁ": 408,
"ĠbaÅŁk": 779,
"Ġbel": 557,
"Ġbelir": 978,
"Ġbil": 522,
"Ġbir": 303,
"ĠbirleÅŁ": 1006,
"Ġbirlik": 849,
"Ġbirlikte": 999,
"Ġboy": 993,
"Ġbu": 392,
"Ġbul": 482,
"Ġbulun": 549,
"Ġbulunan": 992,
"Ġböl": 552,
"Ġbölg": 744,
"Ġbölüm": 944,
"Ġbüy": 620,
"Ġbüyük": 662,
"Ġc": 324,
"Ġch": 743,
"Ġcol": 724,
"Ġcolspan": 838,
"Ġd": 273,
"Ġda": 476,
"Ġdah": 532,
"Ġdaha": 575,
"Ġde": 406,
"Ġden": 762,
"Ġder": 810,
"Ġdev": 546,
"Ġdevlet": 731,
"ĠdeÄŁ": 720,
"Ġdiz": 925,
"ĠdiÄŁ": 895,
"ĠdiÄŁer": 911,
"ĠdoÄŁ": 473,
"ĠdoÄŁumlu": 741,
"ĠdoÄŁumlular": 872,
"Ġdur": 926,
"Ġdön": 596,
"Ġdönem": 807,
"Ġdün": 723,
"Ġdünya": 884,
"Ġdüz": 855,
"ĠdÃ¼ÅŁ": 897,
"Ġe": 449,
"Ġed": 453,
"Ġek": 644,
"Ġel": 531,
"Ġen": 456,
"Ġer": 797,
"Ġerk": 982,
"Ġes": 607,
"Ġet": 444,
"Ġetti": 923,
"Ġev": 765,
"Ġey": 852,
"ĠeÄŁ": 938,
"Ġf": 305,
"Ġfilm": 551,
"Ġfor": 953,
"Ġfutbol": 478,
"Ġfutbolcu": 537,
"Ġfutbolcuları": 669,
"Ġg": 281,
"Ġgel": 501,
"Ġgen": 564,
"Ġgenel": 828,
"Ġger": 638,
"Ġgeç": 602,
"Ġgibi": 711,
"Ġgir": 837,
"Ġgr": 611,
"Ġgö": 367,
"Ġgör": 436,
"Ġgöre": 859,
"Ġgörev": 863,
"Ġgöster": 907,
"Ġgün": 585,
"Ġh": 298,
"Ġhak": 903,
"Ġhal": 634,
"Ġhar": 716,
"Ġhay": 817,
"Ġhaz": 941,
"Ġher": 793,
"Ġi": 289,
"Ġiki": 832,
"Ġil": 331,
"Ġile": 417,
"Ġilk": 622,
"Ġilç": 954,
"Ġin": 459,
"Ġins": 812,
"Ġis": 525,
"Ġise": 726,
"Ġist": 921,
"Ġiç": 393,
"Ġiçin": 446,
"ĠiÅŁ": 680,
"Ġj": 467,
"Ġk": 265,
"Ġkab": 971,
"Ġkad": 540,
"Ġkadar": 775,
"Ġkadın": 935,
"Ġkal": 569,
"Ġkan": 904,
"Ġkap": 879,
"Ġkar": 411,
"ĠkarÅŁ": 689,
"ĠkarÅŁÄ±": 1010,
"Ġkat": 629,
"Ġkay": 933,
"Ġkaz": 727,
"Ġkazan": 881,
"Ġken": 630,
"ĠkiÅŁ": 867,
"Ġkom": 772,
"Ġkon": 526,
"Ġkor": 876,
"Ġkr": 734,
"Ġkul": 519,
"Ġkullan": 583,
"Ġkur": 488,
"Ġkurul": 739,
"Ġkö": 1003,
"Ġköy": 856,
"Ġkıs": 966,
"Ġl": 420,
"Ġle": 811,
"Ġli": 625,
"Ġm": 284,
"Ġmad": 928,
"Ġmah": 814,
"Ġmar": 667,
"Ġmaç": 994,
"Ġmerkez": 747,
"Ġmet": 972,
"Ġmil": 618,
"Ġmüz": 791,
"Ġn": 346,
"Ġne": 970,
"Ġneden": 989,
"Ġo": 279,
"Ġof": 732,
"Ġok": 908,
"Ġol": 342,
"Ġolan": 480,
"Ġolarak": 407,
"Ġoldu": 491,
"ĠolduÄŁu": 922,
"Ġolm": 806,
"Ġon": 608,
"Ġor": 454,
"Ġort": 899,
"Ġos": 1014,
"Ġoy": 464,
"Ġoyun": 560,
"Ġoyuncu": 898,
"Ġp": 309,
"Ġpar": 465,
"Ġpart": 844,
"Ġpartisi": 1002,
"Ġpol": 996,
"Ġpro": 619,
"Ġr": 330,
"Ġres": 915,
"Ġrom": 940,
"Ġs": 267,
"Ġsah": 595,
"Ġsahip": 819,
"Ġsal": 952,
"Ġsan": 655,
"Ġsav": 652,
"ĠsavaÅŁ": 774,
"Ġsay": 712,
"ĠsaÄŁ": 878,
"Ġser": 825,
"Ġsez": 909,
"Ġsezon": 946,
"Ġseç": 786,
"Ġson": 405,
"Ġsonra": 554,
"Ġsonu": 947,
"Ġst": 521,
"Ġstyle": 984,
"Ġsu": 983,
"Ġsür": 605,
"Ġsır": 686,
"Ġt": 278,
"Ġtak": 601,
"Ġtakım": 678,
"Ġtam": 943,
"Ġtan": 713,
"Ġtar": 384,
"Ġtaraf": 494,
"Ġtarafından": 523,
"Ġtarih": 565,
"Ġtarihinde": 1004,
"ĠtaÅŁ": 985,
"Ġtek": 631,
"Ġtel": 1001,
"Ġtem": 707,
"Ġthe": 616,
"Ġtop": 594,
"Ġtur": 963,
"Ġtür": 934,
"Ġtürk": 543,
"Ġtürkiye": 913,
"Ġu": 426,
"Ġul": 710,
"Ġulus": 987,
"Ġuy": 755,
"Ġuz": 691,
"Ġv": 282,
"Ġvar": 729,
"Ġve": 294,
"Ġver": 548,
"Ġveya": 769,
"Ġw": 448,
"Ġy": 274,
"Ġyak": 740,
"Ġyan": 988,
"Ġyap": 414,
"Ġyaptı": 1020,
"Ġyar": 550,
"Ġyay": 612,
"Ġyayın": 861,
"Ġyaz": 570,
"ĠyaÅŁ": 641,
"Ġyeni": 746,
"Ġyer": 470,
"Ġyol": 882,
"Ġyön": 609,
"Ġyönet": 790,
"Ġyük": 783,
"Ġyüz": 656,
"Ġyüzyıl": 905,
"Ġyıl": 381,
"Ġyılında": 520,
"Ġz": 441,
"Ġzaman": 843,
"Ġ|": 506,
"Ġ||": 789,
"Ġç": 326,
"Ġçal": 719,
"ĠçalÄ±ÅŁ": 784,
"Ġçek": 951,
"Ġçok": 718,
"Ġçık": 623,
"Ġö": 335,
"Ġödül": 869,
"Ġöl": 696,
"Ġön": 513,
"Ġör": 1019,
"Ġöz": 624,
"Ġözel": 860,
"ĠÃ¶ÄŁ": 886,
"Ġül": 916,
"Ġülk": 1022,
"Ġün": 752,
"Ġüniversit": 900,
"Ġür": 917,
"Ġüy": 1000,
"Ġüz": 626,
"Ġüzer": 642,
"Ġüç": 937,
"ĠÅŁ": 351,
"ĠÅŁark": 702,
"ĠÅŁarkı": 809,
"ĠÅŁeh": 721,
"ĠÅŁek": 896,
"ĠâĢ": 653,
"ĠâĢĵ": 969,
"ĠĊ": 354,
"ĠĊĊ": 998,
"ĠĠ": 533,
"ĠĠĠĠ": 962,
"ġ": 222,
"Ģ": 223,
"ģ": 224,
"Ĥ": 225,
"ĥ": 226,
"Ħ": 227,
"ħ": 228,
"Ĩ": 229,
"ĩ": 230,
"Ī": 231,
"ī": 232,
"Ĭ": 233,
"ĭ": 234,
"Į": 235,
"į": 236,
"İ": 237,
"ı": 238,
"IJ": 239,
"ij": 240,
"Ĵ": 241,
"ĵ": 242,
"Ķ": 243,
"ķ": 244,
"ĸ": 245,
"Ĺ": 246,
"ĺ": 247,
"Ļ": 248,
"ļ": 249,
"Ľ": 250,
"ľ": 251,
"Ŀ": 252,
"ŀ": 253,
"Ł": 254,
"ł": 255,
"Ń": 256
}