seny1004's picture
Upload tokenizer
201bf9c
raw
history blame contribute delete
No virus
16.3 kB
{
"kor": {
"[PAD]": 1020,
"[UNK]": 1019,
"|": 0,
" ": 1,
"가": 2,
"각": 3,
"간": 4,
"갈": 5,
"감": 6,
"갑": 7,
"값": 8,
"갓": 9,
"갔": 10,
"강": 11,
"갖": 12,
"같": 13,
"갚": 14,
"개": 15,
"객": 16,
"갱": 17,
"걀": 18,
"걔": 19,
"거": 20,
"걱": 21,
"건": 22,
"걷": 23,
"걸": 24,
"검": 25,
"겁": 26,
"것": 27,
"겉": 28,
"게": 29,
"겐": 30,
"겠": 31,
"겨": 32,
"격": 33,
"겪": 34,
"견": 35,
"결": 36,
"겸": 37,
"겹": 38,
"겼": 39,
"경": 40,
"곁": 41,
"계": 42,
"곈": 43,
"고": 44,
"곡": 45,
"곤": 46,
"곧": 47,
"골": 48,
"곳": 49,
"공": 50,
"과": 51,
"관": 52,
"광": 53,
"괜": 54,
"괴": 55,
"굉": 56,
"교": 57,
"굣": 58,
"구": 59,
"국": 60,
"군": 61,
"굳": 62,
"굴": 63,
"굶": 64,
"궁": 65,
"권": 66,
"귀": 67,
"귄": 68,
"귈": 69,
"규": 70,
"균": 71,
"그": 72,
"극": 73,
"근": 74,
"글": 75,
"금": 76,
"급": 77,
"긋": 78,
"기": 79,
"긴": 80,
"길": 81,
"김": 82,
"깁": 83,
"깃": 84,
"깊": 85,
"까": 86,
"깎": 87,
"깐": 88,
"깜": 89,
"깝": 90,
"깡": 91,
"깥": 92,
"깨": 93,
"깰": 94,
"깼": 95,
"꺼": 96,
"껄": 97,
"껍": 98,
"껏": 99,
"껐": 100,
"께": 101,
"껴": 102,
"꼈": 103,
"꼬": 104,
"꼭": 105,
"꼴": 106,
"꼼": 107,
"꼽": 108,
"꽃": 109,
"꽉": 110,
"꽤": 111,
"꾀": 112,
"꾸": 113,
"꾼": 114,
"꿀": 115,
"꿇": 116,
"꿈": 117,
"꿍": 118,
"꿎": 119,
"꿔": 120,
"뀌": 121,
"뀐": 122,
"뀔": 123,
"끄": 124,
"끈": 125,
"끊": 126,
"끌": 127,
"끓": 128,
"끔": 129,
"끗": 130,
"끝": 131,
"끼": 132,
"낀": 133,
"낄": 134,
"낌": 135,
"나": 136,
"낙": 137,
"낚": 138,
"난": 139,
"날": 140,
"남": 141,
"납": 142,
"낫": 143,
"났": 144,
"낭": 145,
"낮": 146,
"낯": 147,
"낳": 148,
"내": 149,
"낸": 150,
"낼": 151,
"냄": 152,
"냈": 153,
"냉": 154,
"냐": 155,
"냥": 156,
"너": 157,
"넉": 158,
"넋": 159,
"널": 160,
"넓": 161,
"넘": 162,
"넣": 163,
"네": 164,
"넥": 165,
"넨": 166,
"넷": 167,
"녀": 168,
"녁": 169,
"년": 170,
"념": 171,
"녔": 172,
"노": 173,
"녹": 174,
"논": 175,
"놀": 176,
"놈": 177,
"농": 178,
"높": 179,
"놓": 180,
"놔": 181,
"놨": 182,
"뇌": 183,
"뇨": 184,
"누": 185,
"눈": 186,
"눌": 187,
"눕": 188,
"눠": 189,
"눴": 190,
"뉴": 191,
"느": 192,
"는": 193,
"늘": 194,
"늙": 195,
"능": 196,
"늦": 197,
"니": 198,
"닌": 199,
"닐": 200,
"님": 201,
"닝": 202,
"다": 203,
"닥": 204,
"닦": 205,
"단": 206,
"닫": 207,
"달": 208,
"닮": 209,
"담": 210,
"답": 211,
"닷": 212,
"당": 213,
"닿": 214,
"대": 215,
"댁": 216,
"댄": 217,
"댔": 218,
"더": 219,
"덕": 220,
"던": 221,
"덜": 222,
"덟": 223,
"덤": 224,
"덧": 225,
"덩": 226,
"덮": 227,
"데": 228,
"델": 229,
"도": 230,
"독": 231,
"돈": 232,
"돋": 233,
"돌": 234,
"돔": 235,
"돕": 236,
"동": 237,
"돼": 238,
"됐": 239,
"되": 240,
"된": 241,
"될": 242,
"두": 243,
"둑": 244,
"둔": 245,
"둘": 246,
"둥": 247,
"둬": 248,
"뒀": 249,
"뒤": 250,
"뒷": 251,
"드": 252,
"득": 253,
"든": 254,
"듣": 255,
"들": 256,
"듬": 257,
"듯": 258,
"등": 259,
"디": 260,
"딜": 261,
"딧": 262,
"딩": 263,
"딪": 264,
"따": 265,
"딱": 266,
"딴": 267,
"딸": 268,
"땀": 269,
"땅": 270,
"때": 271,
"땐": 272,
"땡": 273,
"떠": 274,
"떡": 275,
"떤": 276,
"떨": 277,
"떴": 278,
"떻": 279,
"떼": 280,
"또": 281,
"똑": 282,
"똥": 283,
"뚜": 284,
"뚝": 285,
"뚱": 286,
"뛰": 287,
"뜨": 288,
"뜩": 289,
"뜬": 290,
"뜻": 291,
"띠": 292,
"라": 293,
"락": 294,
"란": 295,
"랄": 296,
"람": 297,
"랍": 298,
"랐": 299,
"랑": 300,
"랗": 301,
"래": 302,
"랜": 303,
"랫": 304,
"랬": 305,
"랭": 306,
"랴": 307,
"량": 308,
"러": 309,
"럭": 310,
"런": 311,
"럴": 312,
"럼": 313,
"럽": 314,
"렀": 315,
"렁": 316,
"렇": 317,
"레": 318,
"렜": 319,
"려": 320,
"력": 321,
"련": 322,
"렴": 323,
"렵": 324,
"렸": 325,
"령": 326,
"례": 327,
"로": 328,
"록": 329,
"론": 330,
"롤": 331,
"롭": 332,
"롱": 333,
"뢰": 334,
"료": 335,
"루": 336,
"룩": 337,
"룬": 338,
"룰": 339,
"룸": 340,
"뤄": 341,
"뤘": 342,
"류": 343,
"륜": 344,
"률": 345,
"륭": 346,
"르": 347,
"른": 348,
"를": 349,
"름": 350,
"릇": 351,
"릎": 352,
"리": 353,
"린": 354,
"릴": 355,
"림": 356,
"립": 357,
"릿": 358,
"링": 359,
"마": 360,
"막": 361,
"만": 362,
"많": 363,
"말": 364,
"맘": 365,
"맙": 366,
"맛": 367,
"망": 368,
"맞": 369,
"맡": 370,
"맣": 371,
"매": 372,
"맥": 373,
"맨": 374,
"맹": 375,
"머": 376,
"먹": 377,
"먼": 378,
"멀": 379,
"멈": 380,
"멋": 381,
"멍": 382,
"멎": 383,
"메": 384,
"멘": 385,
"며": 386,
"멱": 387,
"면": 388,
"멸": 389,
"명": 390,
"몇": 391,
"모": 392,
"목": 393,
"몫": 394,
"몰": 395,
"몸": 396,
"몹": 397,
"못": 398,
"묘": 399,
"무": 400,
"묵": 401,
"묶": 402,
"문": 403,
"묻": 404,
"물": 405,
"뭇": 406,
"뭉": 407,
"뭐": 408,
"뭔": 409,
"뭘": 410,
"뮤": 411,
"므": 412,
"미": 413,
"민": 414,
"믿": 415,
"밀": 416,
"밉": 417,
"밌": 418,
"밑": 419,
"바": 420,
"박": 421,
"밖": 422,
"반": 423,
"받": 424,
"발": 425,
"밝": 426,
"밟": 427,
"밤": 428,
"밥": 429,
"방": 430,
"밭": 431,
"배": 432,
"백": 433,
"밴": 434,
"뱃": 435,
"뱉": 436,
"버": 437,
"번": 438,
"벌": 439,
"범": 440,
"법": 441,
"벗": 442,
"베": 443,
"벤": 444,
"벨": 445,
"벳": 446,
"벼": 447,
"벽": 448,
"변": 449,
"별": 450,
"볍": 451,
"병": 452,
"볕": 453,
"보": 454,
"복": 455,
"볶": 456,
"본": 457,
"볼": 458,
"봄": 459,
"봇": 460,
"봉": 461,
"봐": 462,
"봤": 463,
"뵙": 464,
"부": 465,
"북": 466,
"분": 467,
"불": 468,
"붉": 469,
"붓": 470,
"붙": 471,
"뷔": 472,
"브": 473,
"블": 474,
"비": 475,
"빈": 476,
"빌": 477,
"빚": 478,
"빠": 479,
"빡": 480,
"빤": 481,
"빨": 482,
"빴": 483,
"빵": 484,
"빼": 485,
"뺏": 486,
"뺐": 487,
"뺨": 488,
"뻐": 489,
"뻑": 490,
"뻔": 491,
"뻘": 492,
"뻣": 493,
"뻤": 494,
"뼈": 495,
"뽀": 496,
"뽑": 497,
"뿌": 498,
"뿐": 499,
"쁘": 500,
"쁜": 501,
"쁠": 502,
"삐": 503,
"사": 504,
"삭": 505,
"산": 506,
"살": 507,
"삶": 508,
"삼": 509,
"삽": 510,
"삿": 511,
"샀": 512,
"상": 513,
"새": 514,
"색": 515,
"샌": 516,
"샘": 517,
"생": 518,
"샤": 519,
"서": 520,
"석": 521,
"섞": 522,
"선": 523,
"설": 524,
"섭": 525,
"섯": 526,
"섰": 527,
"성": 528,
"세": 529,
"섹": 530,
"센": 531,
"셋": 532,
"셌": 533,
"셔": 534,
"션": 535,
"셨": 536,
"소": 537,
"속": 538,
"손": 539,
"솔": 540,
"솜": 541,
"송": 542,
"쇄": 543,
"쇼": 544,
"수": 545,
"숙": 546,
"순": 547,
"술": 548,
"숨": 549,
"숫": 550,
"숭": 551,
"숴": 552,
"쉈": 553,
"쉬": 554,
"쉰": 555,
"쉴": 556,
"쉼": 557,
"쉽": 558,
"슐": 559,
"스": 560,
"슥": 561,
"슨": 562,
"슬": 563,
"슴": 564,
"습": 565,
"슷": 566,
"승": 567,
"시": 568,
"식": 569,
"신": 570,
"실": 571,
"싫": 572,
"심": 573,
"십": 574,
"싱": 575,
"싶": 576,
"싸": 577,
"싹": 578,
"싼": 579,
"쌀": 580,
"쌍": 581,
"쌓": 582,
"써": 583,
"썩": 584,
"썰": 585,
"썼": 586,
"썽": 587,
"쎄": 588,
"쏘": 589,
"쏟": 590,
"쑤": 591,
"쑥": 592,
"쓰": 593,
"쓱": 594,
"쓴": 595,
"쓸": 596,
"씀": 597,
"씁": 598,
"씨": 599,
"씩": 600,
"씬": 601,
"씹": 602,
"씻": 603,
"아": 604,
"악": 605,
"안": 606,
"앉": 607,
"않": 608,
"알": 609,
"앓": 610,
"암": 611,
"압": 612,
"앗": 613,
"았": 614,
"앞": 615,
"애": 616,
"액": 617,
"야": 618,
"약": 619,
"얄": 620,
"양": 621,
"얕": 622,
"얘": 623,
"어": 624,
"억": 625,
"언": 626,
"얹": 627,
"얻": 628,
"얼": 629,
"엄": 630,
"업": 631,
"없": 632,
"엇": 633,
"었": 634,
"엉": 635,
"엊": 636,
"엎": 637,
"에": 638,
"엑": 639,
"엔": 640,
"엘": 641,
"엠": 642,
"여": 643,
"역": 644,
"연": 645,
"열": 646,
"염": 647,
"였": 648,
"영": 649,
"옆": 650,
"예": 651,
"옛": 652,
"오": 653,
"옥": 654,
"온": 655,
"올": 656,
"옮": 657,
"옳": 658,
"옷": 659,
"와": 660,
"완": 661,
"왔": 662,
"왕": 663,
"왜": 664,
"왠": 665,
"외": 666,
"요": 667,
"욕": 668,
"용": 669,
"우": 670,
"욱": 671,
"운": 672,
"울": 673,
"움": 674,
"웃": 675,
"웅": 676,
"워": 677,
"원": 678,
"월": 679,
"웠": 680,
"웬": 681,
"위": 682,
"윗": 683,
"유": 684,
"육": 685,
"윤": 686,
"율": 687,
"윳": 688,
"융": 689,
"으": 690,
"은": 691,
"을": 692,
"음": 693,
"응": 694,
"의": 695,
"이": 696,
"익": 697,
"인": 698,
"일": 699,
"읽": 700,
"잃": 701,
"임": 702,
"입": 703,
"잇": 704,
"있": 705,
"잉": 706,
"잊": 707,
"자": 708,
"작": 709,
"잔": 710,
"잖": 711,
"잘": 712,
"잠": 713,
"잡": 714,
"잣": 715,
"잤": 716,
"장": 717,
"잦": 718,
"재": 719,
"쟁": 720,
"저": 721,
"적": 722,
"전": 723,
"절": 724,
"젊": 725,
"점": 726,
"접": 727,
"정": 728,
"제": 729,
"젝": 730,
"젠": 731,
"젯": 732,
"져": 733,
"졌": 734,
"조": 735,
"족": 736,
"존": 737,
"졸": 738,
"좀": 739,
"좁": 740,
"종": 741,
"좋": 742,
"좌": 743,
"죄": 744,
"주": 745,
"죽": 746,
"준": 747,
"줄": 748,
"중": 749,
"줗": 750,
"줘": 751,
"줬": 752,
"쥐": 753,
"즈": 754,
"즉": 755,
"즐": 756,
"즘": 757,
"증": 758,
"지": 759,
"직": 760,
"진": 761,
"질": 762,
"짐": 763,
"집": 764,
"짓": 765,
"징": 766,
"짖": 767,
"짚": 768,
"짜": 769,
"짝": 770,
"짢": 771,
"짧": 772,
"짱": 773,
"째": 774,
"쩌": 775,
"쩍": 776,
"쩐": 777,
"쩔": 778,
"쩠": 779,
"쩡": 780,
"쪘": 781,
"쪽": 782,
"쫄": 783,
"쫓": 784,
"쭈": 785,
"쭘": 786,
"쯤": 787,
"찌": 788,
"찍": 789,
"찜": 790,
"찝": 791,
"찢": 792,
"차": 793,
"착": 794,
"찬": 795,
"찮": 796,
"찰": 797,
"참": 798,
"찼": 799,
"창": 800,
"찾": 801,
"채": 802,
"책": 803,
"챘": 804,
"챙": 805,
"처": 806,
"척": 807,
"천": 808,
"철": 809,
"첨": 810,
"첩": 811,
"첫": 812,
"청": 813,
"체": 814,
"쳐": 815,
"쳤": 816,
"초": 817,
"촉": 818,
"촌": 819,
"총": 820,
"최": 821,
"추": 822,
"축": 823,
"춘": 824,
"출": 825,
"춤": 826,
"충": 827,
"춰": 828,
"췄": 829,
"췌": 830,
"취": 831,
"츠": 832,
"측": 833,
"층": 834,
"치": 835,
"친": 836,
"칠": 837,
"침": 838,
"칫": 839,
"칭": 840,
"카": 841,
"캄": 842,
"캐": 843,
"캠": 844,
"커": 845,
"컥": 846,
"컨": 847,
"컬": 848,
"컴": 849,
"컵": 850,
"컸": 851,
"케": 852,
"켓": 853,
"켜": 854,
"켰": 855,
"코": 856,
"콘": 857,
"콜": 858,
"콱": 859,
"쾌": 860,
"큐": 861,
"크": 862,
"큰": 863,
"클": 864,
"큼": 865,
"키": 866,
"킨": 867,
"킬": 868,
"킷": 869,
"타": 870,
"탁": 871,
"탄": 872,
"탈": 873,
"탐": 874,
"탓": 875,
"탔": 876,
"탕": 877,
"태": 878,
"택": 879,
"터": 880,
"턱": 881,
"턴": 882,
"털": 883,
"텃": 884,
"텅": 885,
"테": 886,
"텐": 887,
"텔": 888,
"템": 889,
"텨": 890,
"텼": 891,
"토": 892,
"톡": 893,
"톤": 894,
"톨": 895,
"톱": 896,
"통": 897,
"퇴": 898,
"투": 899,
"툭": 900,
"툰": 901,
"툴": 902,
"툼": 903,
"퉈": 904,
"퉜": 905,
"튀": 906,
"트": 907,
"특": 908,
"튼": 909,
"틀": 910,
"틈": 911,
"티": 912,
"틱": 913,
"틸": 914,
"팀": 915,
"팅": 916,
"파": 917,
"판": 918,
"팔": 919,
"팠": 920,
"팡": 921,
"패": 922,
"팬": 923,
"팽": 924,
"퍼": 925,
"펐": 926,
"펑": 927,
"페": 928,
"펜": 929,
"펴": 930,
"편": 931,
"펼": 932,
"평": 933,
"폐": 934,
"포": 935,
"폭": 936,
"폰": 937,
"표": 938,
"푸": 939,
"푹": 940,
"푼": 941,
"풀": 942,
"품": 943,
"풍": 944,
"퓨": 945,
"프": 946,
"픈": 947,
"플": 948,
"픔": 949,
"피": 950,
"픽": 951,
"핀": 952,
"필": 953,
"핍": 954,
"핑": 955,
"하": 956,
"학": 957,
"한": 958,
"할": 959,
"함": 960,
"합": 961,
"항": 962,
"해": 963,
"핵": 964,
"핸": 965,
"햄": 966,
"했": 967,
"행": 968,
"향": 969,
"허": 970,
"헌": 971,
"험": 972,
"헛": 973,
"헤": 974,
"헬": 975,
"혀": 976,
"현": 977,
"혈": 978,
"혐": 979,
"협": 980,
"혔": 981,
"형": 982,
"혜": 983,
"호": 984,
"혹": 985,
"혼": 986,
"홀": 987,
"홈": 988,
"화": 989,
"확": 990,
"환": 991,
"활": 992,
"홧": 993,
"황": 994,
"회": 995,
"획": 996,
"횟": 997,
"횡": 998,
"효": 999,
"후": 1000,
"훈": 1001,
"훌": 1002,
"훔": 1003,
"훨": 1004,
"휘": 1005,
"휠": 1006,
"휩": 1007,
"휴": 1008,
"흐": 1009,
"흔": 1010,
"흘": 1011,
"흡": 1012,
"흥": 1013,
"희": 1014,
"히": 1015,
"힌": 1016,
"힐": 1017,
"힘": 1018
}
}