jaeyeon's picture
add tokenizer
e5f25c2
{
"[PAD]": 529,
"[UNK]": 528,
"|": 0,
"가": 1,
"각": 2,
"간": 3,
"갈": 4,
"감": 5,
"갑": 6,
"값": 7,
"강": 8,
"갖": 9,
"같": 10,
"개": 11,
"거": 12,
"걱": 13,
"건": 14,
"걸": 15,
"겁": 16,
"것": 17,
"게": 18,
"겠": 19,
"겨": 20,
"격": 21,
"결": 22,
"겹": 23,
"경": 24,
"계": 25,
"고": 26,
"곡": 27,
"곱": 28,
"공": 29,
"과": 30,
"관": 31,
"괄": 32,
"괜": 33,
"굉": 34,
"교": 35,
"구": 36,
"국": 37,
"궤": 38,
"규": 39,
"그": 40,
"극": 41,
"근": 42,
"글": 43,
"금": 44,
"기": 45,
"긴": 46,
"길": 47,
"김": 48,
"깁": 49,
"깊": 50,
"까": 51,
"깐": 52,
"꼭": 53,
"꼴": 54,
"꾸": 55,
"꿰": 56,
"뀌": 57,
"뀐": 58,
"끈": 59,
"끊": 60,
"끝": 61,
"끼": 62,
"낌": 63,
"나": 64,
"낙": 65,
"난": 66,
"날": 67,
"납": 68,
"났": 69,
"내": 70,
"냈": 71,
"냐": 72,
"냥": 73,
"너": 74,
"넓": 75,
"넘": 76,
"넣": 77,
"네": 78,
"년": 79,
"념": 80,
"노": 81,
"논": 82,
"높": 83,
"놓": 84,
"놨": 85,
"누": 86,
"눈": 87,
"눠": 88,
"뉜": 89,
"뉴": 90,
"느": 91,
"는": 92,
"늘": 93,
"능": 94,
"니": 95,
"닌": 96,
"닐": 97,
"님": 98,
"닙": 99,
"다": 100,
"단": 101,
"달": 102,
"닮": 103,
"담": 104,
"답": 105,
"당": 106,
"대": 107,
"댈": 108,
"댓": 109,
"더": 110,
"던": 111,
"덟": 112,
"데": 113,
"도": 114,
"독": 115,
"돌": 116,
"동": 117,
"돼": 118,
"됐": 119,
"되": 120,
"된": 121,
"될": 122,
"됩": 123,
"두": 124,
"둘": 125,
"둥": 126,
"뒤": 127,
"드": 128,
"든": 129,
"듣": 130,
"들": 131,
"듬": 132,
"듭": 133,
"등": 134,
"디": 135,
"따": 136,
"딱": 137,
"때": 138,
"떠": 139,
"떤": 140,
"떨": 141,
"떻": 142,
"또": 143,
"똑": 144,
"똥": 145,
"뚱": 146,
"뜻": 147,
"라": 148,
"란": 149,
"랄": 150,
"람": 151,
"랍": 152,
"랑": 153,
"래": 154,
"랜": 155,
"략": 156,
"량": 157,
"러": 158,
"런": 159,
"럴": 160,
"럼": 161,
"럽": 162,
"렇": 163,
"레": 164,
"려": 165,
"력": 166,
"련": 167,
"렵": 168,
"렸": 169,
"령": 170,
"례": 171,
"로": 172,
"론": 173,
"롭": 174,
"료": 175,
"루": 176,
"류": 177,
"률": 178,
"륨": 179,
"륭": 180,
"르": 181,
"륵": 182,
"른": 183,
"를": 184,
"름": 185,
"릅": 186,
"리": 187,
"릭": 188,
"린": 189,
"림": 190,
"립": 191,
"마": 192,
"막": 193,
"만": 194,
"많": 195,
"말": 196,
"맘": 197,
"맞": 198,
"맣": 199,
"맥": 200,
"머": 201,
"먹": 202,
"먼": 203,
"멈": 204,
"메": 205,
"멘": 206,
"멩": 207,
"면": 208,
"명": 209,
"몇": 210,
"모": 211,
"목": 212,
"몰": 213,
"못": 214,
"무": 215,
"문": 216,
"물": 217,
"뭉": 218,
"뭐": 219,
"뭘": 220,
"뭡": 221,
"미": 222,
"민": 223,
"밀": 224,
"바": 225,
"밖": 226,
"반": 227,
"받": 228,
"발": 229,
"방": 230,
"배": 231,
"백": 232,
"버": 233,
"번": 234,
"벌": 235,
"범": 236,
"벡": 237,
"벽": 238,
"변": 239,
"별": 240,
"볍": 241,
"보": 242,
"복": 243,
"본": 244,
"볼": 245,
"봐": 246,
"봤": 247,
"부": 248,
"분": 249,
"불": 250,
"붓": 251,
"붙": 252,
"비": 253,
"빠": 254,
"빼": 255,
"뺏": 256,
"뿔": 257,
"사": 258,
"산": 259,
"살": 260,
"삼": 261,
"상": 262,
"새": 263,
"색": 264,
"생": 265,
"서": 266,
"석": 267,
"선": 268,
"설": 269,
"섯": 270,
"성": 271,
"세": 272,
"셨": 273,
"소": 274,
"속": 275,
"손": 276,
"수": 277,
"숙": 278,
"순": 279,
"숫": 280,
"쉬": 281,
"슈": 282,
"슉": 283,
"스": 284,
"습": 285,
"슷": 286,
"시": 287,
"식": 288,
"신": 289,
"실": 290,
"심": 291,
"십": 292,
"싶": 293,
"싹": 294,
"쌍": 295,
"써": 296,
"쏟": 297,
"쓰": 298,
"쓴": 299,
"쓸": 300,
"씌": 301,
"씨": 302,
"씩": 303,
"씬": 304,
"아": 305,
"안": 306,
"않": 307,
"알": 308,
"았": 309,
"앞": 310,
"애": 311,
"야": 312,
"약": 313,
"얇": 314,
"양": 315,
"얕": 316,
"얘": 317,
"어": 318,
"억": 319,
"언": 320,
"얼": 321,
"엄": 322,
"업": 323,
"없": 324,
"었": 325,
"에": 326,
"엑": 327,
"엔": 328,
"엠": 329,
"여": 330,
"역": 331,
"연": 332,
"열": 333,
"였": 334,
"영": 335,
"예": 336,
"오": 337,
"온": 338,
"올": 339,
"옮": 340,
"옷": 341,
"와": 342,
"완": 343,
"왔": 344,
"왜": 345,
"외": 346,
"왼": 347,
"요": 348,
"용": 349,
"우": 350,
"운": 351,
"울": 352,
"움": 353,
"웁": 354,
"워": 355,
"원": 356,
"웠": 357,
"위": 358,
"유": 359,
"육": 360,
"율": 361,
"으": 362,
"은": 363,
"을": 364,
"음": 365,
"응": 366,
"의": 367,
"이": 368,
"익": 369,
"인": 370,
"일": 371,
"읽": 372,
"임": 373,
"입": 374,
"있": 375,
"잊": 376,
"자": 377,
"작": 378,
"잖": 379,
"잘": 380,
"잠": 381,
"잡": 382,
"장": 383,
"재": 384,
"저": 385,
"적": 386,
"전": 387,
"절": 388,
"점": 389,
"접": 390,
"정": 391,
"제": 392,
"져": 393,
"졌": 394,
"조": 395,
"족": 396,
"존": 397,
"졸": 398,
"좀": 399,
"종": 400,
"좋": 401,
"좌": 402,
"죠": 403,
"주": 404,
"준": 405,
"줄": 406,
"중": 407,
"증": 408,
"지": 409,
"직": 410,
"진": 411,
"질": 412,
"짐": 413,
"집": 414,
"짓": 415,
"징": 416,
"짜": 417,
"짝": 418,
"짧": 419,
"짱": 420,
"째": 421,
"쨌": 422,
"쩜": 423,
"쪼": 424,
"쪽": 425,
"쫙": 426,
"쭉": 427,
"쯤": 428,
"찌": 429,
"찍": 430,
"차": 431,
"찮": 432,
"참": 433,
"채": 434,
"책": 435,
"처": 436,
"천": 437,
"철": 438,
"첫": 439,
"청": 440,
"체": 441,
"쳤": 442,
"초": 443,
"총": 444,
"최": 445,
"추": 446,
"축": 447,
"출": 448,
"충": 449,
"춰": 450,
"치": 451,
"칙": 452,
"친": 453,
"칠": 454,
"침": 455,
"카": 456,
"칸": 457,
"커": 458,
"컴": 459,
"케": 460,
"코": 461,
"퀄": 462,
"크": 463,
"큰": 464,
"클": 465,
"큼": 466,
"킬": 467,
"타": 468,
"태": 469,
"택": 470,
"터": 471,
"테": 472,
"텐": 473,
"토": 474,
"통": 475,
"투": 476,
"트": 477,
"특": 478,
"튼": 479,
"틀": 480,
"티": 481,
"파": 482,
"팔": 483,
"페": 484,
"펴": 485,
"편": 486,
"펼": 487,
"폅": 488,
"평": 489,
"포": 490,
"폴": 491,
"표": 492,
"푸": 493,
"푼": 494,
"풀": 495,
"프": 496,
"플": 497,
"피": 498,
"필": 499,
"하": 500,
"학": 501,
"한": 502,
"할": 503,
"함": 504,
"합": 505,
"항": 506,
"해": 507,
"했": 508,
"행": 509,
"험": 510,
"혀": 511,
"현": 512,
"형": 513,
"호": 514,
"화": 515,
"확": 516,
"환": 517,
"활": 518,
"회": 519,
"획": 520,
"효": 521,
"후": 522,
"훌": 523,
"훨": 524,
"휘": 525,
"히": 526,
"힘": 527
}