zpn's picture
Upload tokenizer
7305e12
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "WhitespaceSplit"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
1
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
2
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"[UNK]": 0,
"[CLS]": 1,
"[SEP]": 2,
"[PAD]": 3,
"[MASK]": 4,
"[C]": 5,
"[=C]": 6,
"[Ring1]": 7,
"[Branch1]": 8,
"[=Branch1]": 9,
"[N]": 10,
"[O]": 11,
"[=O]": 12,
"[Ring2]": 13,
"[Branch2]": 14,
"[=N]": 15,
"[S]": 16,
"[#Branch1]": 17,
"[=Branch2]": 18,
"[F]": 19,
"[#Branch2]": 20,
"[C@H1]": 21,
"[C@@H1]": 22,
"[Cl]": 23,
"[#C]": 24,
".": 25,
"[P]": 26,
"[/C]": 27,
"[=Ring1]": 28,
"[NH1]": 29,
"[Br]": 30,
"[\\C]": 31,
"[O-1]": 32,
"[N+1]": 33,
"[#N]": 34,
"[/N]": 35,
"[C@]": 36,
"[=S]": 37,
"[C@@]": 38,
"[=Ring2]": 39,
"[2H]": 40,
"[Si]": 41,
"[I]": 42,
"[=N+1]": 43,
"[\\N]": 44,
"[C-1]": 45,
"[B]": 46,
"[H]": 47,
"[NH1+1]": 48,
"[=C-1]": 49,
"[/O]": 50,
"[Na+1]": 51,
"[\\O]": 52,
"[Cl-1]": 53,
"[N-1]": 54,
"[BH0]": 55,
"[#N+1]": 56,
"[\\S]": 57,
"[=N-1]": 58,
"[CH1-1]": 59,
"[Y]": 60,
"[=P]": 61,
"[NH2+1]": 62,
"[CH1]": 63,
"[Ir]": 64,
"[P+1]": 65,
"[CH2-1]": 66,
"[K+1]": 67,
"[/C@@H1]": 68,
"[Br-1]": 69,
"[/S]": 70,
"[CH3-1]": 71,
"[I-1]": 72,
"[S+1]": 73,
"[NH3+1]": 74,
"[Se]": 75,
"[/C@H1]": 76,
"[B-1]": 77,
"[S-1]": 78,
"[Li+1]": 79,
"[#O+1]": 80,
"[3H]": 81,
"[=NH1+1]": 82,
"[Pt+2]": 83,
"[Pt]": 84,
"[Sn]": 85,
"[Branch3]": 86,
"[SiH1]": 87,
"[CH0]": 88,
"[NH1-1]": 89,
"[W]": 90,
"[#C-1]": 91,
"[OH1-1]": 92,
"[V]": 93,
"[\\C@@H1]": 94,
"[SiH2]": 95,
"[O-2]": 96,
"[H-1]": 97,
"[\\C@H1]": 98,
"[Na]": 99,
"[Mg+2]": 100,
"[S@@]": 101,
"[S@]": 102,
"[Ge]": 103,
"[PH1+1]": 104,
"[F-1]": 105,
"[SiH3]": 106,
"[Zn+2]": 107,
"[K]": 108,
"[NH4+1]": 109,
"[Al]": 110,
"[-\\Ring2]": 111,
"[Fe+2]": 112,
"[Cl+3]": 113,
"[/N+1]": 114,
"[CH2]": 115,
"[/Cl]": 116,
"[Pd+2]": 117,
"[Cu+2]": 118,
"[Ca+2]": 119,
"[\\O-1]": 120,
"[\\F]": 121,
"[Fe]": 122,
"[Pd]": 123,
"[Ir+3]": 124,
"[/F]": 125,
"[P@]": 126,
"[P@@]": 127,
"[Co]": 128,
"[\\Cl]": 129,
"[Ni+2]": 130,
"[As]": 131,
"[Zn]": 132,
"[Ti]": 133,
"[U]": 134,
"[Rb]": 135,
"[C+1]": 136,
"[Cu]": 137,
"[Ni]": 138,
"[=P+1]": 139,
"[-\\Ring1]": 140,
"[H+1]": 141,
"[-/Ring2]": 142,
"[=NH2+1]": 143,
"[Zr+2]": 144,
"[=O+1]": 145,
"[CH1+1]": 146,
"[Zr]": 147,
"[13C]": 148,
"[Ac]": 149,
"[Te]": 150,
"[Cu+1]": 151,
"[OH0]": 152,
"[Ti+4]": 153,
"[Zr+4]": 154,
"[NH2-1]": 155,
"[13CH2]": 156,
"[OH1+1]": 157,
"[Li]": 158,
"[Ru+2]": 159,
"[Rb+1]": 160,
"[18F]": 161,
"[Cs+1]": 162,
"[I+1]": 163,
"[OH2+1]": 164,
"[Co+2]": 165,
"[Al+3]": 166,
"[Ag+1]": 167,
"[Cr]": 168,
"[-/Ring1]": 169,
"[Rf]": 170,
"[Ru]": 171,
"[Re]": 172,
"[Au+1]": 173,
"[Rh]": 174,
"[=OH1+1]": 175,
"[/P]": 176,
"[/Br]": 177,
"[Ba+2]": 178,
"[Mo]": 179,
"[Ring3]": 180,
"[Mn]": 181,
"[=W]": 182,
"[=Zr+2]": 183,
"[/I]": 184,
"[W+2]": 185,
"[Hg]": 186,
"[O+1]": 187,
"[Ar]": 188,
"[\\N+1]": 189,
"[Fe+3]": 190,
"[=Se]": 191,
"[CH2+1]": 192,
"[Sb]": 193,
"[Mn+2]": 194,
"[=Si]": 195,
"[/NH1+1]": 196,
"[/O-1]": 197,
"[\\NH1]": 198,
"[/Si]": 199,
"[Pb]": 200,
"[Y+3]": 201,
"[/C-1]": 202,
"[/C@@]": 203,
"[Ag]": 204,
"[\\Br]": 205,
"[\\N-1]": 206,
"[PH1]": 207,
"[13CH1]": 208,
"[/B]": 209,
"[Ti+2]": 210,
"[/C@]": 211,
"[N@+1]": 212,
"[U+2]": 213,
"[Hf]": 214,
"[\\I]": 215,
"[SH1+1]": 216,
"[13CH3]": 217,
"[Tb]": 218,
"[Sn+2]": 219,
"[Pt+4]": 220,
"[=Ru]": 221,
"[Si-1]": 222,
"[=Zr]": 223,
"[N@@+1]": 224,
"[=13CH1]": 225,
"[S-2]": 226,
"[=B]": 227,
"[Hf+4]": 228,
"[Au]": 229,
"[Co+3]": 230,
"[Zn+1]": 231,
"[NH0]": 232,
"[Ga]": 233,
"[Ti+3]": 234,
"[Bi]": 235,
"[=S+1]": 236,
"[Ca]": 237,
"[Mg]": 238,
"[In]": 239,
"[SH2+1]": 240,
"[Se-1]": 241,
"[#P]": 242,
"[Zr+3]": 243,
"[=C+1]": 244,
"[=V]": 245,
"[Sr+2]": 246,
"[Cr+3]": 247,
"[SH1-1]": 248,
"[/NH1]": 249,
"[Cd+2]": 250,
"[/N-1]": 251,
"[Ru+1]": 252,
"[15NH1]": 253,
"[Gd+3]": 254,
"[V+2]": 255,
"[BH3-1]": 256,
"[Sn+4]": 257,
"[=13C]": 258,
"[P-1]": 259,
"[BH1-1]": 260,
"[=Mo]": 261,
"[Os]": 262,
"[OH3+1]": 263,
"[Cd]": 264,
"[Ru+3]": 265,
"[=Cr]": 266,
"[\\Si]": 267,
"[TlH2]": 268,
"[AlH2]": 269,
"[15N]": 270,
"[\\P]": 271,
"[=Ti]": 272,
"[=CH0]": 273,
"[Au+3]": 274,
"[=S-1]": 275,
"[Pb+2]": 276,
"[Rh+2]": 277,
"[PH3+1]": 278,
"[Gd]": 279,
"[AlH3]": 280,
"[=As]": 281,
"[Mn+3]": 282,
"[14C]": 283,
"[Pd+1]": 284,
"[Rh+3]": 285,
"[CH3+1]": 286,
"[Sn+1]": 287,
"[BH2-1]": 288,
"[Bi+3]": 289,
"[Ga+3]": 290,
"[Hg+2]": 291,
"[=S@]": 292,
"[13C@H1]": 293,
"[/CH0]": 294,
"[Hg+1]": 295,
"[=S@@]": 296,
"[2H-1]": 297,
"[AlH1]": 298,
"[\\C@]": 299,
"[\\C@@]": 300,
"[=Mn]": 301,
"[Be+2]": 302,
"[Pr]": 303,
"[=Ti+2]": 304,
"[13C@@H1]": 305,
"[Cr+2]": 306,
"[Mo+2]": 307,
"[/S-1]": 308,
"[11CH3]": 309,
"[Eu]": 310,
"[La+3]": 311,
"[La]": 312,
"[PH2+1]": 313,
"[Sb+3]": 314,
"[\\S-1]": 315,
"[Ni+3]": 316,
"[Ce]": 317,
"[Si@]": 318,
"[Ce+3]": 319,
"[/2H]": 320,
"[SeH1]": 321,
"[Al+1]": 322,
"[In+3]": 323,
"[IH1+1]": 324,
"[Os+2]": 325,
"[Ba]": 326,
"[#S]": 327,
"[15NH2]": 328,
"[/P+1]": 329,
"[125I]": 330,
"[ClH1+1]": 331,
"[Eu+3]": 332,
"[=Pt]": 333,
"[=SH1+1]": 334,
"[Sm]": 335,
"[Si@@]": 336,
"[Se-2]": 337,
"[Pt+1]": 338,
"[\\Se]": 339,
"[Nd+3]": 340,
"[Ta]": 341,
"[/O+1]": 342,
"[Dy+3]": 343,
"[Mo+4]": 344,
"[Sb+5]": 345,
"[Nb]": 346,
"[\\B]": 347,
"[Nd]": 348,
"[Sb-1]": 349,
"[14CH2]": 350,
"[Os+1]": 351,
"[Ti+1]": 352,
"[V+4]": 353,
"[#Ring2]": 354,
"[=14CH1]": 355,
"[As+1]": 356,
"[=CH1-1]": 357,
"[PH1-1]": 358,
"[/NH2+1]": 359,
"[123I]": 360,
"[Cl+2]": 361,
"[BH4-1]": 362,
"[Yb+3]": 363,
"[=Hf+2]": 364,
"[Se+1]": 365,
"[Al-1]": 366,
"[14CH1]": 367,
"[=Al]": 368,
"[Sm+3]": 369,
"[Yb]": 370,
"[Sc]": 371,
"[=PH1+1]": 372,
"[Fm]": 373,
"[Tb+3]": 374,
"[SiH1-1]": 375,
"[SnH1]": 376,
"[Sc+3]": 377,
"[TlH2+1]": 378,
"[GeH1]": 379,
"[SiH2+1]": 380,
"[SiH4]": 381,
"[=Co]": 382,
"[=18O]": 383,
"[I+2]": 384,
"[Al+2]": 385,
"[/B-1]": 386,
"[U+4]": 387,
"[Fe+4]": 388,
"[GeH2]": 389,
"[#NH1+1]": 390,
"[I+3]": 391,
"[/Se]": 392,
"[=Branch3]": 393,
"[=Sb]": 394,
"[Sn+3]": 395,
"[Sr]": 396,
"[Cl+1]": 397,
"[Ta+5]": 398,
"[Nb+5]": 399,
"[=14C]": 400,
"[#Ring1]": 401,
"[Er+3]": 402,
"[=15N]": 403,
"[Po]": 404,
"[Cs]": 405,
"[\\2H]": 406,
"[=V+2]": 407,
"[=Ni]": 408,
"[/BH0]": 409,
"[\\NH2+1]": 410,
"[=Te]": 411,
"[131I]": 412,
"[14CH3]": 413,
"[=Fe]": 414,
"[=P@@]": 415,
"[Pr+3]": 416,
"[=Ge]": 417,
"[3H-1]": 418,
"[Ru+6]": 419,
"[Lu]": 420,
"[=P@]": 421,
"[AsH1]": 422,
"[Mo+3]": 423,
"[Lu+3]": 424,
"[SH3+1]": 425,
"[=SiH1]": 426,
"[/C+1]": 427,
"[Ir+2]": 428,
"[\\C-1]": 429,
"[Te+1]": 430,
"[Fe+1]": 431,
"[=Cu]": 432,
"[Th+4]": 433,
"[=Os]": 434,
"[=Sn]": 435,
"[PH4+1]": 436,
"[/S@]": 437,
"[=Mo+2]": 438,
"[=PH1]": 439,
"[Dy]": 440,
"[\\NH1-1]": 441,
"[Er]": 442,
"[No]": 443,
"[/S@@]": 444,
"[=Zn]": 445,
"[Ho]": 446,
"[99Tc]": 447,
"[Br+2]": 448,
"[Tc]": 449,
"[=Ag]": 450,
"[=SiH2]": 451,
"[Th]": 452,
"[=99Tc+4]": 453,
"[Tl]": 454,
"[=Ta]": 455,
"[Os+4]": 456,
"[Ce+4]": 457,
"[10B]": 458,
"[18O]": 459,
"[Ru+4]": 460,
"[SiH8+4]": 461,
"[Cr+6]": 462,
"[Fe+6]": 463,
"[11C]": 464,
"[BiH2]": 465,
"[1H]": 466,
"[18OH1]": 467,
"[/13CH1]": 468,
"[As-1]": 469,
"[SH0]": 470,
"[/Sn]": 471,
"[\\S@]": 472,
"[U+3]": 473,
"[=Nb]": 474,
"[\\NH1+1]": 475,
"[=Pd]": 476,
"[=PH2+1]": 477,
"[BH1]": 478,
"[GeH3]": 479,
"[Yb+2]": 480,
"[PH0]": 481,
"[Ho+3]": 482,
"[Tm]": 483,
"[19F]": 484,
"[IH2+1]": 485,
"[At]": 486,
"[Cm]": 487,
"[=Ti+1]": 488,
"[Fe+5]": 489,
"[Hf+2]": 490,
"[\\P+1]": 491,
"[Eu+2]": 492,
"[14C@@H1]": 493,
"[\\CH1-1]": 494,
"[#Si]": 495,
"[Cr+1]": 496,
"[P-3]": 497,
"[\\S@@]": 498,
"[Np]": 499,
"[P-2]": 500,
"[B@@-1]": 501,
"[IH1-1]": 502,
"[/S+1]": 503,
"[14C@H1]": 504,
"[124I]": 505,
"[=Re]": 506,
"[Tm+3]": 507,
"[Cr+4]": 508,
"[PH2-1]": 509,
"[\\OH2+1]": 510,
"[SnH2]": 511,
"[Nb+2]": 512,
"[/NH1-1]": 513,
"[Co+1]": 514,
"[B@-1]": 515,
"[Lr]": 516,
"[=Bi]": 517,
"[10BH2]": 518,
"[Sg]": 519,
"[=U]": 520,
"[=Ru+1]": 521,
"[15N+1]": 522,
"[/13C]": 523,
"[/CH1-1]": 524,
"[ClH2+1]": 525,
"[Pu]": 526,
"[=In]": 527,
"[#13C]": 528,
"[Bi+2]": 529,
"[Xe]": 530,
"[AsH2]": 531,
"[SnH3]": 532,
"[GaH1]": 533,
"[\\CH0]": 534,
"[SbH1]": 535,
"[\\Te]": 536,
"[Ta+2]": 537,
"[13C@]": 538,
"[76Br]": 539,
"[13C@@]": 540,
"[=BH0]": 541,
"[=Hf]": 542,
"[Ru+5]": 543,
"[=Rh]": 544,
"[=Tc+4]": 545,
"[=NH0]": 546,
"[=Se+1]": 547,
"[Be]": 548,
"[\\Sn]": 549,
"[=U+2]": 550,
"[=Pb]": 551,
"[#Mo]": 552,
"[Si@H1]": 553,
"[/Te]": 554,
"[TeH1]": 555,
"[Pm]": 556,
"[Gd+2]": 557,
"[/As]": 558,
"[=B-1]": 559,
"[=Fe+1]": 560,
"[11CH2]": 561,
"[Am]": 562,
"[\\CH2-1]": 563,
"[=99Tc+3]": 564,
"[=P-1]": 565,
"[Zn-2]": 566,
"[Cf]": 567,
"[Cr+5]": 568,
"[11B]": 569,
"[Fe-1]": 570,
"[=Ca]": 571,
"[=Mg]": 572,
"[BiH1]": 573,
"[=Tc]": 574,
"[Ge+4]": 575,
"[Si@@H1]": 576,
"[Sb+1]": 577,
"[LiH2-1]": 578,
"[12C]": 579,
"[InH2+1]": 580,
"[OH1]": 581,
"[/CH1+1]": 582,
"[=11C]": 583,
"[#S-1]": 584,
"[He]": 585,
"[211At]": 586,
"[/Ru]": 587,
"[/I+1]": 588,
"[=13CH2]": 589,
"[16OH1]": 590,
"[Es]": 591,
"[Ga-1]": 592,
"[Th+2]": 593,
"[Tl+3]": 594,
"[/Ge]": 595,
"[Nb+3]": 596,
"[Ra]": 597,
"[InH2]": 598,
"[Mn+1]": 599,
"[Pa]": 600,
"[/SiH1]": 601,
"[Ir-3]": 602,
"[#P+1]": 603,
"[18F-1]": 604,
"[\\NH3+1]": 605,
"[\\S+1]": 606,
"[32P]": 607,
"[Ir+1]": 608,
"[#W]": 609,
"[PbH1]": 610,
"[SeH2]": 611,
"[Si+1]": 612,
"[64Cu]": 613,
"[Zn-1]": 614,
"[Bi+1]": 615,
"[/3H]": 616,
"[/Al]": 617,
"[/NH3+1]": 618,
"[\\CH1+1]": 619,
"[AsH3]": 620,
"[\\Ge]": 621,
"[/OH2+1]": 622,
"[SbH2]": 623,
"[\\BH0]": 624,
"[3H+1]": 625,
"[64Cu+2]": 626,
"[Bk]": 627,
"[\\I+1]": 628,
"[68Ga+3]": 629,
"[=Tc+3]": 630,
"[Os+6]": 631,
"[\\PH1+1]": 632,
"[111In+3]": 633,
"[/CH1]": 634,
"[PH1-2]": 635,
"[=GeH1]": 636,
"[Rh+1]": 637,
"[GaH2]": 638,
"[/CH2-1]": 639,
"[=Au]": 640,
"[Tc+4]": 641,
"[=SH1-1]": 642,
"[SbH3]": 643,
"[16O]": 644,
"[Ni+1]": 645,
"[#15N]": 646,
"[35S]": 647,
"[AsH6+3]": 648,
"[=17O]": 649,
"[InH1]": 650,
"[13CH4]": 651,
"[Mg+1]": 652,
"[=SiH4+2]": 653,
"[=Ring3]": 654,
"[Tc+5]": 655,
"[17OH1]": 656,
"[SiH4+2]": 657,
"[\\3H]": 658,
"[17O]": 659,
"[Ne]": 660,
"[\\B-1]": 661,
"[As@]": 662,
"[XeH1]": 663,
"[\\SH2+1]": 664,
"[/15N]": 665,
"[=99Tc]": 666,
"[Cu-1]": 667,
"[Ru-1]": 668,
"[\\C+1]": 669,
"[/I-1]": 670,
"[=Ir]": 671,
"[=Tl]": 672,
"[Db]": 673,
"[Mt]": 674,
"[\\CH1]": 675,
"[SiH6+3]": 676,
"[Ru+8]": 677,
"[SiH3+1]": 678,
"[99Tc+4]": 679,
"[=AsH1]": 680,
"[Br+3]": 681,
"[SiH2-1]": 682,
"[PoH1]": 683,
"[=SeH1]": 684,
"[18O-1]": 685,
"[=CH1+1]": 686,
"[BiH3]": 687,
"[Os+5]": 688,
"[\\I-1]": 689,
"[PbH2]": 690,
"[\\As]": 691,
"[#Y]": 692,
"[/14CH1]": 693,
"[11CH1]": 694,
"[\\13CH2]": 695,
"[=Si-1]": 696,
"[/13CH2]": 697,
"[=Ga]": 698,
"[N@@]": 699,
"[\\Hg]": 700,
"[13C-1]": 701,
"[=Cd]": 702,
"[=SH0]": 703,
"[=Tc+2]": 704,
"[TlH1]": 705,
"[Sn-1]": 706,
"[=Hg]": 707,
"[Hs]": 708,
"[#W+1]": 709,
"[=Os+2]": 710,
"[=Sb+1]": 711,
"[As@@]": 712,
"[Br+1]": 713,
"[GeH4]": 714,
"[Pt-2]": 715,
"[77Br]": 716,
"[=Ba]": 717,
"[Au-1]": 718,
"[Bh]": 719,
"[N@]": 720,
"[#14C]": 721,
"[/125I]": 722,
"[=15N+1]": 723,
"[=Mo+4]": 724,
"[Tc+7]": 725,
"[=10B]": 726,
"[68Ga]": 727,
"[AlH2+1]": 728,
"[#Os]": 729,
"[177Lu]": 730,
"[Pd-2]": 731,
"[\\13CH3]": 732,
"[Cu-2]": 733,
"[18FH1]": 734,
"[AlH2-1]": 735,
"[InH3]": 736,
"[\\SeH1]": 737,
"[#Cr]": 738,
"[2H+1]": 739,
"[=12C]": 740,
"[/123I]": 741,
"[P@+1]": 742,
"[Rn]": 743,
"[111In]": 744,
"[Pd-1]": 745,
"[12CH2]": 746,
"[#Mn]": 747,
"[/SH2+1]": 748,
"[37Cl]": 749,
"[ClH0]": 750,
"[#11C]": 751,
"[16N]": 752,
"[=16O]": 753,
"[=AlH1]": 754,
"[P@H1]": 755,
"[/P@]": 756,
"[10BH1]": 757,
"[=Bi+1]": 758,
"[#Re]": 759,
"[99Tc+7]": 760,
"[=Y]": 761,
"[14N]": 762,
"[90Y]": 763,
"[=CH1]": 764,
"[Ge@]": 765,
"[Ir-2]": 766,
"[#Mo+1]": 767,
"[/Hg]": 768,
"[12CH1]": 769,
"[AlH4-1]": 770,
"[Ru-2]": 771,
"[#Fe]": 772,