biot5-base / added_tokens.json
QizhiPei's picture
Upload tokenizer
4883dfd
{
"<bom>": 35063,
"<bop>": 35065,
"<eom>": 35064,
"<eop>": 35066,
"<p>A": 32100,
"<p>C": 32101,
"<p>D": 32102,
"<p>E": 32103,
"<p>F": 32104,
"<p>G": 32105,
"<p>H": 32106,
"<p>I": 32107,
"<p>K": 32108,
"<p>L": 32109,
"<p>M": 32110,
"<p>N": 32111,
"<p>P": 32112,
"<p>Q": 32113,
"<p>R": 32114,
"<p>S": 32115,
"<p>T": 32116,
"<p>V": 32117,
"<p>W": 32118,
"<p>Y": 32119,
"DESCRIPTION": 35068,
"FUNCTION": 35070,
"MOLECULE NAME": 35067,
"PROTEIN FAMILIES": 35072,
"PROTEIN NAME": 35069,
"SUBCELLULAR LOCATION": 35071,
"[#11C-1]": 34809,
"[#11CH1]": 34772,
"[#11C]": 32177,
"[#121Sb]": 32200,
"[#12CH1]": 32522,
"[#13C-1]": 33245,
"[#13CH1]": 34692,
"[#13C]": 32874,
"[#13N]": 33007,
"[#14C-1]": 33877,
"[#14CH1]": 33622,
"[#14C]": 33992,
"[#14N]": 32581,
"[#15N+1]": 34477,
"[#15N]": 33321,
"[#15O+1]": 33445,
"[#16O+1]": 32376,
"[#17C-1]": 32890,
"[#17CH1]": 33689,
"[#17O+1]": 32807,
"[#188Re]": 32950,
"[#18C-1]": 33652,
"[#18CH1]": 32989,
"[#18O+1]": 33775,
"[#34S+1]": 34627,
"[#99Tc]": 34132,
"[#Al]": 34474,
"[#As+1]": 34177,
"[#AsH1]": 34270,
"[#As]": 34438,
"[#B-1]": 34157,
"[#B]": 32885,
"[#Bi]": 34031,
"[#Br]": 32978,
"[#Branch1]": 32976,
"[#Branch2]": 32728,
"[#Branch3]": 32164,
"[#C+1]": 34373,
"[#C-1]": 33359,
"[#CH0]": 34822,
"[#C]": 33286,
"[#Ce]": 32187,
"[#Cl]": 32716,
"[#Co]": 34500,
"[#Cr]": 32240,
"[#Dy]": 32337,
"[#Er]": 33139,
"[#Eu]": 34994,
"[#Fe+1]": 33529,
"[#Fe]": 34940,
"[#Ga]": 32802,
"[#Gd]": 34981,
"[#GeH1]": 32379,
"[#Ge]": 32563,
"[#Hf+1]": 34664,
"[#Ho]": 34864,
"[#I]": 34345,
"[#In]": 32530,
"[#Ir+1]": 33205,
"[#La]": 33210,
"[#Lu]": 34656,
"[#Mn]": 34331,
"[#Mo+1]": 32475,
"[#MoH1]": 32622,
"[#Mo]": 34079,
"[#N+1]": 34871,
"[#NH1+1]": 34084,
"[#N]": 32644,
"[#Nb]": 33294,
"[#Nd]": 32500,
"[#Ni]": 32923,
"[#O+1]": 33152,
"[#Os+2]": 34541,
"[#Os-1]": 34293,
"[#Os-2]": 34743,
"[#Os]": 34961,
"[#P+1]": 34616,
"[#P-1]": 34954,
"[#PH1+1]": 32369,
"[#PH2]": 32725,
"[#P]": 32394,
"[#PbH1]": 33091,
"[#Pb]": 33514,
"[#Pd]": 33312,
"[#Pr]": 33610,
"[#Re]": 33278,
"[#Ring2]": 34759,
"[#Ru-1]": 34439,
"[#Ru]": 32929,
"[#S+1]": 34020,
"[#S-1]": 34934,
"[#SH1-1]": 32249,
"[#S]": 33111,
"[#Sb]": 32887,
"[#Sc]": 32796,
"[#SeH1]": 33376,
"[#Se]": 34497,
"[#Si+1]": 32691,
"[#Si-1]": 34610,
"[#SiH1]": 32717,
"[#Si]": 34607,
"[#Sm]": 34906,
"[#Sn]": 34695,
"[#Ta+1]": 32639,
"[#TaH1]": 34671,
"[#Ta]": 32317,
"[#Tb]": 32389,
"[#Tc+1]": 33644,
"[#Tc]": 34530,
"[#Te]": 34199,
"[#Th]": 32473,
"[#Ti+1]": 34902,
"[#Ti]": 32502,
"[#Tl]": 33651,
"[#Tm]": 33785,
"[#U]": 33074,
"[#V+1]": 33201,
"[#V]": 34948,
"[#W+1]": 33000,
"[#W-1]": 33041,
"[#WH1]": 34629,
"[#W]": 33692,
"[#Y]": 32526,
"[#Yb]": 32197,
"[#Zr+1]": 34779,
"[#Zr]": 33477,
"[-/Ring2]": 32375,
"[-\\Ring1]": 32731,
"[-\\Ring2]": 32878,
"[/-Ring1]": 33996,
"[/-Ring2]": 34249,
"[/-Ring3]": 34910,
"[/11CH3]": 33784,
"[/11C]": 34782,
"[/123I]": 32695,
"[/124I]": 33874,
"[/125I]": 33662,
"[/125Te]": 34688,
"[/127I]": 32781,
"[/12C]": 33102,
"[/131I]": 32284,
"[/13C@@H1]": 33670,
"[/13C@H1]": 32173,
"[/13CH1-1]": 32459,
"[/13CH1]": 33213,
"[/13CH2]": 33621,
"[/13CH3]": 34829,
"[/13C]": 34580,
"[/14C@H1]": 33046,
"[/14CH1]": 33386,
"[/14CH2-1]": 33560,
"[/14CH2]": 33978,
"[/14CH3]": 33921,
"[/14C]": 34451,
"[/14NH1]": 34240,
"[/15N+1]": 33131,
"[/15NH1]": 34064,
"[/15NH2]": 34372,
"[/15N]": 32880,
"[/18C]": 34037,
"[/18F]": 33068,
"[/18OH1]": 34195,
"[/18O]": 33288,
"[/19F]": 34133,
"[/2H]": 34318,
"[/32P]": 33358,
"[/34S]": 32774,
"[/35Cl]": 34875,
"[/35S]": 33332,
"[/37Cl]": 33627,
"[/3H]": 34783,
"[/76Br]": 33035,
"[/79Br]": 32485,
"[/80Br]": 32605,
"[/9C]": 32413,
"[/Al+1]": 32753,
"[/Al+2]": 32626,
"[/Al-1]": 32186,
"[/AlH1+1]": 33019,
"[/AlH1]": 34929,
"[/AlH2]": 33044,
"[/Al]": 33498,
"[/As+1]": 32359,
"[/AsH1]": 33774,
"[/As]": 34944,
"[/At]": 34543,
"[/B+1]": 34865,
"[/B-1]": 34450,
"[/BH0]": 34025,
"[/BH1-1]": 32306,
"[/B]": 34068,
"[/Bi]": 34099,
"[/Br-1]": 32617,
"[/Br]": 34260,
"[/C+1]": 34102,
"[/C-1]": 32436,
"[/C@@H1]": 33873,
"[/C@@]": 33142,
"[/C@H1]": 33631,
"[/C@]": 33220,
"[/CH0]": 34182,
"[/CH1+1]": 33539,
"[/CH1-1]": 33165,
"[/CH1]": 34768,
"[/CH2+1]": 33308,
"[/CH2-1]": 32847,
"[/CH2]": 34322,
"[/C]": 32853,
"[/Cl+1]": 33228,
"[/Cl-1]": 33990,
"[/ClH1+1]": 33496,
"[/Cl]": 34985,
"[/F]": 34505,
"[/Ga]": 32985,
"[/GeH1]": 33997,
"[/GeH2]": 33878,
"[/GeH3]": 33484,
"[/Ge]": 32524,
"[/H]": 34913,
"[/Hg+1]": 32520,
"[/HgH1]": 34573,
"[/Hg]": 32210,
"[/I+1]": 33395,
"[/I-1]": 33441,
"[/IH1+1]": 34911,
"[/IH1]": 33169,
"[/I]": 34308,
"[/InH2]": 33475,
"[/In]": 33962,
"[/N+1]": 33216,
"[/N-1]": 33712,
"[/N@+1]": 32945,
"[/N@@+1]": 34090,
"[/NH0]": 33072,
"[/NH1+1]": 32289,
"[/NH1-1]": 33958,
"[/NH1]": 32435,
"[/NH2+1]": 32958,
"[/NH3+1]": 34338,
"[/N]": 33697,
"[/O+1]": 32183,
"[/O-1]": 32897,
"[/OH0]": 32933,
"[/OH1+1]": 34551,
"[/OH2+1]": 34612,
"[/O]": 34896,
"[/P+1]": 34601,
"[/P-1]": 32262,
"[/P@@]": 33759,
"[/P@]": 33008,
"[/PH0]": 34645,
"[/PH1+1]": 34684,
"[/PH1-1]": 32656,
"[/PH1]": 33051,
"[/PH2+1]": 33187,
"[/PH3+1]": 35029,
"[/P]": 33831,
"[/Pb]": 34879,
"[/Po]": 33189,
"[/Ru]": 34156,
"[/S+1]": 34126,
"[/S-1]": 32954,
"[/S@@]": 34016,
"[/S@]": 34148,
"[/SH0]": 32175,
"[/SH1+1]": 35002,
"[/SH2+1]": 33936,
"[/S]": 32545,
"[/Sb]": 34103,
"[/Se+1]": 34839,
"[/Se-1]": 32573,
"[/SeH1]": 33824,
"[/Se]": 32984,
"[/Si+1]": 33908,
"[/Si-1]": 32421,
"[/Si-2]": 34701,
"[/Si@@H1]": 34963,
"[/Si@@]": 35039,
"[/Si@H1]": 34329,
"[/Si@]": 34252,
"[/SiH1-1]": 33717,
"[/SiH1]": 33324,
"[/SiH2+1]": 34991,
"[/SiH2]": 32144,
"[/SiH3]": 33380,
"[/Si]": 34283,
"[/Sn+1]": 33765,
"[/Sn+2]": 32860,
"[/Sn+3]": 32800,
"[/SnH1]": 34117,
"[/SnH2]": 32674,
"[/SnH3]": 34581,
"[/Sn]": 33506,
"[/Te+1]": 33623,
"[/TeH1]": 33950,
"[/Te]": 32198,
"[/TlH1]": 33426,
"[/Tl]": 32446,
"[/W]": 34164,
"[/XeH1]": 33240,
"[100Mo]": 34264,
"[100Pd]": 34650,
"[100Rh]": 32560,
"[100Tc+4]": 33964,
"[100Tc+5]": 34867,
"[100Tc]": 34239,
"[101Mo]": 32803,
"[101Pd]": 33517,
"[101Rh]": 34796,
"[101Tc]": 33473,
"[102Ag]": 34615,
"[102Pd]": 33223,
"[102Rh]": 32795,
"[102Ru]": 33505,
"[103Ag]": 34160,
"[103Cd]": 33639,
"[103Pd+2]": 34793,
"[103Pd]": 33646,
"[103Rh]": 32908,
"[103Ru+2]": 32360,
"[103Ru]": 32905,
"[104Ag]": 32785,
"[104Cd]": 32591,
"[104Pd]": 33696,
"[104Rh]": 33752,
"[104Tc]": 34200,
"[105Ag]": 32964,
"[105Pd]": 33423,
"[105Rh+3]": 34364,
"[105Rh]": 33161,
"[105Ru]": 34464,
"[106Ag]": 33601,
"[106Cd]": 33605,
"[106Pd]": 32907,
"[106Rh]": 33258,
"[106Ru+3]": 33872,
"[106Ru]": 34092,
"[107Ag]": 34467,
"[107Cd]": 32190,
"[107Pd]": 32307,
"[107Rh]": 32221,
"[108Ag]": 32236,
"[108Cd]": 33460,
"[108Pd]": 34000,
"[109Ag]": 32567,
"[109Cd+2]": 32650,
"[109Cd]": 33289,
"[109In]": 32689,
"[109Pd+2]": 33687,
"[109Pd]": 33897,
"[10B-1]": 33311,
"[10BH1-1]": 32630,
"[10BH1]": 34891,
"[10BH2]": 33625,
"[10BH3]": 33713,
"[10B]": 32884,
"[10Be]": 32537,
"[10CH1]": 34651,
"[10CH2]": 34063,
"[10CH3]": 34058,
"[10CH4]": 34540,
"[10C]": 34008,
"[110Ag+1]": 34659,
"[110Ag]": 33834,
"[110Cd]": 32232,
"[110In]": 32917,
"[110Pd]": 33578,
"[110Ru]": 32477,
"[110Sn]": 34998,
"[110Te]": 35051,
"[111Ag]": 34230,
"[111Cd+2]": 32601,
"[111Cd]": 33504,
"[111IH1]": 32411,
"[111In+3]": 34583,
"[111In-1]": 33034,
"[111InH2]": 34989,
"[111InH3]": 33583,
"[111In]": 34760,
"[111Sn]": 34557,
"[112Ag]": 34089,
"[112Cd]": 32253,
"[112In]": 33562,
"[112Pd]": 34559,
"[112Sn]": 33474,
"[113Ag]": 32992,
"[113Cd]": 33842,
"[113In+3]": 32276,
"[113In]": 33940,
"[113Sn]": 32366,
"[114Cd]": 33480,
"[114In+3]": 34882,
"[114In]": 34178,
"[114Sn]": 34356,
"[115Ag]": 34468,
"[115Cd]": 34617,
"[115In]": 33020,
"[115Sb]": 33678,
"[115Sn]": 34191,
"[116Cd]": 33608,
"[116In]": 34717,
"[116Sb]": 34732,
"[116Sn]": 32136,
"[116Te]": 33903,
"[117Cd]": 33319,
"[117In]": 34586,
"[117Sb]": 34186,
"[117Sn+2]": 34569,
"[117Sn+4]": 32733,
"[117SnH2]": 33056,
"[117Sn]": 33801,
"[118Pd+2]": 33619,
"[118Pd]": 34054,
"[118Sb]": 32438,
"[118Sn]": 33062,
"[119In]": 33750,
"[119Sb]": 32496,
"[119Sn]": 33549,
"[11B-1]": 32771,
"[11BH3]": 34640,
"[11B]": 34328,
"[11C-1]": 32664,
"[11C@@H1]": 34734,
"[11C@H1]": 34677,
"[11CH1-1]": 33684,
"[11CH1]": 33461,
"[11CH2]": 33594,
"[11CH3+1]": 33431,
"[11CH3-1]": 32904,
"[11CH3]": 34515,
"[11CH4]": 32986,
"[11C]": 32826,
"[120I-1]": 33973,
"[120IH1]": 33417,
"[120I]": 33603,
"[120Sb]": 34162,
"[120Sn]": 34140,
"[120Te]": 33788,
"[120Xe]": 32508,
"[121I-1]": 33013,
"[121IH1]": 32155,
"[121I]": 32487,
"[121Sb]": 34359,
"[121Sn+2]": 34706,
"[121SnH2]": 33772,
"[121Sn]": 32914,
"[121Te]": 34309,
"[121Xe]": 33674,
"[122I-1]": 33344,
"[122IH1]": 32732,
"[122I]": 33989,
"[122Sb]": 34351,
"[122Sn]": 32582,
"[122Te]": 32271,
"[122Xe]": 34835,
"[123I-1]": 33918,
"[123IH1]": 33023,
"[123I]": 32226,
"[123Sb]": 34837,
"[123Sn]": 33664,
"[123Te]": 34184,
"[123Xe]": 34251,
"[124I-1]": 34285,
"[124IH1]": 33370,
"[124I]": 34785,
"[124Sb]": 34412,
"[124Sn]": 32433,
"[124Te]": 33829,
"[124Xe]": 33292,
"[125Cs+1]": 34854,
"[125Cs]": 34754,
"[125I-1]": 34576,
"[125IH1]": 34410,
"[125I]": 33742,
"[125Sb]": 32341,
"[125Sn+4]": 33065,
"[125Sn]": 32171,
"[125Te+4]": 34510,
"[125Te]": 32928,
"[125Xe]": 32371,
"[126Ba]": 32594,
"[126I-1]": 33012,
"[126IH1]": 33995,
"[126I]": 32924,
"[126Sb+3]": 32948,
"[126Sb]": 32735,
"[126Sn]": 33914,
"[126Te]": 34256,
"[126Xe]": 32223,
"[127Cs+1]": 33847,
"[127Cs]": 34799,
"[127I-1]": 32609,
"[127IH1]": 33993,
"[127I]": 34002,
"[127Sb+3]": 34379,
"[127Sb]": 34272,
"[127Sn]": 32125,
"[127Te]": 33691,
"[127Xe]": 33206,
"[128Ba]": 34723,
"[128IH1]": 33409,
"[128I]": 34277,
"[128Sb]": 32426,
"[128Sn]": 32808,
"[128Te]": 34790,
"[128Xe]": 32994,
"[129Cs+1]": 32607,
"[129Cs]": 35023,
"[129I-1]": 33497,
"[129IH1]": 33130,
"[129I]": 34053,
"[129Sb]": 34914,
"[129Te]": 33440,
"[129Xe]": 34153,
"[12BH2]": 32158,
"[12B]": 32787,
"[12C-1]": 32247,
"[12C@@H1]": 34213,
"[12C@@]": 33310,
"[12C@H1]": 34206,
"[12C@]": 32612,</