ZackBradshaw's picture
Upload folder using huggingface_hub
e67043b verified
import json
import random
from typing import List, Optional, Union
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel

from ...tool import Tool
class ChemicalPropAPI:
    """Small client for the PubChem PUG REST ``compound`` endpoints.

    Every method performs one blocking HTTP GET via ``requests`` and returns
    plain Python values extracted from the XML or JSON response body.
    """

    # Properties requested by ``get_prop_by_cid``, in the order they are sent.
    _PROPERTIES = (
        "MolecularFormula",
        "MolecularWeight",
        "CanonicalSMILES",
        "IsomericSMILES",
        "IUPACName",
        "XLogP",
        "ExactMass",
        "MonoisotopicMass",
        "TPSA",
        "Complexity",
        "Charge",
        "HBondDonorCount",
        "HBondAcceptorCount",
        "RotatableBondCount",
        "HeavyAtomCount",
        "CovalentUnitCount",
    )

    def __init__(self) -> None:
        # Base URL that every request path is appended to.
        self._endpoint = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/"

    def _compound_url(self, namespace: str, identifier: str, operation: str) -> str:
        """Return ``<endpoint><namespace>/<identifier>/<operation>``.

        The identifier is percent-encoded with no "safe" characters so that
        URL delimiters inside it (``#``, ``?``, ``/``, spaces, ...) stay part
        of the path segment instead of truncating or splitting the request.
        """
        encoded = quote(str(identifier), safe="")
        return f"{self._endpoint}{namespace}/{encoded}/{operation}"

    @staticmethod
    def _tag_texts(markup: str, tag: str) -> List[str]:
        """Return the text of every ``tag`` element found in ``markup``.

        ``markup`` is already-decoded text, so no ``from_encoding`` is given:
        Beautiful Soup ignores that argument for ``str`` input and only emits
        a warning. ``find_all`` always returns a list-like result (possibly
        empty), so no ``None`` check is needed.
        """
        soup = BeautifulSoup(markup, "html.parser")
        return [node.text for node in soup.find_all(tag)]

    def get_name_by_cid(self, cid: str, top_k: Optional[int] = None) -> List[str]:
        """Return the synonyms listed for compound ``cid``.

        Args:
            cid: Compound ID to look up.
            top_k: Maximum number of synonyms to return; ``None`` returns all.

        Returns:
            The synonym strings in response order (empty if none were found).
        """
        url = self._compound_url("cid", cid, "synonyms/XML")
        names = self._tag_texts(requests.get(url).text, "synonym")
        return names if top_k is None else names[:top_k]

    def get_cid_by_struct(self, smiles: str) -> List[str]:
        """Return the compound IDs reported for the SMILES string ``smiles``.

        Returns:
            CID strings in response order; empty when the response has none.
        """
        url = self._compound_url("smiles", smiles, "cids/XML")
        return self._tag_texts(requests.get(url).text, "cid")

    def get_cid_by_name(self, name: str, name_type: Optional[str] = None) -> List[str]:
        """Return the compound IDs reported for the compound name ``name``.

        Args:
            name: Compound name to search for.
            name_type: Optional value for the ``name_type`` query argument
                (the tool in this file passes ``"word"``); omitted when ``None``.

        Returns:
            CID strings in response order; empty when the response has none.
        """
        url = self._compound_url("name", name, "cids/XML")
        # Let requests build (and encode) the query string when one is needed.
        params = None if name_type is None else {"name_type": name_type}
        return self._tag_texts(requests.get(url, params=params).text, "cid")

    def get_prop_by_cid(self, cid: str) -> dict:
        """Return the property record of compound ``cid``.

        Returns:
            The first entry of ``PropertyTable.Properties`` in the JSON
            response, i.e. a dict of the requested properties.

        Raises:
            json.JSONDecodeError: If the response body is not valid JSON.
            KeyError: If the response has no ``PropertyTable``/``Properties``.
            IndexError: If the ``Properties`` list is empty.
        """
        operation = "property/" + ",".join(self._PROPERTIES) + "/json"
        body = requests.get(self._compound_url("cid", cid, operation)).text
        return json.loads(body)["PropertyTable"]["Properties"][0]
class GetNameResponse(BaseModel):
    """name list"""
    # Schema for a payload carrying compound names; it has the same shape as
    # the {"names": [...]} dicts returned by the /get_name and /get_allname
    # handlers in this file.
    # names: the synonym strings.
    names: List[str]
class GetStructureResponse(BaseModel):
    """structure list"""
    # Schema with a status field plus an optional single-string payload.
    # NOTE(review): the handlers in this file return string states such as
    # "matched" / "no result", while `state` is declared as int here --
    # confirm which one is intended before using this model for validation.
    state: int
    # content: optional payload; defaults to None when absent.
    content: Optional[str] = None
class GetIDResponse(BaseModel):
    # Schema with a status field plus a payload that is either one string or
    # a list of strings.
    # NOTE(review): /get_id in this file returns string states ("precise" /
    # "not precise") and, in the "not precise" case, a list of
    # {"names": [...]} dicts -- neither matches the types declared here;
    # confirm the intended contract before using this model for validation.
    state: int
    content: Union[str, List[str]]
def build_tool(config) -> Tool:
    """Create the "Chemical Property Plugin" tool and register its routes.

    Args:
        config: Settings container. When it holds a truthy ``"debug"`` entry,
            the object stored under ``"chemical_prop_api"`` is used as the
            lookup backend; otherwise a new ``ChemicalPropAPI`` is created.

    Returns:
        The ``Tool`` instance with the ``/get_name``, ``/get_allname``,
        ``/get_id_by_struct``, ``/get_id`` and ``/get_prop`` routes attached.
    """
    tool = Tool(
        "Chemical Property Plugin",
        description="looking up a chemical's property",
        name_for_model="Chemical Property",
        description_for_model="Plugin for looking up a chemical's property using a chemical knowledge base. All input should be a json like {'input': 'some input'}. Please use the provided questions and search step by step.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="hello@contact.com",
        legal_info_url="hello@legal.com",
    )

    # Debug mode lets the caller inject a backend in place of the live client.
    if "debug" in config and config["debug"]:
        chemical_prop_api = config["chemical_prop_api"]
    else:
        chemical_prop_api = ChemicalPropAPI()

    @tool.get("/get_name")
    def get_name(cid: str):
        """prints the possible 3 synonyms of the queried compound ID"""
        ans = chemical_prop_api.get_name_by_cid(cid, top_k=3)
        return {"names": ans}

    @tool.get("/get_allname")
    def get_allname(cid: str):
        """prints all the possible synonyms (might be too many, use this function carefully)."""
        ans = chemical_prop_api.get_name_by_cid(cid)
        return {"names": ans}

    @tool.get("/get_id_by_struct")
    def get_id_by_struct(smiles: str):
        """prints the ID of the queried compound SMILES. This should only be used if smiles is provided or retrieved in the previous step. The input should not be a string, but a SMILES formula."""
        cids = chemical_prop_api.get_cid_by_struct(smiles)
        if not cids:
            return {"state": "no result"}
        # Only the first reported CID is returned.
        return {"state": "matched", "content": cids[0]}

    @tool.get("/get_id")
    def get_id(name: str):
        """prints the ID of the queried compound name, and prints the possible 5 names if the queried name can not been precisely matched,"""
        # 1) Default name lookup: any hit counts as a precise match.
        cids = chemical_prop_api.get_cid_by_name(name)
        if cids:
            return {"state": "precise", "content": cids[0]}

        # 2) Word-level lookup: precise only if the queried name is one of the
        #    first hit's top-3 synonyms. The check runs against the synonym
        #    list itself; testing `name in get_name(...)` would look at the
        #    keys of the {"names": [...]} dict and practically never match.
        cids = chemical_prop_api.get_cid_by_name(name, name_type="word")
        if cids and name in chemical_prop_api.get_name_by_cid(cids[0], top_k=3):
            return {"state": "precise", "content": cids[0]}

        # 3) No precise match: return the /get_name payloads of up to five
        #    randomly chosen candidates (an empty list if there were none).
        random.shuffle(cids)
        suggestions = [get_name(cid) for cid in cids[:5]]
        return {"state": "not precise", "content": suggestions}

    @tool.get("/get_prop")
    def get_prop(cid: str):
        """prints the properties of the queried compound ID"""
        return chemical_prop_api.get_prop_by_cid(cid)

    return tool