Spaces:
Runtime error
Runtime error
| import requests | |
| from pydantic import BaseModel | |
| from bs4 import BeautifulSoup | |
| import json, random | |
| from ...tool import Tool | |
| from typing import List, Optional, Union | |
class ChemicalPropAPI:
    """Minimal client for the PubChem PUG REST ``compound`` endpoints.

    Each public method performs one HTTP GET below ``self._endpoint`` and
    returns plain Python values pulled out of the XML or JSON reply.
    """

    # Seconds to wait for a reply; without a timeout ``requests`` can block
    # indefinitely on a stalled connection.
    _TIMEOUT = 30

    # Property names requested by get_prop_by_cid, in request order.
    _PROPERTIES = (
        "MolecularFormula",
        "MolecularWeight",
        "CanonicalSMILES",
        "IsomericSMILES",
        "IUPACName",
        "XLogP",
        "ExactMass",
        "MonoisotopicMass",
        "TPSA",
        "Complexity",
        "Charge",
        "HBondDonorCount",
        "HBondAcceptorCount",
        "RotatableBondCount",
        "HeavyAtomCount",
        "CovalentUnitCount",
    )

    def __init__(self) -> None:
        self._endpoint = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/"

    def _build_url(self, namespace: str, identifier: str, operation: str) -> str:
        """Return ``<endpoint><namespace>/<identifier>/<operation>``.

        The identifier is percent-encoded in full (``safe=""``) so that
        characters such as ``#``, ``?``, ``/`` or spaces cannot truncate or
        re-route the request path.
        """
        from urllib.parse import quote  # local import keeps the class self-contained

        encoded = quote(str(identifier), safe="")
        return f"{self._endpoint}{namespace}/{encoded}/{operation}"

    def _fetch_tag_texts(self, url: str, tag: str, params: Optional[dict] = None) -> List[str]:
        """GET ``url`` and return the text of every ``<tag>`` element in the reply.

        Returns an empty list when the reply contains no such element.
        """
        markup = requests.get(url, params=params, timeout=self._TIMEOUT).text
        # The markup is already a decoded str, so no from_encoding is passed
        # (BeautifulSoup ignores it for str input and emits a warning).
        # "html.parser" lower-cases tag names, hence the lower-case ``tag``.
        soup = BeautifulSoup(markup, "html.parser")
        return [node.text for node in soup.find_all(tag)]

    def get_name_by_cid(self, cid: str, top_k: Optional[int] = None) -> List[str]:
        """Return the synonyms listed for compound ``cid``.

        Args:
            cid: Compound identifier placed in the request path.
            top_k: Keep only the first ``top_k`` synonyms; ``None`` keeps all.
        """
        names = self._fetch_tag_texts(
            self._build_url("cid", cid, "synonyms/XML"), "synonym"
        )
        # Slicing with None returns the whole list, matching "no limit".
        return names[:top_k]

    def get_cid_by_struct(self, smiles: str) -> List[str]:
        """Return the compound IDs reported for the structure ``smiles``.

        NOTE(review): the SMILES is percent-encoded into the URL path;
        PubChem may additionally require stereo SMILES containing ``/`` to be
        sent as a URL argument or via POST -- confirm against the PUG REST docs.
        """
        return self._fetch_tag_texts(
            self._build_url("smiles", smiles, "cids/XML"), "cid"
        )

    def get_cid_by_name(self, name: str, name_type: Optional[str] = None) -> List[str]:
        """Return the compound IDs reported for ``name``.

        Args:
            name: Compound name placed (percent-encoded) in the request path.
            name_type: When given, sent as the ``name_type`` query argument.
        """
        params = None if name_type is None else {"name_type": name_type}
        return self._fetch_tag_texts(
            self._build_url("name", name, "cids/XML"), "cid", params=params
        )

    def get_prop_by_cid(self, cid: str) -> dict:
        """Return the first entry of the property table for compound ``cid``.

        Raises:
            KeyError / IndexError: if the JSON reply has no
                ``PropertyTable.Properties[0]`` entry.
        """
        operation = "property/" + ",".join(self._PROPERTIES) + "/json"
        payload = requests.get(
            self._build_url("cid", cid, operation), timeout=self._TIMEOUT
        ).text
        return json.loads(payload)["PropertyTable"]["Properties"][0]
class GetNameResponse(BaseModel):
    """name list"""

    # Schema with a single required field holding a list of strings.
    # Its shape matches the {"names": [...]} dicts built by get_name /
    # get_allname inside build_tool.
    names: List[str]
class GetStructureResponse(BaseModel):
    """structure list"""

    # Lookup state, declared as an integer.
    # NOTE(review): get_id_by_struct in build_tool returns string states
    # ("no result" / "matched") -- confirm whether int is really intended.
    state: int
    # Optional string payload; defaults to None when omitted.
    content: Optional[str] = None
class GetIDResponse(BaseModel):
    # Lookup state, declared as an integer.
    # NOTE(review): get_id in build_tool returns string states
    # ("precise" / "not precise") -- confirm whether int is really intended.
    state: int
    # Required payload: either a single string or a list of strings.
    content: Union[str, List[str]]
def build_tool(config) -> Tool:
    """Create the "Chemical Property Plugin" tool and its lookup handlers.

    Args:
        config: Mapping-like configuration. When it contains a truthy
            ``"debug"`` entry, the backend is taken from
            ``config["chemical_prop_api"]`` (e.g. a stub); otherwise a fresh
            ``ChemicalPropAPI`` is created.

    Returns:
        The constructed ``Tool`` instance.
    """
    tool = Tool(
        "Chemical Property Plugin",
        description="looking up a chemical's property",
        name_for_model="Chemical Property",
        description_for_model="Plugin for looking up a chemical's property using a chemical knowledge base. All input should be a json like {'input': 'some input'}. Please use the provided questions and search step by step.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="hello@contact.com",
        legal_info_url="hello@legal.com",
    )

    if "debug" in config and config["debug"]:
        chemical_prop_api = config["chemical_prop_api"]
    else:
        chemical_prop_api = ChemicalPropAPI()

    # NOTE(review): none of the handlers below is attached to `tool` in the
    # code visible here -- presumably route decorators were lost; confirm
    # against the Tool API before relying on them being exposed.
    # Handler docstrings are kept verbatim because they may be surfaced to
    # the model/user at runtime.

    def get_name(cid: str):
        """prints the possible 3 synonyms of the queried compound ID"""
        return {"names": chemical_prop_api.get_name_by_cid(cid, top_k=3)}

    def get_allname(cid: str):
        """prints all the possible synonyms (might be too many, use this function carefully)."""
        return {"names": chemical_prop_api.get_name_by_cid(cid)}

    def get_id_by_struct(smiles: str):
        """prints the ID of the queried compound SMILES. This should only be used if smiles is provided or retrieved in the previous step. The input should not be a string, but a SMILES formula."""
        cids = chemical_prop_api.get_cid_by_struct(smiles)
        if not cids:
            return {"state": "no result"}
        # Only the first reported compound ID is returned.
        return {"state": "matched", "content": cids[0]}

    def get_id(name: str):
        """prints the ID of the queried compound name, and prints the possible 5 names if the queried name can not been precisely matched,"""
        # 1) Default name lookup: any hit counts as a precise match.
        cids = chemical_prop_api.get_cid_by_name(name)
        if cids:
            return {"state": "precise", "content": cids[0]}

        # 2) Word-level lookup: precise only if the query is one of the first
        #    hit's synonyms. get_name returns {"names": [...]}, so the
        #    membership test must look inside the "names" list -- testing
        #    against the dict itself would only compare with its keys.
        cids = chemical_prop_api.get_cid_by_name(name, name_type="word")
        if cids and name in get_name(cids[0])["names"]:
            return {"state": "precise", "content": cids[0]}

        # 3) No precise match: offer the synonyms of up to five randomly
        #    chosen candidates so the caller can refine the query.
        random.shuffle(cids)
        candidates = [get_name(cid) for cid in cids[:5]]
        return {"state": "not precise", "content": candidates}

    def get_prop(cid: str):
        """prints the properties of the queried compound ID"""
        return chemical_prop_api.get_prop_by_cid(cid)

    return tool