Spaces:
Runtime error
Runtime error
File size: 4,961 Bytes
e67043b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import requests
from pydantic import BaseModel
from bs4 import BeautifulSoup
import json, random
from ...tool import Tool
from typing import List, Optional, Union
class ChemicalPropAPI:
    """Small client for the PubChem PUG REST ``compound`` endpoint.

    Every lookup is a blocking HTTP GET issued through ``requests``. XML
    answers are scanned with BeautifulSoup's ``html.parser``; the property
    lookup decodes a JSON answer.
    """

    # Property names requested by get_prop_by_cid; joined with commas in the URL.
    _PROPERTIES = (
        "MolecularFormula",
        "MolecularWeight",
        "CanonicalSMILES",
        "IsomericSMILES",
        "IUPACName",
        "XLogP",
        "ExactMass",
        "MonoisotopicMass",
        "TPSA",
        "Complexity",
        "Charge",
        "HBondDonorCount",
        "HBondAcceptorCount",
        "RotatableBondCount",
        "HeavyAtomCount",
        "CovalentUnitCount",
    )

    def __init__(self, timeout: Optional[float] = 30.0) -> None:
        """Store the endpoint base URL and the per-request timeout.

        Args:
            timeout: Seconds handed to ``requests.get`` for every call;
                ``None`` disables the timeout (the request may then block
                indefinitely).
        """
        self._endpoint = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/"
        self._timeout = timeout

    @staticmethod
    def _quote(value, safe: str = "") -> str:
        """Percent-encode *value* so it can be used as one URL path segment.

        Without this, characters such as ``#``, ``?`` or ``/`` inside a
        SMILES string or compound name would be read as URL syntax and the
        request would be truncated or routed to a different path.
        NOTE(review): confirm that the service accepts percent-encoded
        slashes in the path; otherwise such inputs need another transport.
        """
        from urllib.parse import quote

        return quote(str(value), safe=safe)

    def _fetch_text(self, url: str, params: Optional[dict] = None) -> str:
        """GET *url* (with optional query *params*) and return the body text."""
        return requests.get(url, params=params, timeout=self._timeout).text

    def _fetch_tag_texts(
        self, url: str, tag: str, params: Optional[dict] = None
    ) -> List[str]:
        """Return the text of every ``<tag>`` element in the answer for *url*."""
        # The body is already decoded text, so no ``from_encoding`` is passed.
        soup = BeautifulSoup(self._fetch_text(url, params), "html.parser")
        # find_all yields an empty result set when nothing matches.
        return [node.text for node in soup.find_all(tag)]

    def get_name_by_cid(self, cid: str, top_k: Optional[int] = None) -> List[str]:
        """Return synonyms listed for compound *cid*.

        Args:
            cid: Compound identifier placed in the request path.
            top_k: Keep only the first ``top_k`` synonyms; ``None`` keeps all.

        Returns:
            The synonym strings in response order (empty if none are found).
        """
        url = f"{self._endpoint}cid/{self._quote(cid, safe=',')}/synonyms/XML"
        # Slicing with ``None`` keeps the whole list.
        return self._fetch_tag_texts(url, "synonym")[:top_k]

    def get_cid_by_struct(self, smiles: str) -> List[str]:
        """Return the compound IDs reported for the structure *smiles*.

        Returns:
            The ``cid`` element texts in response order (empty if none).
        """
        url = f"{self._endpoint}smiles/{self._quote(smiles)}/cids/XML"
        return self._fetch_tag_texts(url, "cid")

    def get_cid_by_name(self, name: str, name_type: Optional[str] = None) -> List[str]:
        """Return the compound IDs reported for *name*.

        Args:
            name: Compound name placed in the request path.
            name_type: When given, sent as the ``name_type`` query argument.

        Returns:
            The ``cid`` element texts in response order (empty if none).
        """
        url = f"{self._endpoint}name/{self._quote(name)}/cids/XML"
        params = None if name_type is None else {"name_type": name_type}
        return self._fetch_tag_texts(url, "cid", params)

    def get_prop_by_cid(self, cid: str) -> dict:
        """Return the first property record reported for compound *cid*.

        Returns:
            The first entry of ``PropertyTable.Properties`` in the decoded
            JSON answer.

        Raises:
            KeyError, IndexError: If the answer lacks that structure.
            ValueError: If the answer body is not valid JSON.
        """
        fields = ",".join(self._PROPERTIES)
        url = f"{self._endpoint}cid/{self._quote(cid, safe=',')}/property/{fields}/json"
        return json.loads(self._fetch_text(url))["PropertyTable"]["Properties"][0]
class GetNameResponse(BaseModel):
    """name list"""

    # The docstring above is kept verbatim: it may be surfaced in the
    # generated schema, so rewording it could change runtime output.

    # List of name strings carried by the response.
    names: List[str]
class GetStructureResponse(BaseModel):
    """structure list"""

    # The docstring above is kept verbatim: it may be surfaced in the
    # generated schema, so rewording it could change runtime output.

    # Integer status field.
    # NOTE(review): the endpoints registered in build_tool return string
    # states such as "matched"; confirm where this model is used.
    state: int

    # Optional text payload; defaults to None when omitted.
    content: Optional[str] = None
class GetIDResponse(BaseModel):
    # Response schema with a status and a payload. No class docstring is
    # added on purpose: it could show up in the generated schema.

    # Integer status field.
    # NOTE(review): the /get_id endpoint in build_tool returns string states
    # ("precise" / "not precise"); confirm where this model is used.
    state: int

    # Either one string or a list of strings.
    content: Union[str, List[str]]
def build_tool(config) -> Tool:
    """Create the "Chemical Property Plugin" tool and register its endpoints.

    Args:
        config: Object supporting ``in`` and item access. When it holds a
            truthy ``"debug"`` entry, the client is read from
            ``config["chemical_prop_api"]``; otherwise a fresh
            ``ChemicalPropAPI`` is constructed.

    Returns:
        The ``Tool`` with ``/get_name``, ``/get_allname``,
        ``/get_id_by_struct``, ``/get_id`` and ``/get_prop`` registered.
    """
    tool = Tool(
        "Chemical Property Plugin",
        description="looking up a chemical's property",
        name_for_model="Chemical Property",
        description_for_model="Plugin for looking up a chemical's property using a chemical knowledge base. All input should be a json like {'input': 'some input'}. Please use the provided questions and search step by step.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="hello@contact.com",
        legal_info_url="hello@legal.com",
    )

    # Debug mode lets the caller inject a replacement client.
    if "debug" in config and config["debug"]:
        chemical_prop_api = config["chemical_prop_api"]
    else:
        chemical_prop_api = ChemicalPropAPI()

    # NOTE(review): the endpoint docstrings below are kept verbatim;
    # presumably Tool exposes them as endpoint descriptions, so confirm
    # before rewording any of them.

    @tool.get("/get_name")
    def get_name(cid: str):
        """prints the possible 3 synonyms of the queried compound ID"""
        ans = chemical_prop_api.get_name_by_cid(cid, top_k=3)
        return {"names": ans}

    @tool.get("/get_allname")
    def get_allname(cid: str):
        """prints all the possible synonyms (might be too many, use this function carefully)."""
        ans = chemical_prop_api.get_name_by_cid(cid)
        return {"names": ans}

    @tool.get("/get_id_by_struct")
    def get_id_by_struct(smiles: str):
        """prints the ID of the queried compound SMILES. This should only be used if smiles is provided or retrieved in the previous step. The input should not be a string, but a SMILES formula."""
        cids = chemical_prop_api.get_cid_by_struct(smiles)
        if not cids:
            return {"state": "no result"}
        return {"state": "matched", "content": cids[0]}

    @tool.get("/get_id")
    def get_id(name: str):
        """prints the ID of the queried compound name, and prints the possible 5 names if the queried name can not been precisely matched,"""
        # First attempt: lookup with the default name matching.
        cids = chemical_prop_api.get_cid_by_name(name)
        if cids:
            return {"state": "precise", "content": cids[0]}

        # Second attempt: word-level search.
        cids = chemical_prop_api.get_cid_by_name(name, name_type="word")
        # get_name returns {"names": [...]}; compare against the synonym
        # list itself, since ``in`` on the dict would only test its keys.
        if cids and name in get_name(cids[0])["names"]:
            return {"state": "precise", "content": cids[0]}

        # No exact hit: offer the synonyms of up to five random candidates.
        # Shuffle a copy so the list returned by the client is not mutated.
        candidates = list(cids)
        random.shuffle(candidates)
        ans = [get_name(cid) for cid in candidates[:5]]
        return {"state": "not precise", "content": ans}

    @tool.get("/get_prop")
    def get_prop(cid: str):
        """prints the properties of the queried compound ID"""
        return chemical_prop_api.get_prop_by_cid(cid)

    return tool
|