File size: 4,961 Bytes
e67043b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import requests
from pydantic import BaseModel
from bs4 import BeautifulSoup
import json, random
from ...tool import Tool
from typing import List, Optional, Union


class ChemicalPropAPI:
    """Small client for the PubChem PUG REST ``compound`` endpoints.

    Every lookup issues one HTTP GET against ``self._endpoint``. Identifier
    lookups parse the XML reply and return the text of the matching elements;
    the property lookup parses a JSON reply.

    Args:
        timeout: Seconds to wait for PubChem before ``requests`` raises a
            timeout error. Defaults to 10 seconds.
    """

    # Class-level fallback so subclasses that skip ``__init__`` still get a
    # bounded request time.
    _timeout: float = 10.0

    # Properties requested by ``get_prop_by_cid``, in the order sent to PubChem.
    _PROPERTIES = (
        "MolecularFormula",
        "MolecularWeight",
        "CanonicalSMILES",
        "IsomericSMILES",
        "IUPACName",
        "XLogP",
        "ExactMass",
        "MonoisotopicMass",
        "TPSA",
        "Complexity",
        "Charge",
        "HBondDonorCount",
        "HBondAcceptorCount",
        "RotatableBondCount",
        "HeavyAtomCount",
        "CovalentUnitCount",
    )

    def __init__(self, timeout: float = 10.0) -> None:
        self._endpoint = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/"
        self._timeout = timeout

    @staticmethod
    def _quote(value) -> str:
        """Percent-encode *value* so it is safe as a single URL path segment.

        SMILES strings and chemical names routinely contain ``#``, ``?`` and
        ``/``; left raw they would be read as a fragment, a query string or a
        path separator and the request would target the wrong resource.
        """
        from urllib.parse import quote

        return quote(str(value), safe="")

    def _fetch(self, path: str, params: Optional[dict] = None) -> str:
        """GET ``self._endpoint + path`` and return the response body as text.

        The HTTP status is deliberately not checked: the body is handed to the
        parser whatever the status, as the original code did, so the list
        lookups keep returning ``[]`` when the reply has no matching elements.
        """
        response = requests.get(
            f"{self._endpoint}{path}", params=params, timeout=self._timeout
        )
        return response.text

    @staticmethod
    def _tag_texts(markup: str, tag: str) -> List[str]:
        """Return the text of every ``tag`` element found in *markup*.

        ``html.parser`` lowercases tag names, which is why callers pass the
        element name in lowercase. ``find_all`` always returns a list, so no
        ``None`` check is needed.
        """
        soup = BeautifulSoup(markup, "html.parser")
        return [node.text for node in soup.find_all(tag)]

    def get_name_by_cid(self, cid: str, top_k: Optional[int] = None) -> List[str]:
        """Return synonyms of compound *cid*, limited to the first *top_k*.

        Args:
            cid: PubChem compound ID.
            top_k: Maximum number of synonyms to return; ``None`` returns all.

        Returns:
            The synonym strings in the order they appear in the response.
        """
        markup = self._fetch(f"cid/{self._quote(cid)}/synonyms/XML")
        # Slicing with ``None`` keeps the whole list, so no special case.
        return self._tag_texts(markup, "synonym")[:top_k]

    def get_cid_by_struct(self, smiles: str) -> List[str]:
        """Return the compound IDs PubChem reports for a SMILES string.

        Returns an empty list when the response contains no ``cid`` element.
        """
        markup = self._fetch(f"smiles/{self._quote(smiles)}/cids/XML")
        return self._tag_texts(markup, "cid")

    def get_cid_by_name(self, name: str, name_type: Optional[str] = None) -> List[str]:
        """Return the compound IDs PubChem reports for a compound name.

        Args:
            name: Compound name to look up.
            name_type: Optional value sent as the ``name_type`` query
                parameter (this file uses ``"word"``).

        Returns:
            The compound IDs as strings; empty when the response contains no
            ``cid`` element.
        """
        params = None if name_type is None else {"name_type": name_type}
        markup = self._fetch(f"name/{self._quote(name)}/cids/XML", params=params)
        return self._tag_texts(markup, "cid")

    def get_prop_by_cid(self, cid: str) -> dict:
        """Return the property record of compound *cid*.

        Returns:
            The first entry of ``PropertyTable.Properties`` from the JSON
            response, as a dict.

        Raises:
            KeyError: If the response has no ``PropertyTable``/``Properties``.
            IndexError: If ``Properties`` is empty.
            ValueError: If the response body is not valid JSON.
        """
        wanted = ",".join(self._PROPERTIES)
        body = self._fetch(f"cid/{self._quote(cid)}/property/{wanted}/json")
        return json.loads(body)["PropertyTable"]["Properties"][0]


class GetNameResponse(BaseModel):

    """Response schema holding a list of compound names.

    NOTE(review): no endpoint visible in this file references this model;
    confirm whether it is meant to describe the ``{"names": [...]}`` replies.
    """

    # Compound names, one string per entry.
    names: List[str]


class GetStructureResponse(BaseModel):

    """Response schema with a state code and an optional content string.

    NOTE(review): no endpoint visible in this file references this model, and
    the endpoints here return string states (e.g. ``"matched"``) while this
    schema declares ``state`` as an int; confirm the intended contract.
    """

    # Outcome indicator of the lookup.
    state: int
    # Payload of the lookup; omitted (None) when there is nothing to report.
    content: Optional[str] = None


class GetIDResponse(BaseModel):
    """Response schema with a state code and one or several content strings.

    NOTE(review): no endpoint visible in this file references this model, and
    the endpoints here return string states (e.g. ``"precise"``) while this
    schema declares ``state`` as an int; confirm the intended contract.
    """

    # Outcome indicator of the lookup.
    state: int
    # Either a single string or a list of strings.
    content: Union[str, List[str]]


def build_tool(config) -> Tool:
    """Create the "Chemical Property Plugin" tool and register its routes.

    Args:
        config: Mapping of settings. When it holds a truthy ``"debug"`` entry
            the API client is taken from ``config["chemical_prop_api"]``;
            otherwise a new ``ChemicalPropAPI`` is created.

    Returns:
        The ``Tool`` with the ``/get_name``, ``/get_allname``,
        ``/get_id_by_struct``, ``/get_id`` and ``/get_prop`` routes attached.
    """
    tool = Tool(
        "Chemical Property Plugin",
        description="looking up a chemical's property",
        name_for_model="Chemical Property",
        description_for_model="Plugin for looking up a chemical's property using a chemical knowledge base. All input should be a json like {'input': 'some input'}. Please use the provided questions and search step by step.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="hello@contact.com",
        legal_info_url="hello@legal.com",
    )

    if "debug" in config and config["debug"]:
        chemical_prop_api = config["chemical_prop_api"]
    else:
        chemical_prop_api = ChemicalPropAPI()

    # NOTE(review): the endpoint docstrings below are kept word-for-word;
    # presumably Tool publishes them as route descriptions - confirm before
    # rewording them.

    def top_names(cid: str) -> dict:
        """Return ``{"names": [...]}`` with up to 3 synonyms of *cid*.

        Shared by ``get_name`` and ``get_id`` so that ``get_id`` does not have
        to call a decorated route function.
        """
        return {"names": chemical_prop_api.get_name_by_cid(cid, top_k=3)}

    @tool.get("/get_name")
    def get_name(cid: str):
        """prints the possible 3 synonyms of the queried compound ID"""
        return top_names(cid)

    @tool.get("/get_allname")
    def get_allname(cid: str):
        """prints all the possible synonyms (might be too many, use this function carefully)."""
        return {"names": chemical_prop_api.get_name_by_cid(cid)}

    @tool.get("/get_id_by_struct")
    def get_id_by_struct(smiles: str):
        """prints the ID of the queried compound SMILES. This should only be used if smiles is provided or retrieved in the previous step. The input should not be a string, but a SMILES formula."""
        cids = chemical_prop_api.get_cid_by_struct(smiles)
        if not cids:
            return {"state": "no result"}
        return {"state": "matched", "content": cids[0]}

    @tool.get("/get_id")
    def get_id(name: str):
        """prints the ID of the queried compound name, and prints the possible 5 names if the queried name can not been precisely matched,"""
        # 1) Exact name lookup.
        cids = chemical_prop_api.get_cid_by_name(name)
        if cids:
            return {"state": "precise", "content": cids[0]}

        # 2) Word-level lookup; accept the first hit only if the queried name
        #    is one of its top synonyms. The membership test must look inside
        #    the "names" list - testing against the dict itself only compares
        #    with the key "names" and never matches a real compound name.
        cids = chemical_prop_api.get_cid_by_name(name, name_type="word")
        if cids and name in top_names(cids[0])["names"]:
            return {"state": "precise", "content": cids[0]}

        # 3) No precise match: offer the names of up to 5 randomly chosen
        #    candidates (empty list when the word lookup found nothing).
        random.shuffle(cids)
        candidates = [top_names(cid) for cid in cids[:5]]
        return {"state": "not precise", "content": candidates}

    @tool.get("/get_prop")
    def get_prop(cid: str):
        """prints the properties of the queried compound ID"""
        return chemical_prop_api.get_prop_by_cid(cid)

    return tool