Spaces:
Build error
Build error
maykcaldas
commited on
Commit
•
f274d93
1
Parent(s):
5216067
First commit
Browse files
agent.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mapi_tools import MAPI_class_tools, MAPI_reg_tools
|
2 |
+
from utils import common_tools
|
3 |
+
from langchain import OpenAI
|
4 |
+
from gpt_index import GPTListIndex, GPTIndexMemory
|
5 |
+
from langchain import agents
|
6 |
+
from langchain.agents import initialize_agent
|
7 |
+
|
8 |
+
# --- Classification tool providers -------------------------------------------
# Arguments: Materials Project field, human-readable property name,
# label used when the field is truthy, label used when it is falsy.
stability = MAPI_class_tools("is_stable", "stable", "Stable", "Unstable")
magnetism = MAPI_class_tools("is_magnetic", "magnetic", "Magnetic", "Not magnetic")
metal = MAPI_class_tools("is_metal", "metallic", "Metal", "Not metal")
gap_direct = MAPI_class_tools("is_gap_direct", "gap direct", "Gap direct", "Gap indirect")

# --- Regression tool providers -----------------------------------------------
# Arguments: Materials Project field, human-readable property name.
band_gap = MAPI_reg_tools("band_gap", "band gap")
energy_per_atom = MAPI_reg_tools("energy_per_atom", "energy per atom gap")
formation_energy_per_atom = MAPI_reg_tools("formation_energy_per_atom", "formation energy per atom gap")
volume = MAPI_reg_tools("volume", "volume")
density = MAPI_reg_tools("density", "density")
atomic_density = MAPI_reg_tools("density_atomic", "atomic density")
electronic_energy = MAPI_reg_tools("e_electronic", "electronic energy")
ionic_energy = MAPI_reg_tools("e_ion", "cationic energy")
total_energy = MAPI_reg_tools("e_total", "total energy")


# Conversation memory handed to the agent under the "chat_history" key.
memory = GPTIndexMemory(
    index=GPTListIndex([]),
    memory_key="chat_history",
    query_kwargs={"response_mode": "compact"},
)
llm = OpenAI(temperature=0.7)

# The order below is the order in which the tools are handed to the agent.
_tool_providers = (
    stability,
    magnetism,
    gap_direct,
    metal,
    band_gap,
    volume,
    density,
    atomic_density,
    formation_energy_per_atom,
    energy_per_atom,
    electronic_energy,
    ionic_energy,
    total_energy,
)
tools = [
    provided_tool
    for provider in _tool_providers
    for provided_tool in provider.get_tools()
]
tools = tools + agents.load_tools(["llm-math", "python_repl"], llm=llm)
tools = tools + common_tools

agent_chain = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    verbose=True,
    memory=memory,
)
|
app.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import agent
|
4 |
+
import os
|
5 |
+
|
6 |
+
# CSS passed to gr.Blocks(css=...): sets the font of the Gradio container.
css_style = (
    "\n"
    ".gradio-container {\n"
    'font-family: "IBM Plex Mono";\n'
    "}\n"
)
|
11 |
+
|
12 |
+
def agent_run(q, openai_api_key, mapi_api_key):
    """Run the agent on one question and return its answer.

    Args:
        q: The question to pass to the agent as ``input``.
        openai_api_key: Value exported as the ``OPENAI_API_KEY`` environment
            variable before the agent runs.
        mapi_api_key: Value exported as the ``MAPI_API_KEY`` environment
            variable before the agent runs.

    Returns:
        The string produced by ``agent.agent_chain.run``, or a generic
        "please try again" message when the run raises.
    """
    # Local import: this module does not import logging at file level.
    import logging

    # NOTE(review): the keys are written to the process-wide environment, so
    # concurrent callers overwrite each other's keys -- confirm this is OK.
    os.environ["OPENAI_API_KEY"] = openai_api_key
    os.environ["MAPI_API_KEY"] = mapi_api_key
    try:
        return agent.agent_chain.run(input=q)
    except Exception:
        # Keep the friendly message for the UI, but record the traceback so
        # failures can be diagnosed. `Exception` (rather than a bare except)
        # lets KeyboardInterrupt/SystemExit propagate.
        logging.getLogger(__name__).exception("Agent run failed for query %r", q)
        return "Something went wrong, please try again"
|
20 |
+
|
21 |
+
# Introductory Markdown rendered at the top of the page.
_INTRO_MARKDOWN = '''
# A LLM application developed during the LLM March *MADNESS* Hackathon
- Developed by: Mayk Caldas ([@maykcaldas](https://github.com/maykcaldas)) and Sam Cox ([@SamCox822](https://github.com/SamCox822))

## What is this?
- This is a demo of a LLM agent that can answer questions about materials science using the [LangChain🦜️🔗](https://github.com/hwchase17/langchain/) and the [Materials Project API](https://materialsproject.org/).
- Its behave is based on Large Language Models (LLM) and aim to be a tool to help scientists with quick predictions of a nunerous of properties of materials.
It is a work in progress, so please be patient with it.


### Some keys are needed in order to use it:
1. An openAI API key ( [Check it here](https://platform.openai.com/account/api-keys) )
2. A material project's API key ( [Check it here](https://materialsproject.org/api#api-key) )
'''

# Markdown shown inside the collapsible "List of properties" accordion.
_PROPERTIES_MARKDOWN = """
Classification tasks: Stability, magnetism, gap_direct, metal,
regression tasks: band_gap, volume, density, atomic_density, formation energy per atom, energy per atom, electronic energy, ionic energy, total energy
"""

with gr.Blocks(css=css_style) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Accordion("List of properties we developed tools for", open=False):
        gr.Markdown(_PROPERTIES_MARKDOWN)

    # Both keys are collected as password fields and forwarded to agent_run.
    openai_api_key = gr.Textbox(
        label="OpenAI API Key",
        placeholder="sk-...",
        type="password",
    )
    mapi_api_key = gr.Textbox(
        label="Material Project API Key",
        placeholder="...",
        type="password",
    )

    with gr.Tab("MAPI Query"):
        text_input = gr.Textbox(label="", placeholder="Enter question here...")
        text_output = gr.Textbox()
        text_button = gr.Button("Query!")

    # Clicking the button calls agent_run(question, openai key, mapi key)
    # and writes the returned string into the output box.
    text_button.click(
        agent_run,
        inputs=[text_input, openai_api_key, mapi_api_key],
        outputs=text_output,
    )

demo.launch()
|
mapi_tools.py
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mp_api.client import MPRester
|
2 |
+
from emmet.core.summary import HasProps
|
3 |
+
import openai
|
4 |
+
import langchain
|
5 |
+
from langchain import OpenAI
|
6 |
+
from langchain import agents
|
7 |
+
from langchain.agents import initialize_agent
|
8 |
+
from langchain.agents import Tool, tool
|
9 |
+
from langchain import LLMMathChain, SerpAPIWrapper
|
10 |
+
from gpt_index import GPTListIndex, GPTIndexMemory
|
11 |
+
from langchain import SerpAPIWrapper
|
12 |
+
from langchain.prompts.few_shot import FewShotPromptTemplate
|
13 |
+
from langchain.prompts.prompt import PromptTemplate
|
14 |
+
from langchain.vectorstores import FAISS, Chroma
|
15 |
+
from langchain.embeddings import OpenAIEmbeddings
|
16 |
+
from langchain.prompts.example_selector import (MaxMarginalRelevanceExampleSelector,
|
17 |
+
SemanticSimilarityExampleSelector)
|
18 |
+
import requests
|
19 |
+
from rdkit import Chem
|
20 |
+
import pandas as pd
|
21 |
+
import os
|
22 |
+
|
23 |
+
class MAPITools:
    """Base class that bundles LangChain tools for one Materials Project property.

    The methods read ``self.prop`` (the summary field requested from the API)
    and ``self.prop_name`` (the name interpolated into tool names and
    descriptions); neither is set here, so subclasses must assign both.
    Subclasses must also implement ``check_prop_by_formula`` and
    ``create_context_prompt``.
    """

    def __init__(self):
        self.model = 'text-ada-001'  # maybe change to gpt-4 when ready
        # Passed as `k` to the example selector built by the subclasses.
        self.k = 10

    def get_material_atoms(self, formula):
        """Return the element symbols found in ``formula``, comma separated.

        A symbol is an upper-case letter followed by any lower-case letters.
        Each symbol is reported once, in order of first appearance, so
        "CH3COOH" yields "C,H,O" rather than repeating C, O and H.

        Args:
            formula: Chemical formula string such as "Fe2O3".

        Returns:
            The distinct symbols joined by "," (empty string if none match).
        """
        import re

        symbols = re.findall(r"[A-Z][a-z]*", formula)
        # dict.fromkeys drops repeats while keeping first-seen order.
        return ",".join(dict.fromkeys(symbols))

    def check_prop_by_formula(self, formula):
        """Look up the property for ``formula``; implemented by subclasses."""
        raise NotImplementedError('Should be implemented in children classes')

    def search_similars_by_atom(self, atoms):
        """Search the Materials Project for materials containing ``atoms``.

        Args:
            atoms: Element symbols separated by commas; spaces are ignored.

        Returns:
            Whatever ``mpr.summary.search`` returns when asked for the
            ``formula_pretty`` and ``self.prop`` fields.
        """
        atoms = atoms.replace(" ", "")
        # The API key is read from the MAPI_API_KEY environment variable.
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(
                elements=atoms.split(','),
                fields=["formula_pretty", self.prop],
            )
        return docs

    def create_context_prompt(self, formula):
        """Build the few-shot prompt for ``formula``; implemented by subclasses."""
        raise NotImplementedError('Should be implemented in children classes')

    def LLM_predict(self, prompt):
        """Send ``prompt`` to the completion model and return the completion text.

        Args:
            prompt: Prompt text, typically produced by ``create_context_prompt``.

        Returns:
            The text of the first generation for the prompt.
        """
        llm = OpenAI(
            model_name=self.model,
            temperature=0.7,
            n=1,
            best_of=5,
            top_p=1.0,
            stop=["\n\n", "###", "#", "##"],
            # model_kwargs=kwargs,
        )
        return llm.generate([prompt]).generations[0][0].text

    def get_tools(self):
        """Return the list of LangChain ``Tool`` objects for this property.

        The description of the context tool depends on whether this instance
        is a ``MAPI_class_tools`` (classification) or not (regression).
        ``search_similars_by_atom`` is not exposed as a tool here.
        """
        if isinstance(self, MAPI_class_tools):
            context_description = (
                f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict if the material is {self.prop_name}."
            )
        else:
            context_description = (
                f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict the {self.prop_name} of a material."
            )

        return [
            Tool(
                name="Get atoms in material",
                func=self.get_material_atoms,
                description=(
                    "Receives a material formula and returns the atoms symbols present in it separated by comma."
                ),
            ),
            Tool(
                name=f"Checks if material is {self.prop_name} by formula",
                func=self.check_prop_by_formula,
                description=(
                    f"This functions searches in the material project's API for the formula and returns if it is {self.prop_name} or not."
                ),
            ),
            Tool(
                name=f"Create {self.prop_name} context to LLM search",
                func=self.create_context_prompt,
                description=context_description,
            ),
            Tool(
                # Was misspelled "LLM predictiom".
                name="LLM prediction",
                func=self.LLM_predict,
                description=(
                    "This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion"
                ),
            ),
        ]
|
105 |
+
|
106 |
+
class MAPI_class_tools(MAPITools):
    """Tools for a yes/no (classification) Materials Project property."""

    def __init__(self, prop, prop_name, p_label, n_label):
        """Store the property configuration.

        Args:
            prop: Name of the summary field requested from the API.
            prop_name: Human-readable name interpolated into prompts and tools.
            p_label: Label reported when the field value is truthy.
            n_label: Label reported when the field value is falsy.
        """
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name
        self.p_label = p_label
        self.n_label = n_label

    def check_prop_by_formula(self, formula):
        """Look up ``formula`` in the Materials Project and classify it.

        Only the first returned document is inspected, and only if its
        ``formula_pretty`` equals ``formula`` exactly.

        Returns:
            ``self.p_label`` or ``self.n_label`` depending on the truthiness
            of the property, or a "Could not find ..." message otherwise.
        """
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
            if docs and docs[0].formula_pretty == formula:
                return self.p_label if docs[0].dict()[self.prop] else self.n_label
        return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """Build a few-shot prompt asking whether ``formula`` has the property.

        Materials sharing elements with ``formula`` are fetched, labelled with
        ``p_label``/``n_label``, de-duplicated, and offered to a
        max-marginal-relevance example selector.

        Returns:
            The formatted prompt string, ending with the question about
            ``formula``.
        """
        elements = self.get_material_atoms(formula)
        similars = self.search_similars_by_atom(elements)
        labelled = [
            {
                'formula': ex.formula_pretty,
                'prop': self.p_label if ex.dict()[self.prop] else self.n_label,
            }
            for ex in similars
        ]
        examples = pd.DataFrame(labelled).drop_duplicates().to_dict(orient="records")
        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
            examples,
            OpenAIEmbeddings(),
            FAISS,
            k=self.k,
        )

        prefix = (
            f'You are a bot who can predict if a material is {self.prop_name}.\n'
            f'Given this list of known materials and the information if they are {self.p_label} or {self.n_label}, \n'
            f'you need to answer the question if the last material is {self.prop_name}:'
        )
        # "@@@" separates question from answer; "###" ends each example.
        prompt_template = PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"Is {{formula}} a {self.prop_name} material?@@@\n{{prop}}###",
        )
        suffix = f"Is {{formula}} a {self.prop_name} material?@@@\n"
        prompt = FewShotPromptTemplate(
            example_prompt=prompt_template,
            example_selector=example_selector,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"],
        )

        return prompt.format(formula=formula)
|
159 |
+
|
160 |
+
class MAPI_reg_tools(MAPITools):
    """Tools for a numeric (regression) Materials Project property."""

    # TODO: deal with units
    def __init__(self, prop, prop_name):
        """Store the property configuration.

        Args:
            prop: Name of the summary field requested from the API.
            prop_name: Human-readable name interpolated into prompts and tools.
        """
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name

    def check_prop_by_formula(self, formula):
        """Look up ``formula`` in the Materials Project and return its value.

        Only the first returned document is inspected, and only if its
        ``formula_pretty`` equals ``formula`` exactly.

        Returns:
            The stored value of the property; a "There is no record ..."
            message when the material matches but the value is ``None``; or a
            "Could not find ..." message when no matching material is found.
        """
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
            if docs and docs[0].formula_pretty == formula:
                value = docs[0].dict()[self.prop]
                # The missing-value check belongs to the *matching* material;
                # previously it was only reached when the formula did not match.
                if value is None:
                    return f"There is no record of {self.prop_name} for {formula}"
                return value
        return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """Build a few-shot prompt asking for the property value of ``formula``.

        Materials sharing elements with ``formula`` are fetched, their values
        formatted as text, rows without a value dropped, and the rest offered
        to a max-marginal-relevance example selector.

        Returns:
            The formatted prompt string, ending with the question about
            ``formula``.
        """
        elements = self.get_material_atoms(formula)
        similars = self.search_similars_by_atom(elements)
        rows = []
        for ex in similars:
            value = ex.dict()[self.prop]
            rows.append({
                'formula': ex.formula_pretty,
                # NOTE(review): "2f" is a width of 2 with default precision;
                # ".2f" may have been intended -- confirm before changing.
                'prop': f"{value:2f}" if value is not None else None,
            })
        examples = pd.DataFrame(rows).drop_duplicates().dropna().to_dict(orient="records")

        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
            examples,
            OpenAIEmbeddings(),
            FAISS,
            k=self.k,
        )

        prefix = (
            f'You are a bot who can predict the {self.prop_name} of a material .\n'
            f'Given this list of known materials and the measurement of their {self.prop_name}, \n'
            f'you need to answer the what is the {self.prop_name} of the material:'
            'The answer should be numeric and finish with ###'
        )
        # "@@@" separates question from answer; "###" ends each example.
        prompt_template = PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"What is the {self.prop_name} for {{formula}}?@@@\n{{prop}}###",
        )
        suffix = f"What is the {self.prop_name} for {{formula}}?@@@\n"
        prompt = FewShotPromptTemplate(
            example_prompt=prompt_template,
            example_selector=example_selector,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"],
        )

        return prompt.format(formula=formula)
|
utils.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.agents import Tool, tool
|
2 |
+
import requests
|
3 |
+
from langchain import OpenAI
|
4 |
+
from langchain import LLMMathChain, SerpAPIWrapper
|
5 |
+
from rdkit import Chem
|
6 |
+
|
7 |
+
@tool
def query2smiles(text):
    '''This function queries the one given molecule name and returns a SMILES string from the record'''
    # Returns the IsomericSMILES of the first PubChem record for `text`, or a
    # "Could not find ..." message when the request or the lookup fails.
    try:
        # Query the PubChem PUG REST service by compound name.
        r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/' + text + '/property/IsomericSMILES/JSON')
        data = r.json()
        return data['PropertyTable']['Properties'][0]['IsomericSMILES']
    except Exception:
        # The message used to be built but never returned (so the tool
        # returned None), and it referred to the IUPAC name, not the SMILES.
        return f"Could not find the SMILES for {text}"
|
20 |
+
|
21 |
+
@tool
def smiles2IUPAC(text):
    '''This function queries the one given smiles name and returns a IUPAC name from the record'''
    # Returns the IUPACName of the first PubChem record for the SMILES in
    # `text`; any failure yields the "Could not find ..." message instead.
    try:
        url = (
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/'
            + text
            + '/property/IUPACName/JSON'
        )
        record = requests.get(url).json()
        iupac_name = record["PropertyTable"]["Properties"][0]["IUPACName"]
    except:
        return f"Could not find the IUPAC name for {text}"
    return iupac_name
|
32 |
+
|
33 |
+
@tool
def formula2IUPAC(text):
    '''This function queries the one given chemical formula and returns a material name from the record.'''
    # Returns the IUPACName of the first PubChem record for the formula in
    # `text`; any failure yields the "Could not find ..." message instead.
    try:
        url = (
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/formula/'
            + text
            + '/property/IUPACName/JSON'
        )
        record = requests.get(url).json()
        print(record)  # the decoded response is echoed to stdout
        iupac_name = record["PropertyTable"]["Properties"][0]["IUPACName"]
    except:
        return f"Could not find the IUPAC name for {text}"
    return iupac_name
|
44 |
+
|
45 |
+
@tool
def name2formula(text):
    '''This function queries the one given material name and returns a chemical formula from the record.'''
    # Returns the MolecularFormula of the first PubChem record for the name in
    # `text`; any failure yields the "Could not find ..." message instead.
    try:
        url = (
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
            + text
            + '/property/MolecularFormula/JSON'
        )
        record = requests.get(url).json()
        print(record)  # the decoded response is echoed to stdout
        molecular_formula = record["PropertyTable"]["Properties"][0]["MolecularFormula"]
    except:
        return f"Could not find the molecular formula for {text}"
    return molecular_formula
|
56 |
+
|
57 |
+
@tool
def canonicalizeSMILES(smiles):
    '''Given a smiles representation, this function returns a canonicalized version of the same smiles.
    It's better to search for molecules in its canonicalized form'''
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit returns None for a SMILES it cannot parse; report that to the
        # caller instead of letting MolToSmiles(None) raise.
        return f"Could not parse the SMILES {smiles}"
    return Chem.MolToSmiles(mol)
|
62 |
+
|
63 |
+
@tool
def web_search(keywords, search_engine="google"):
    '''Useful to do a simple google search.
    Use this tool to find general information from websites.
    Use keywords for your search.
    '''
    # Local import: this module has no file-level `import os`, so the
    # os.getenv call below used to raise NameError on every invocation.
    import os

    # The SerpAPI key is read from the SERP_API_KEY environment variable.
    return SerpAPIWrapper(
        serpapi_api_key=os.getenv("SERP_API_KEY"),
        search_engine=search_engine
    ).run(keywords)
|
73 |
+
|
74 |
+
@tool
def LLM_predict(prompt):
    ''' This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion'''
    # Completion settings are gathered first, then the model is built from them.
    completion_settings = dict(
        model_name='text-ada-001',  # TODO: Maybe change to gpt-4 when ready
        temperature=0.7,
        n=1,
        best_of=5,
        top_p=1.0,
        stop=["\n\n", "###", "#", "##"],
    )
    completion_model = OpenAI(**completion_settings)
    result = completion_model.generate([prompt])
    # Only the first generation of the single submitted prompt is used.
    first_generation = result.generations[0][0]
    return first_generation.text
|
87 |
+
|
88 |
+
# Tools exported by this module as a single list.
# formula2IUPAC and name2formula are defined above but are not included.
common_tools = [query2smiles, smiles2IUPAC, canonicalizeSMILES, web_search, LLM_predict]
|