MAPI_LLM / utils.py
maykcaldas's picture
Upload 7 files
77cbf82
from langchain.agents import Tool, tool
import requests
from langchain import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper
import os
from rdkit import Chem
@tool
def query2smiles(text):
    '''This function queries the one given molecule name and returns a SMILES string from the record'''
    # NOTE: with langchain's @tool the docstring above doubles as the tool
    # description given to the agent, so it is deliberately left unchanged.
    #
    # Looks up `text` (a compound name) in PubChem's PUG REST service and
    # returns the SMILES string of the first matching record. On any failure
    # (network error, non-JSON reply, no match) a human-readable message is
    # returned instead of raising, so the agent can keep going.
    from urllib.parse import quote

    try:
        # Percent-encode the name so spaces, '/', '#', '?' etc. cannot change
        # the structure of the request URL.
        encoded_name = quote(text.strip(), safe='')
        r = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
            + encoded_name
            + '/property/IsomericSMILES/JSON',
            timeout=30,  # never let a stalled connection hang the agent
        )
        data = r.json()
        record = data['PropertyTable']['Properties'][0]
        if 'IsomericSMILES' in record:
            return record['IsomericSMILES']
        # NOTE(review): newer PubChem responses appear to label this property
        # 'SMILES' rather than 'IsomericSMILES' -- confirm against the API.
        return record['SMILES']
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError):
        # The original built this message but never returned it (the tool
        # yielded None) and mislabelled the lookup as an IUPAC-name query.
        return f"Could not find the SMILES string for {text}"
@tool
def smiles2IUPAC(text):
    '''This function queries the one given smiles name and returns a IUPAC name from the record'''
    # NOTE: with langchain's @tool the docstring above doubles as the tool
    # description given to the agent, so it is deliberately left unchanged.
    #
    # Looks up the SMILES string `text` in PubChem's PUG REST service and
    # returns the IUPAC name of the first matching record, or a
    # "Could not find ..." message on any failure.
    try:
        # SMILES routinely contain URL-special characters ('#', '/', '\\',
        # '+', '='). Spliced into the URL path they truncate or corrupt the
        # request, so the SMILES is sent as a query argument instead and
        # `requests` takes care of the escaping.
        r = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/property/IUPACName/JSON',
            params={'smiles': text.strip()},
            timeout=30,  # never let a stalled connection hang the agent
        )
        data = r.json()
        return data["PropertyTable"]["Properties"][0]["IUPACName"]
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError):
        # Best-effort tool: report the failure as text rather than raising.
        return f"Could not find the IUPAC name for {text}"
@tool
def formula2IUPAC(text):
    '''This function queries the one given chemical formula and returns a material name from the record.'''
    # NOTE: with langchain's @tool the docstring above doubles as the tool
    # description given to the agent, so it is deliberately left unchanged.
    #
    # Looks up the molecular formula `text` in PubChem's PUG REST service and
    # returns the IUPAC name of the first record in the reply, or a
    # "Could not find ..." message on any failure.
    #
    # NOTE(review): PubChem's `formula` search may answer asynchronously
    # (a "Waiting"/ListKey reply without a PropertyTable), in which case this
    # falls through to the error message; the `fastformula` endpoint might be
    # the synchronous alternative -- confirm against the PUG REST docs.
    from urllib.parse import quote

    try:
        # Percent-encode so charges ('+') and other special characters cannot
        # alter the request URL.
        encoded_formula = quote(text.strip(), safe='')
        r = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/formula/'
            + encoded_formula
            + '/property/IUPACName/JSON',
            timeout=30,  # never let a stalled connection hang the agent
        )
        data = r.json()
        return data["PropertyTable"]["Properties"][0]["IUPACName"]
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError):
        # Best-effort tool: report the failure as text rather than raising.
        return f"Could not find the IUPAC name for {text}"
@tool
def name2formula(text):
    '''This function queries the one given material name and returns a chemical formula from the record.'''
    # NOTE: with langchain's @tool the docstring above doubles as the tool
    # description given to the agent, so it is deliberately left unchanged.
    #
    # Looks up `text` (a compound/material name) in PubChem's PUG REST service
    # and returns the MolecularFormula of the first matching record, or a
    # "Could not find ..." message on any failure.
    from urllib.parse import quote

    try:
        # Percent-encode the name so spaces, '/', '#', '?' etc. cannot change
        # the structure of the request URL.
        encoded_name = quote(text.strip(), safe='')
        r = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
            + encoded_name
            + '/property/MolecularFormula/JSON',
            timeout=30,  # never let a stalled connection hang the agent
        )
        data = r.json()
        return data["PropertyTable"]["Properties"][0]["MolecularFormula"]
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError):
        # Best-effort tool: report the failure as text rather than raising.
        return f"Could not find the molecular formula for {text}"
@tool
def canonicalizeSMILES(smiles):
    '''Given a smiles representation, this function returns a canonicalized version of the same smiles.
    It's better to search for molecules in its canonicalized form'''
    # NOTE: with langchain's @tool the docstring above doubles as the tool
    # description given to the agent, so it is deliberately left unchanged.
    #
    # Parses `smiles` with RDKit and returns RDKit's canonical SMILES for it.
    # Chem.MolFromSmiles signals an unparsable string by returning None;
    # passing that None on to Chem.MolToSmiles would raise, so report the
    # problem as text instead, matching the other tools in this module.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return f"Could not parse the SMILES string {smiles}"
    return Chem.MolToSmiles(mol)
@tool
def web_search(keywords, search_engine="google"):
    '''Useful to do a simple google search.
    Use this tool to find general information from websites.
    Use keywords for your search.
    '''
    # NOTE: with langchain's @tool the docstring above doubles as the tool
    # description given to the agent, so it is kept exactly as written.
    #
    # The SerpAPI key is read from the SERP_API_KEY environment variable on
    # every call; os.getenv yields None when the variable is unset.
    api_key = os.getenv("SERP_API_KEY")
    # NOTE(review): `search_engine` is forwarded verbatim to SerpAPIWrapper;
    # whether the wrapper honours a plain engine name here (as opposed to its
    # `params["engine"]` setting) should be confirmed for the pinned
    # langchain version.
    searcher = SerpAPIWrapper(
        serpapi_api_key=api_key,
        search_engine=search_engine,
    )
    answer = searcher.run(keywords)
    return answer
@tool
def LLM_predict(prompt):
    ''' This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion'''
    # NOTE: with langchain's @tool the docstring above doubles as the tool
    # description given to the agent, so it is kept exactly as written.
    #
    # Completion settings, gathered in one place and handed to the OpenAI
    # wrapper below.
    completion_settings = dict(
        model_name='text-ada-001',  # TODO: Maybe change to gpt-4 when ready
        temperature=0.7,
        n=1,
        best_of=5,
        top_p=1.0,
        stop=["\n\n", "###", "#", "##"],
    )
    llm = OpenAI(**completion_settings)

    # A single prompt is submitted; the text of the first generation for that
    # prompt is what the tool returns.
    result = llm.generate([prompt])
    first_generation = result.generations[0][0]
    return first_generation.text
# Tools made available through this list, in the order listed.
# formula2IUPAC and name2formula are defined above but are currently left
# out of the list.
common_tools = [
    query2smiles,
    smiles2IUPAC,
    canonicalizeSMILES,
    web_search,
    LLM_predict,
]