Spaces:
Build error
Build error
from langchain.agents import Tool, tool | |
import requests | |
from langchain import OpenAI | |
from langchain import LLMMathChain, SerpAPIWrapper | |
import os | |
from rdkit import Chem | |
def query2smiles(text):
    '''This function queries the one given molecule name and returns a SMILES string from the record.

    Args:
        text: Name of a single molecule, looked up through the PubChem
            PUG REST ``compound/name`` endpoint.

    Returns:
        The ``IsomericSMILES`` value of the first property record in the
        response, or the string ``"Could not find the SMILES for <text>"``
        when the lookup fails for any reason (network error, unknown name,
        unexpected response shape, non-string input).
    '''
    try:
        # Query the PubChem database. The timeout keeps a stalled connection
        # from blocking the caller indefinitely.
        response = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
            + text
            + '/property/IsomericSMILES/JSON',
            timeout=30,
        )
        data = response.json()
        # NOTE: salts are not stripped; the SMILES is returned exactly as
        # found in the first record.
        return data['PropertyTable']['Properties'][0]['IsomericSMILES']
    except Exception:
        # Best-effort tool: report the failure as text instead of raising.
        # The message must actually be returned (a bare expression statement
        # here would make the function return None).
        return f"Could not find the SMILES for {text}"
def smiles2IUPAC(text):
    '''This function queries the one given SMILES string and returns an IUPAC name from the record.

    Args:
        text: A single SMILES string, looked up through the PubChem
            PUG REST ``compound/smiles`` endpoint.

    Returns:
        The ``IUPACName`` value of the first property record in the
        response, or the string ``"Could not find the IUPAC name for <text>"``
        when the lookup fails for any reason.
    '''
    try:
        # Query the PubChem database. The timeout keeps a stalled connection
        # from blocking the caller indefinitely.
        response = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/'
            + text
            + '/property/IUPACName/JSON',
            timeout=30,
        )
        data = response.json()
        iupac_name = data["PropertyTable"]["Properties"][0]["IUPACName"]
        return iupac_name
    except Exception:
        # Best-effort tool: report the failure as text instead of raising,
        # but let KeyboardInterrupt/SystemExit propagate (unlike bare except).
        return f"Could not find the IUPAC name for {text}"
def formula2IUPAC(text):
    '''This function queries the one given chemical formula and returns a material name from the record.

    Args:
        text: A single molecular formula (for example ``"C6H6"``).

    Returns:
        The ``IUPACName`` value of the first property record in the
        response, or the string ``"Could not find the IUPAC name for <text>"``
        when the lookup fails for any reason.
    '''
    try:
        # Use the synchronous "fastformula" search: the plain "formula"
        # endpoint is asynchronous in PUG REST and answers with a ListKey to
        # poll rather than a PropertyTable, so the lookup below would fail.
        response = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastformula/'
            + text
            + '/property/IUPACName/JSON',
            timeout=30,  # do not hang forever on a stalled connection
        )
        data = response.json()
        iupac_name = data["PropertyTable"]["Properties"][0]["IUPACName"]
        return iupac_name
    except Exception:
        # Best-effort tool: report the failure as text instead of raising,
        # but let KeyboardInterrupt/SystemExit propagate (unlike bare except).
        return f"Could not find the IUPAC name for {text}"
def name2formula(text):
    '''This function queries the one given material name and returns a chemical formula from the record.

    Args:
        text: Name of a single material, looked up through the PubChem
            PUG REST ``compound/name`` endpoint.

    Returns:
        The ``MolecularFormula`` value of the first property record in the
        response, or the string
        ``"Could not find the molecular formula for <text>"`` when the lookup
        fails for any reason.
    '''
    try:
        # Query the PubChem database. The timeout keeps a stalled connection
        # from blocking the caller indefinitely.
        response = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
            + text
            + '/property/MolecularFormula/JSON',
            timeout=30,
        )
        data = response.json()
        formula = data["PropertyTable"]["Properties"][0]["MolecularFormula"]
        return formula
    except Exception:
        # Best-effort tool: report the failure as text instead of raising,
        # but let KeyboardInterrupt/SystemExit propagate (unlike bare except).
        return f"Could not find the molecular formula for {text}"
def canonicalizeSMILES(smiles):
    '''Given a smiles representation, this function returns a canonicalized version of the same smiles.
    It's better to search for molecules in its canonicalized form.

    Args:
        smiles: SMILES string to canonicalize.

    Returns:
        The SMILES string RDKit writes back for the parsed molecule, or the
        string ``"Invalid SMILES string: <smiles>"`` when RDKit cannot parse
        the input.
    '''
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; handing
        # that to MolToSmiles would raise an opaque argument error. Return a
        # readable message instead, like the other lookup tools in this file.
        return f"Invalid SMILES string: {smiles}"
    return Chem.MolToSmiles(mol)
def web_search(keywords, search_engine="google"):
    '''Useful to do a simple google search.
    Use this tool to find general information from websites.
    Use keywords for your search.

    Args:
        keywords: Query text handed to ``SerpAPIWrapper.run``.
        search_engine: Value forwarded to the ``SerpAPIWrapper`` constructor
            as ``search_engine``.

    Returns:
        Whatever ``SerpAPIWrapper.run`` returns for the query.
    '''
    # The API key is read from the SERP_API_KEY environment variable.
    api_key = os.getenv("SERP_API_KEY")
    # NOTE(review): confirm that the installed SerpAPIWrapper actually honours
    # the `search_engine` keyword when selecting the backend.
    searcher = SerpAPIWrapper(
        serpapi_api_key=api_key,
        search_engine=search_engine,
    )
    return searcher.run(keywords)
def LLM_predict(prompt):
    '''This function receives a prompt generated with context by the create_context_prompt tool and requests a completion from a language model. Then returns the completion.

    Args:
        prompt: Prompt text sent to the model as a single-item batch.

    Returns:
        The ``text`` of the first generation for that prompt.
    '''
    settings = {
        "model_name": 'text-ada-001',  # TODO: Maybe change to gpt-4 when ready
        "temperature": 0.7,
        "n": 1,
        "best_of": 5,
        "top_p": 1.0,
        "stop": ["\n\n", "###", "#", "##"],
    }
    completion_model = OpenAI(**settings)
    llm_result = completion_model.generate([prompt])
    # generations[0] belongs to the only prompt submitted; [0] picks its
    # first candidate.
    first_candidate = llm_result.generations[0][0]
    return first_candidate.text
# Tool callables collected in one list, in this order.
# formula2IUPAC and name2formula are defined above but currently left out.
common_tools = [
    query2smiles,
    smiles2IUPAC,
    canonicalizeSMILES,
    web_search,
    LLM_predict,
]