# NOTE(review): removed non-code residue captured from a HuggingFace Spaces
# page ("Spaces / Sleeping / Sleeping") — it was not valid Python.
from groq import Groq | |
import groq | |
import streamlit as st | |
from openai import OpenAI | |
import json | |
import streamlit.components.v1 as components | |
import requests | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from youtubesearchpython import VideosSearch | |
from rdkit import Chem | |
from rdkit.Chem import Draw, AllChem | |
import os | |
import queue | |
import re | |
import tempfile | |
import threading | |
import requests | |
from bs4 import BeautifulSoup | |
from embedchain import App | |
from embedchain.config import BaseLlmConfig | |
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield, | |
generate) | |
# API clients, keyed from environment variables (GROQ_API / OPENAI_API).
# NOTE(review): both clients are constructed again further down the file —
# the later assignments make these redundant.
client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))

# OpenAI function-calling schema: pull a single website URL out of free text.
# NOTE(review): does not appear to be referenced anywhere in this file.
link_custom_functions = [
    {
        'name': 'extract_website_url',
        'description': 'Get the website url',
        'parameters': {
            'type': 'object',
            'properties': {
                'link': {'type': 'string', 'description': 'website url'},
            }
        }
    }
]
def embedchain_bot(db_path, api_key):
    """Build an embedchain App for chat-over-PDF.

    Uses OpenAI (gpt-3.5-turbo-1106) for both the LLM and the embedder,
    with a Chroma vector store rooted at ``db_path``.
    """
    llm_config = {
        "model": "gpt-3.5-turbo-1106",
        "temperature": 0.5,
        "max_tokens": 1000,
        "top_p": 1,
        "stream": True,
        "api_key": api_key,
    }
    vectordb_config = {
        "collection_name": "chat-pdf",
        "dir": db_path,
        "allow_reset": True,
    }
    return App.from_config(
        config={
            "llm": {"provider": "openai", "config": llm_config},
            "vectordb": {"provider": "chroma", "config": vectordb_config},
            "embedder": {"provider": "openai", "config": {"api_key": api_key}},
            "chunker": {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"},
        }
    )
def get_db_path():
    """Create and return a fresh temporary directory for the vector DB."""
    return tempfile.mkdtemp()
def get_ec_app(api_key):
    """Return the embedchain App cached in Streamlit session state.

    On the first call of a session, creates the app (backed by a fresh
    temp-dir database) and caches it in ``st.session_state``.
    """
    if "app" not in st.session_state:
        print("Creating app")
        st.session_state.app = embedchain_bot(get_db_path(), api_key)
        return st.session_state.app
    print("Found app in session state")
    return st.session_state.app
def groq_response(content, prompt):
    """Send ``content + prompt`` to Groq (mixtral-8x7b-32768) and return the reply text.

    On connection or rate-limit errors a Streamlit error banner is shown and
    None is returned, so callers must be prepared for a None result.
    """
    try:
        response = client_groq.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content + prompt,
                }
            ],
            model="mixtral-8x7b-32768",
        )
        return response.choices[0].message.content
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in some time.")
    # The error paths previously fell off the end (implicit None); made explicit.
    return None
# NOTE(review): client_openai and client_groq are already constructed near the
# top of the file from the same environment variables; re-creating them here
# was redundant, so the existing instances are reused. client_groq_one is kept
# bound (as an alias) for backward compatibility with any code referencing it.
client_groq_one = client_groq
# Define your custom functions for OpenAI | |
# OpenAI function-calling schema: split a generated response into the full
# text of four numbered scenarios (used by the scenario-tab UI below).
scenario_custom_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'scenario_1': {'type': 'string', 'description': 'scenario number 1 full text'},
                'scenario_2': {'type': 'string', 'description': 'scenario number 2 full text'},
                'scenario_3': {'type': 'string', 'description': 'scenario number 3 full text'},
                'scenario_4': {'type': 'string', 'description': 'scenario number 4 full text'},
            }
        }
    }
]
# OpenAI function-calling schema: one keyword per scenario.
# NOTE(review): reuses the function name 'extract_scenario_info' from
# scenario_custom_functions above, and does not appear to be referenced
# anywhere in this file — possibly dead code or work in progress.
scenario_keyword_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'keyword_1': {'type': 'string', 'description': 'keyword 1'},
                'keyword_2': {'type': 'string', 'description': 'keyword 2'},
                'keyword_3': {'type': 'string', 'description': 'keyword 3'},
                'keyword_4': {'type': 'string', 'description': 'keyword 4'},
            }
        }
    }
]
# OpenAI function-calling schema: extract a single YouTube video ID from
# model output (used to pick which video to embed below).
video_custom_functions = [
    {
        'name': 'extract_video_id',
        'description': 'Get the video ID',
        'parameters': {
            'type': 'object',
            'properties': {
                'video_id': {'type': 'string', 'description': 'video ID'},
            }
        }
    }
]

# Initialize a string to store all transcripts
# Accumulates every fetched transcript (or an error note) across the video
# search loop below, then gets summarized by the LLM.
all_video_transcripts = ""
# OpenAI function-calling schema: extract one molecule name from a chat
# message (used by the sidebar chat to show a 3D viewer).
molecule_custom_functions = [
    {
        'name': 'extract_molecule_info',
        'description': 'Get the molecule name',
        'parameters': {
            'type': 'object',
            'properties': {
                'molecule_name': {'type': 'string', 'description': 'name of the molecule'},
            }
        }
    }
]
# OpenAI function-calling schema: extract the single search keyword from a
# model response (used for the YouTube search query cleanup).
keyword_custom_functions = [
    {
        'name': 'extract_keyword_info',
        'description': 'Get the search query keyword',
        'parameters': {
            'type': 'object',
            'properties': {
                # BUG FIX: "teh" -> "the" — this description is sent to the
                # model and should be clean English.
                'keyword': {'type': 'string', 'description': 'keyword of the search query'},
            }
        }
    }
]
# Example SMILES strings for each component - replace these with the actual values retrieved from your API calls
# These placeholders are overwritten by PubChem lookups when a reaction query
# is submitted in the sidebar.
reactant_1_smiles = 'your_reactant_1_smiles_here'
reactant_2_smiles = 'your_reactant_2_smiles_here'  # This might be an empty string if not present
reagent_3_smiles = 'your_reagent_3_smiles_here'
product_4_smiles = 'your_product_4_smiles_here'
product_5_smiles = 'your_product_5_smiles_here'
# OpenAI function-calling schema: split a described reaction into up to two
# reactants, one reagent, and two products (by compound name).
molecule_custom_functions_reaction = [
    {
        'name': 'extract_molecules_info',
        'description': 'Get the name of the individual molecules',
        'parameters': {
            'type': 'object',
            'properties': {
                'reactant_1': {'type': 'string', 'description': 'reactant number 1 '},
                'reactant_2': {'type': 'string', 'description': 'reactant number 2 '},
                'reagent_3': {'type': 'string', 'description': 'reagent number 1 '},
                'product_4': {'type': 'string', 'description': 'product number 1'},
                'product_5': {'type': 'string', 'description': 'product number 2'},
            }
        }
    }
]
# Streamlit UI
# NOTE(review): "π" in the title looks like a mojibake'd emoji — confirm the
# intended character.
st.title("Stereo World Updated π")
# Holds the reaction-drawing PNG bytes once a reaction query has been drawn.
image_variable = None

# Session states initialization
if 'prompt' not in st.session_state:
    st.session_state.prompt = ''
if 'selected_options' not in st.session_state:
    st.session_state.selected_options = []
if 'selected_options_reaction' not in st.session_state:
    st.session_state.selected_options_reaction = []

# User inputs
st.session_state.selected_options = st.multiselect("Select options", ["fun based", "context based", "real world based", "conceptual textbook based"])
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")
with st.sidebar:
    # BUG FIX: the title previously rendered the mojibake sequence "π€"
    # (UTF-8 emoji bytes decoded as Latin-1); most likely the robot emoji
    # was intended for the assistant.
    st.sidebar.title("Chat with the assistant 🤖")

# Input for search query
search_query = st.sidebar.text_input("Enter your video search query")
reaction_query = st.sidebar.text_input("Enter your reaction search query")
name_reaction = st.checkbox("I am searching a name reaction")
def _fetch_canonical_smiles(name):
    """Look up a compound's canonical SMILES on PubChem by name.

    Returns '' when the name is empty or the lookup fails, so callers can
    filter missing components out of the reaction string.

    BUG FIX: the original issued every GET twice (once for the status code,
    once for the body) and queried PubChem even for empty names; it also kept
    the raw response text, whose trailing newline corrupted the joined
    reaction SMILES.
    """
    if not name:
        return ''
    resp = requests.get(
        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{name}/property/CanonicalSMILES/TXT"
    )
    return resp.text.strip() if resp.status_code == 200 else ''

if reaction_query:
    # Ask Groq for a step-by-step reaction, then use OpenAI function calling
    # to pull out the individual molecule names.
    prompt = reaction_query
    content = "please give complete step by step reaction along with the complete name of the molecules for the reaction, the requested reaction is : "
    response = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': response}],
        functions=molecule_custom_functions_reaction,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    reactant_1 = data.get('reactant_1', '')
    reactant_2 = data.get('reactant_2', '')
    reagent_3 = data.get('reagent_3', '')
    product_4 = data.get('product_4', '')
    product_5 = data.get('product_5', '')

    # Resolve each name to SMILES via PubChem (one request per molecule).
    reactant_1_smiles = _fetch_canonical_smiles(reactant_1)
    reactant_2_smiles = _fetch_canonical_smiles(reactant_2)
    reagent_3_smiles = _fetch_canonical_smiles(reagent_3)
    product_4_smiles = _fetch_canonical_smiles(product_4)
    product_5_smiles = _fetch_canonical_smiles(product_5)

    # Build the reaction SMILES "reactants>reagents>products", dropping any
    # component that could not be resolved.
    reactants = '.'.join(s for s in (reactant_1_smiles, reactant_2_smiles) if s)
    reagents = '.'.join(s for s in (reagent_3_smiles,) if s)
    products = '.'.join(s for s in (product_4_smiles, product_5_smiles) if s)
    reaction_smiles = '>'.join([reactants, reagents, products])

    try:
        # Generate the reaction from SMILES and draw it to a PNG.
        rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)
        d2d = Draw.MolDraw2DCairo(800, 300)  # Adjust size as needed
        d2d.DrawReaction(rxn)
        png = d2d.GetDrawingText()
        # Save the drawing to a file and keep the bytes for the chat area.
        with open('reaction_image.png', 'wb+') as f:
            f.write(png)
        image_variable = png
    except Exception as e:
        st.write(f"An error occurred: {e}")
if search_query:
    # Clean the query into a single keyword via Groq + OpenAI function calling.
    # (Typos fixed in the prompt: "teh" -> "the", "other that that" -> "other than that".)
    prompt = search_query
    content = "please correct the spelling and write the precise one search keyword for and only give the keyword, only 1 and nothing else other than that : "
    response = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': response}],
        functions=keyword_custom_functions,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    keyword = data['keyword']
    st.sidebar.write(keyword)

    # Perform the search.
    # NOTE(review): the search still uses the raw search_query rather than the
    # cleaned-up keyword above — confirm which was intended.
    # BUG FIX: removed a second, single-result VideosSearch whose result
    # (video_one_id) was never used.
    videosSearch = VideosSearch(search_query, limit=3)
    for video in videosSearch.result()['result']:
        video_id = video['id']  # Extract video ID
        try:
            # Fetch the transcript for the video ID and concatenate its text.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
            transcript_text = "\n".join([t['text'] for t in transcript_list])
            all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
        except Exception:
            # Transcript unavailable (disabled, no English track, etc.) —
            # record the failure so the summarizer still sees this video.
            error_message = "Transcript not available or error in fetching transcript."
            all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{error_message}\n---\n"

# Summarize the collected transcripts and pick the best video to embed.
# (Prompt typos fixed: "the the" and "vido_id" — the schema field is video_id.)
video_id = ""
if all_video_transcripts:
    prompt = all_video_transcripts
    content = "write a one sentence summary for the given videos and always preserve and give me the video_id always "
    compressed_transcripts = groq_response(content, prompt)
    prompt = compressed_transcripts
    content = "give me the best video with maximum content and the best keywords from the transcript and always preserve and give me the video_id always "
    video_id_fetch = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': video_id_fetch}],
        functions=video_custom_functions,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    video_id = data['video_id']
    st.video(f"https://www.youtube.com/watch?v={video_id}")
# Main chat area: a fixed-height container that all assistant/user messages
# go into.
messages = st.container(height=630)

# If a reaction image was generated above, show it with a summary sentence.
# (reactant_1 etc. exist whenever image_variable was set.)
if image_variable:
    messages.chat_message("assistant").write(f"When you react {reactant_1} with {reactant_2} using {reagent_3}, you get {product_4} and {product_5}" + " here is the reaction in 2D bond representation:")
    messages.image(image_variable)

# Optional embedded Marvin JS sketcher for drawing molecules.
if check_box:
    messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
    with messages.chat_message("assistant"):
        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org", height=600)
prompt_sidebar = st.chat_input("Say something")
if prompt_sidebar:
    messages.chat_message("user").write(prompt_sidebar)
    prompt = prompt_sidebar
    # Typo fixed in the prompt: "thsi" -> "this".
    sidebar_chat = groq_response("please answer this query : ", prompt)
    # In parallel, try to extract a molecule name from the user's message so
    # we can embed an interactive 3D viewer.
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': prompt_sidebar}],
        functions=molecule_custom_functions,
        function_call='auto'
    )
    try:
        # BUG FIX: message.function_call can be None when the model answers in
        # plain text; dereferencing .arguments on it raised AttributeError,
        # which the original except clause did not catch.
        function_call = response_functions.choices[0].message.function_call
        arguments = function_call.arguments if function_call is not None else None
        data = json.loads(arguments) if arguments else {}
        # BUG FIX: the original did data['molecule_name',''] — indexing with a
        # TUPLE key — which always raised KeyError, so the 3D viewer never
        # appeared. Both the if and else branches were also identical; merged.
        molecule_name = data.get('molecule_name', '')
        if molecule_name:
            response = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{molecule_name}/cids/TXT")
            # .strip(): PubChem's TXT endpoint returns a trailing newline that
            # would otherwise be embedded in the iframe URL.
            cid = response.text.strip()
            with messages.chat_message("assistant"):
                # NOTE(review): "π" looks like a mojibake'd emoji — confirm
                # the intended character.
                st.write(f"Here is the molecule {molecule_name} in 3D you can interact with it too π:")
                components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")
        messages.chat_message("assistant").write(sidebar_chat)
    except (IndexError, KeyError, TypeError, AttributeError, json.JSONDecodeError):
        # Extraction failed — still show the plain chat answer.
        messages.chat_message("assistant").write(sidebar_chat)
        data = {}
if st.session_state.prompt:
    prompt = st.session_state.prompt
    selected_options = " ".join(st.session_state.selected_options)
    # BUG FIX: this list was named `messages`, shadowing the chat container
    # created above; renamed to avoid confusion.
    scenario_messages = [
        {"role": "user", "content": f"create a {selected_options} scenarios based task question for learning stereochemistry, create 4 scenarios each time and number them: {prompt}"},
    ]
    chat_completion = client_groq.chat.completions.create(
        messages=scenario_messages,
        model="mixtral-8x7b-32768",
    )
    response = chat_completion.choices[0].message.content
    if response:
        # Split the generated text into four scenarios via function calling.
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=scenario_custom_functions,
            function_call='auto'
        )
        data = json.loads(response_functions.choices[0].message.function_call.arguments)
        # Tabs for scenarios
        scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
        tabs = st.tabs(scenario_tabs)
        for i, tab in enumerate(tabs):
            with tab:
                st.header(scenario_tabs[i])
                # BUG FIX: .get() guards against the model returning fewer
                # than 4 scenarios (direct indexing raised KeyError).
                scenario_text = data.get(f'scenario_{i+1}', '')
                st.write(scenario_text)
                prompt = scenario_text
                content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
                scenario_generated = groq_response(content, prompt)
                st.write(scenario_generated)
                prompt = scenario_generated
                content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
                chat_completion_hint = groq_response(content, prompt)
                st.text_area("Enter your answer here", key=f'answer_{i}')
                # NOTE(review): "π" looks like a mojibake'd emoji — confirm
                # the intended character.
                with st.expander("See hint for answering the question" + str(i+1) + "π"):
                    st.write(chat_completion_hint)
                # Upload PDF button
                uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
                if uploaded_file is not None:
                    st.success("File uploaded successfully!")
                # Fixed 3D/2D explanation viewers side by side.
                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("See explanation 3D"):
                        components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
                with col2:
                    with st.expander("See explanation 2D"):
                        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")
# Example of error handling with client_groq API calls |