from groq import Groq
import groq
import streamlit as st
from openai import OpenAI
import json
import streamlit.components.v1 as components
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from youtubesearchpython import VideosSearch
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
import os
import queue
import re
import tempfile
import threading
from bs4 import BeautifulSoup
from embedchain import App
from embedchain.config import BaseLlmConfig
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
generate)
client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))
link_custom_functions = [
{
'name': 'extract_website_url',
'description': 'Get the website url',
'parameters': {
'type': 'object',
'properties': {
'link': {'type': 'string', 'description': 'website url'},
}
}
}
]
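# Embedchain helpers: build a "chat with PDF" app that uses OpenAI for the LLM and the
# embeddings, and a temporary Chroma collection ("chat-pdf") for vector storage.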
def embedchain_bot(db_path, api_key):
return App.from_config(
config={
"llm": {
"provider": "openai",
"config": {
"model": "gpt-3.5-turbo-1106",
"temperature": 0.5,
"max_tokens": 1000,
"top_p": 1,
"stream": True,
"api_key": api_key,
},
},
"vectordb": {
"provider": "chroma",
"config": {"collection_name": "chat-pdf", "dir": db_path, "allow_reset": True},
},
"embedder": {"provider": "openai", "config": {"api_key": api_key}},
"chunker": {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"},
}
)
def get_db_path():
tmpdirname = tempfile.mkdtemp()
return tmpdirname
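# Cache the embedchain app in Streamlit's session state so it is only built once per session.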
def get_ec_app(api_key):
if "app" in st.session_state:
print("Found app in session state")
app = st.session_state.app
else:
print("Creating app")
db_path = get_db_path()
app = embedchain_bot(db_path, api_key)
st.session_state.app = app
return app
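# The embedchain app returned by get_ec_app is typically used along these lines
# (a sketch, not code from this section):
#   app = get_ec_app(os.getenv('OPENAI_API'))
#   app.add("answer.pdf", data_type="pdf_file")
#   reply = app.chat("How is the stereocenter assigned?")
# groq_response: single-turn helper that prepends an instruction to the user's prompt,
# sends it to Groq's Mixtral model, and returns the reply text (empty string on API errors).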
def groq_response(content, prompt):
try:
response = client_groq.chat.completions.create(
messages=[
{
"role": "user",
"content": content + prompt,
}
],
model="mixtral-8x7b-32768",
)
return response.choices[0].message.content
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
        return ""
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in a while.")
        return ""
# Additional Groq client (client_openai and client_groq are already initialized above)
client_groq_one = Groq(api_key=os.getenv('GROQ_API'))
# Define your custom functions for OpenAI
scenario_custom_functions = [
{
'name': 'extract_scenario_info',
'description': 'Get the individual scenarios text',
'parameters': {
'type': 'object',
'properties': {
'scenario_1': {'type': 'string', 'description': 'scenario number 1 full text'},
'scenario_2': {'type': 'string', 'description': 'scenario number 2 full text'},
'scenario_3': {'type': 'string', 'description': 'scenario number 3 full text'},
'scenario_4': {'type': 'string', 'description': 'scenario number 4 full text'},
}
}
}
]
scenario_keyword_functions = [
{
'name': 'extract_scenario_info',
        'description': 'Get the individual keywords for the scenarios',
'parameters': {
'type': 'object',
'properties': {
'keyword_1': {'type': 'string', 'description': 'keyword 1'},
'keyword_2': {'type': 'string', 'description': 'keyword 2'},
'keyword_3': {'type': 'string', 'description': 'keyword 3'},
'keyword_4': {'type': 'string', 'description': 'keyword 4'},
}
}
}
]
video_custom_functions = [
{
'name': 'extract_video_id',
'description': 'Get the video ID',
'parameters': {
'type': 'object',
'properties': {
'video_id': {'type': 'string', 'description': 'video ID'},
}
}
}
]
# Initialize a string to store all transcripts
all_video_transcripts = ""
molecule_custom_functions = [
{
'name': 'extract_molecule_info',
'description': 'Get the molecule name',
'parameters': {
'type': 'object',
'properties': {
'molecule_name': {'type': 'string', 'description': 'name of the molecule'},
}
}
}
]
keyword_custom_functions = [
{
'name': 'extract_keyword_info',
'description': 'Get the search query keyword',
'parameters': {
'type': 'object',
'properties': {
                'keyword': {'type': 'string', 'description': 'keyword of the search query'},
}
}
}
]
# Default SMILES placeholders for each reaction component; they are overwritten by the PubChem lookups below when a reaction query is entered
reactant_1_smiles = 'your_reactant_1_smiles_here'
reactant_2_smiles = 'your_reactant_2_smiles_here' # This might be an empty string if not present
reagent_3_smiles = 'your_reagent_3_smiles_here'
product_4_smiles = 'your_product_4_smiles_here'
product_5_smiles = 'your_product_5_smiles_here'
molecule_custom_functions_reaction = [
{
'name': 'extract_molecules_info',
'description': 'Get the name of the individual molecules',
'parameters': {
'type': 'object',
'properties': {
'reactant_1': {'type': 'string', 'description': 'reactant number 1 '},
'reactant_2': {'type': 'string', 'description': 'reactant number 2 '},
'reagent_3': {'type': 'string', 'description': 'reagent number 1 '},
'product_4': {'type': 'string', 'description': 'product number 1'},
'product_5': {'type': 'string', 'description': 'product number 2'},
}
}
}
]
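# Each schema above is passed to the OpenAI Chat Completions API through the legacy
# `functions=` / `function_call='auto'` parameters; the structured arguments are then
# recovered with json.loads(response.choices[0].message.function_call.arguments).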
# Streamlit UI
st.title("Stereo World Updated 🌍")
image_variable = None
# Session states initialization
if 'prompt' not in st.session_state:
st.session_state.prompt = ''
if 'selected_options' not in st.session_state:
st.session_state.selected_options = []
if 'selected_options_reaction' not in st.session_state:
st.session_state.selected_options_reaction = []
# User inputs
st.session_state.selected_options = st.multiselect("Select options", ["fun based", "context based", "real world based", "conceptual textbook based"])
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")
with st.sidebar:
    st.sidebar.title("Chat with the assistant 🤖")
# Input for search query
search_query = st.sidebar.text_input("Enter your video search query")
reaction_query = st.sidebar.text_input("Enter your reaction search query")
name_reaction = st.checkbox("I am searching a name reaction")
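# Reaction lookup: ask Groq for the full reaction, extract the individual molecule names
# with OpenAI function calling, resolve each name to SMILES via PubChem, and draw the
# reaction in 2D with RDKit.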
if reaction_query:
prompt = reaction_query
content = "please give complete step by step reaction along with the complete name of the molecules for the reaction, the requested reaction is : "
response = groq_response(content, prompt)
response_functions = client_openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{'role': 'user', 'content': response}],
functions=molecule_custom_functions_reaction,
function_call='auto'
)
data = json.loads(response_functions.choices[0].message.function_call.arguments)
reactant_1 = data.get('reactant_1', '')
reactant_2 = data.get('reactant_2', '')
reagent_3 = data.get('reagent_3', '')
product_4 = data.get('product_4', '')
product_5 = data.get('product_5', '')
    reactant_1_smiles = fetch_canonical_smiles(reactant_1)
    reactant_2_smiles = fetch_canonical_smiles(reactant_2)
    reagent_3_smiles = fetch_canonical_smiles(reagent_3)
    product_4_smiles = fetch_canonical_smiles(product_4)
    product_5_smiles = fetch_canonical_smiles(product_5)
#st.write("Reactant 1: ", reactant_1_smiles)
#st.write("Reactant 2: ", reactant_2_smiles)
#st.write("Reagent 3: ", reagent_3_smiles)
#st.write("Product 4: ", product_4_smiles)
#st.write("Product 5: ", product_5_smiles)
    # Build the reaction SMILES string dynamically from the available components
reaction_components = []
# Adding reactants
reactants = [reactant for reactant in [reactant_1_smiles, reactant_2_smiles] if reactant]
if reactants:
reaction_components.append('.'.join(reactants))
else:
reaction_components.append('')
# Adding reagents
reagents = [reagent for reagent in [reagent_3_smiles] if reagent]
if reagents:
reaction_components.append('.'.join(reagents))
else:
reaction_components.append('')
# Adding products
products = [product for product in [product_4_smiles, product_5_smiles] if product]
if products:
reaction_components.append('.'.join(products))
else:
reaction_components.append('')
reaction_smiles = '>'.join(reaction_components)
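    # Reaction SMILES follow the 'reactants>agents>products' convention, e.g. a Fischer
    # esterification could be written as 'CCO.CC(=O)O>OS(=O)(=O)O>CCOC(C)=O'.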
try:
# Generate the reaction from SMILES
rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)
# Draw the reaction
d2d = Draw.MolDraw2DCairo(800, 300) # Adjust size as needed
d2d.DrawReaction(rxn)
png = d2d.GetDrawingText()
# Save the drawing to a file
with open('reaction_image.png', 'wb+') as f:
f.write(png)
image_variable = png
#st.image('reaction_image.png')
except Exception as e:
st.write(f"An error occurred: {e}")
if search_query:
prompt = search_query
content = "please correct the spelling and write teh precise one search keyword for and only give teh keyword, only 1 and nothing else other that that : "
response = groq_response(content, prompt)
response_functions = client_openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{'role': 'user', 'content': response}],
functions=keyword_custom_functions,
function_call='auto'
)
data = json.loads(response_functions.choices[0].message.function_call.arguments)
keyword = data['keyword']
st.sidebar.write(keyword)
# Perform the search
videosSearch = VideosSearch(search_query, limit=3)
video_one = VideosSearch(search_query, limit=1)
for video in video_one.result()['result']:
video_one_id = video['id']
for video in videosSearch.result()['result']:
video_id = video['id'] # Extract video ID
# Display the video thumbnail
#st.image(video['thumbnails'][0]['url'])
# Display the video title
#st.write(f"**{video['title']}**")
try:
# Fetch the transcript for the video ID
transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
# Concatenating all text from the transcript
transcript_text = "\n".join([t['text'] for t in transcript_list])
# Concatenate the transcript to the all_video_transcripts variable
all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
except Exception as e:
error_message = "Transcript not available or error in fetching transcript."
# Concatenate the error message to the all_video_transcripts variable
all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{error_message}\n---\n"
# At this point, all_video_transcripts contains transcripts for all videos concatenated as a single string.
# You can display it or process it as needed.
# Here's an example of displaying the combined transcripts:
video_id = ""
if all_video_transcripts:
#st.text_area("All Video Transcripts", all_video_transcripts, height=300)
prompt = all_video_transcripts
content = "write a one sentence summary for the the given videos and always preserve and give me the vido_id always "
video_compression = groq_response(content, prompt)
compressed_transcripts = video_compression
prompt = compressed_transcripts
content = "give me the best video with maximum content and the best keywords from the transcript and always preserve and give me teh vido_id always "
chat_completion = groq_response(content, prompt)
#st.write(chat_completion.choices[0].message.content)
video_id_fetch = chat_completion
#st.write(video_id_fetch)
response_functions = client_openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{'role': 'user', 'content': video_id_fetch}],
functions=video_custom_functions,
function_call='auto'
)
data = json.loads(response_functions.choices[0].message.function_call.arguments)
video_id = data['video_id']
st.video(f"https://www.youtube.com/watch?v={video_id}")
messages = st.container(height=630)
if image_variable:
messages.chat_message("assistant").write(f"When you react {reactant_1} with {reactant_2} using {reagent_3}, you get {product_4} and {product_5}" + " here is the reaction in 2D bond representation:")
messages.image(image_variable)
if check_box:
messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
with messages.chat_message("assistant"):
components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org", height=600)
prompt_sidebar = st.chat_input("Say something")
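# For each chat message: answer it with Groq, and separately try to extract a molecule
# name via OpenAI function calling so the molecule can be embedded in 3D from its PubChem CID.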
if prompt_sidebar:
messages.chat_message("user").write(prompt_sidebar)
prompt = prompt_sidebar
    sidebar_chat = groq_response("please answer this query: ", prompt)
response_functions = client_openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{'role': 'user', 'content': prompt_sidebar}],
functions=molecule_custom_functions,
function_call='auto'
)
    try:
        arguments = response_functions.choices[0].message.function_call.arguments
        if arguments is not None:
            data = json.loads(arguments)
            molecule_name = data.get('molecule_name', '')
            if molecule_name:
                cid_response = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{molecule_name}/cids/TXT")
                cid = cid_response.text.strip()
                with messages.chat_message("assistant"):
                    st.write(f"Here is the molecule {molecule_name} in 3D, and you can interact with it too 😉:")
                    components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")
            messages.chat_message("assistant").write(sidebar_chat)
        else:
            # No function-call arguments were returned; just show the plain chat reply
            messages.chat_message("assistant").write(sidebar_chat)
            data = {}
    except (IndexError, KeyError, TypeError, AttributeError):
        # The response had no usable function call; fall back to the plain chat reply
        messages.chat_message("assistant").write(sidebar_chat)
        data = {}
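# Scenario generation: build four scenario-based stereochemistry questions in the chosen
# style(s), split them into tabs, and give each tab sub-questions, a hint, an answer box,
# and a PDF upload.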
if st.session_state.prompt:
prompt = st.session_state.prompt
selected_options = " ".join(st.session_state.selected_options)
    # Named scenario_messages to avoid shadowing the `messages` chat container defined above
    scenario_messages = [
        {"role": "user", "content": f"create a {selected_options} scenario-based task question for learning stereochemistry, create 4 scenarios each time and number them: {prompt}"},
    ]
    chat_completion = client_groq.chat.completions.create(
        messages=scenario_messages,
        model="mixtral-8x7b-32768",
    )
response = chat_completion.choices[0].message.content
if response:
response_functions = client_openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{'role': 'user', 'content': response}],
functions=scenario_custom_functions,
function_call='auto'
)
data = json.loads(response_functions.choices[0].message.function_call.arguments)
# Tabs for scenarios
scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
tabs = st.tabs(scenario_tabs)
for i, tab in enumerate(tabs):
with tab:
st.header(scenario_tabs[i])
                scenario_text = data.get(f'scenario_{i+1}', '')
st.write(scenario_text)
prompt = scenario_text
content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
chat_completion_subquestions = groq_response(content, prompt)
scenario_generated = chat_completion_subquestions
st.write(scenario_generated)
prompt = scenario_generated
content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
chat_completion_hint = groq_response(content, prompt)
st.text_area("Enter your answer here", key=f'answer_{i}')
with st.expander("See hint for answering the question" + str(i+1) + "πŸ˜€"):
st.write(chat_completion_hint)
# Upload PDF button
uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
if uploaded_file is not None:
st.success("File uploaded successfully!")
col1, col2 = st.columns(2)
with col1:
with st.expander("See explanation 3D"):
components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
with col2:
with st.expander("See explanation 2D"):
components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")
# Example of error handling with client_groq API calls