from groq import Groq
import groq
import streamlit as st
from openai import OpenAI
import json
import streamlit.components.v1 as components
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from youtubesearchpython import VideosSearch
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
import os
import queue
import re
import tempfile
import threading
from bs4 import BeautifulSoup
from embedchain import App
from embedchain.config import BaseLlmConfig
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield, generate)

# Initialize the API clients
client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))
client_groq_one = Groq(api_key=os.getenv('GROQ_API'))

# Function-calling schema for extracting a website URL
link_custom_functions = [
    {
        'name': 'extract_website_url',
        'description': 'Get the website url',
        'parameters': {
            'type': 'object',
            'properties': {
                'link': {'type': 'string', 'description': 'website url'},
            }
        }
    }
]


def embedchain_bot(db_path, api_key):
    # Build an embedchain app backed by OpenAI and a local Chroma store
    return App.from_config(
        config={
            "llm": {
                "provider": "openai",
                "config": {
                    "model": "gpt-3.5-turbo-1106",
                    "temperature": 0.5,
                    "max_tokens": 1000,
                    "top_p": 1,
                    "stream": True,
                    "api_key": api_key,
                },
            },
            "vectordb": {
                "provider": "chroma",
                "config": {"collection_name": "chat-pdf", "dir": db_path, "allow_reset": True},
            },
            "embedder": {"provider": "openai", "config": {"api_key": api_key}},
            "chunker": {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"},
        }
    )


def get_db_path():
    # Use a temporary directory for the vector database
    return tempfile.mkdtemp()


def get_ec_app(api_key):
    # Reuse the embedchain app stored in the session, or create a new one
    if "app" in st.session_state:
        print("Found app in session state")
        app = st.session_state.app
    else:
        print("Creating app")
        db_path = get_db_path()
        app = embedchain_bot(db_path, api_key)
        st.session_state.app = app
    return app


def groq_response(content, prompt):
    # Send a single user message (instruction + prompt) to Groq and return the reply text
    try:
        response = client_groq.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content + prompt,
                }
            ],
            model="mixtral-8x7b-32768",
        )
        return response.choices[0].message.content
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in some time.")


# Function-calling schemas for OpenAI
scenario_custom_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'scenario_1': {'type': 'string', 'description': 'scenario number 1 full text'},
                'scenario_2': {'type': 'string', 'description': 'scenario number 2 full text'},
                'scenario_3': {'type': 'string', 'description': 'scenario number 3 full text'},
                'scenario_4': {'type': 'string', 'description': 'scenario number 4 full text'},
            }
        }
    }
]

scenario_keyword_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'keyword_1': {'type': 'string', 'description': 'keyword 1'},
                'keyword_2': {'type': 'string', 'description': 'keyword 2'},
                'keyword_3': {'type': 'string', 'description': 'keyword 3'},
                'keyword_4': {'type': 'string', 'description': 'keyword 4'},
            }
        }
    }
]

video_custom_functions = [
    {
        'name': 'extract_video_id',
        'description': 'Get the video ID',
        'parameters': {
            'type': 'object',
            'properties': {
                'video_id': {'type': 'string', 'description': 'video ID'},
            }
        }
    }
]

# Accumulates the transcripts of all searched videos
all_video_transcripts = ""

molecule_custom_functions = [
    {
        'name': 'extract_molecule_info',
        'description': 'Get the molecule name',
        'parameters': {
            'type': 'object',
            'properties': {
                'molecule_name': {'type': 'string', 'description': 'name of the molecule'},
            }
        }
    }
]

keyword_custom_functions = [
    {
        'name': 'extract_keyword_info',
        'description': 'Get the search query keyword',
        'parameters': {
            'type': 'object',
            'properties': {
                'keyword': {'type': 'string', 'description': 'keyword of the search query'},
            }
        }
    }
]

# Placeholder SMILES strings for each reaction component; they are overwritten with the
# values retrieved from PubChem when a reaction query is entered.
reactant_1_smiles = 'your_reactant_1_smiles_here'
reactant_2_smiles = 'your_reactant_2_smiles_here'  # may stay empty if no second reactant is present
reagent_3_smiles = 'your_reagent_3_smiles_here'
product_4_smiles = 'your_product_4_smiles_here'
product_5_smiles = 'your_product_5_smiles_here'

molecule_custom_functions_reaction = [
    {
        'name': 'extract_molecules_info',
        'description': 'Get the name of the individual molecules',
        'parameters': {
            'type': 'object',
            'properties': {
                'reactant_1': {'type': 'string', 'description': 'reactant number 1'},
                'reactant_2': {'type': 'string', 'description': 'reactant number 2'},
                'reagent_3': {'type': 'string', 'description': 'reagent number 1'},
                'product_4': {'type': 'string', 'description': 'product number 1'},
                'product_5': {'type': 'string', 'description': 'product number 2'},
            }
        }
    }
]

# Streamlit UI
st.title("Stereo World Updated 🌍")
image_variable = None

# Session state initialization
if 'prompt' not in st.session_state:
    st.session_state.prompt = ''
if 'selected_options' not in st.session_state:
    st.session_state.selected_options = []
if 'selected_options_reaction' not in st.session_state:
    st.session_state.selected_options_reaction = []

# User inputs
st.session_state.selected_options = st.multiselect(
    "Select options",
    ["fun based", "context based", "real world based", "conceptual textbook based"],
)
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")

with st.sidebar:
    st.sidebar.title("Chat with the assistant 🤖")
    # Inputs for the video and reaction searches
    search_query = st.sidebar.text_input("Enter your video search query")
    reaction_query = st.sidebar.text_input("Enter your reaction search query")
    name_reaction = st.checkbox("I am searching a name reaction")


def fetch_smiles(name):
    # Look up a molecule's canonical SMILES on PubChem; returns '' if the lookup fails.
    # (Refactors the repeated requests.get one-liners so each name is fetched only once;
    # the trailing newline returned by PubChem is stripped so it cannot corrupt the reaction SMILES.)
    if not name:
        return ''
    resp = requests.get(
        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{name}/property/CanonicalSMILES/TXT"
    )
    return resp.text.strip() if resp.status_code == 200 else ''
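
# Reaction search pipeline: Groq writes out the step-by-step reaction, OpenAI function calling
# extracts the individual molecule names, PubChem resolves each name to a canonical SMILES, and
# RDKit renders the overall reaction as a 2D image that is later shown in the chat.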
if reaction_query:
    prompt = reaction_query
    content = ("please give the complete step-by-step reaction along with the complete names "
               "of the molecules involved, the requested reaction is : ")
    response = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': response}],
        functions=molecule_custom_functions_reaction,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    reactant_1 = data.get('reactant_1', '')
    reactant_2 = data.get('reactant_2', '')
    reagent_3 = data.get('reagent_3', '')
    product_4 = data.get('product_4', '')
    product_5 = data.get('product_5', '')

    # Resolve every extracted name to a canonical SMILES string
    reactant_1_smiles = fetch_smiles(reactant_1)
    reactant_2_smiles = fetch_smiles(reactant_2)
    reagent_3_smiles = fetch_smiles(reagent_3)
    product_4_smiles = fetch_smiles(product_4)
    product_5_smiles = fetch_smiles(product_5)

    # st.write("Reactant 1: ", reactant_1_smiles)
    # st.write("Reactant 2: ", reactant_2_smiles)
    # st.write("Reagent 3: ", reagent_3_smiles)
    # st.write("Product 4: ", product_4_smiles)
    # st.write("Product 5: ", product_5_smiles)

    # Build the reaction SMILES string dynamically from the available components
    reaction_components = []

    # Reactants
    reactants = [reactant for reactant in [reactant_1_smiles, reactant_2_smiles] if reactant]
    reaction_components.append('.'.join(reactants) if reactants else '')

    # Reagents
    reagents = [reagent for reagent in [reagent_3_smiles] if reagent]
    reaction_components.append('.'.join(reagents) if reagents else '')

    # Products
    products = [product for product in [product_4_smiles, product_5_smiles] if product]
    reaction_components.append('.'.join(products) if products else '')

    reaction_smiles = '>'.join(reaction_components)

    try:
        # Generate the reaction object from the reaction SMILES
        rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)
        # Draw the reaction
        d2d = Draw.MolDraw2DCairo(800, 300)  # adjust size as needed
        d2d.DrawReaction(rxn)
        d2d.FinishDrawing()  # finalize the drawing before extracting the PNG bytes
        png = d2d.GetDrawingText()
        # Save the drawing to a file and keep the bytes for the chat
        with open('reaction_image.png', 'wb+') as f:
            f.write(png)
        image_variable = png
        # st.image('reaction_image.png')
    except Exception as e:
        st.write(f"An error occurred: {e}")
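
# Video search pipeline: Groq reduces the query to a single clean keyword, youtubesearchpython
# finds candidate videos, YouTubeTranscriptApi collects their transcripts, and Groq then picks
# the most relevant video, whose ID is extracted via function calling and embedded below.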
if search_query:
    prompt = search_query
    content = ("please correct the spelling and give exactly one precise search keyword, "
               "only the keyword and nothing else : ")
    response = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': response}],
        functions=keyword_custom_functions,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    keyword = data['keyword']
    st.sidebar.write(keyword)

    # Perform the search
    videosSearch = VideosSearch(search_query, limit=3)
    video_one = VideosSearch(search_query, limit=1)
    for video in video_one.result()['result']:
        video_one_id = video['id']  # ID of the top result
    for video in videosSearch.result()['result']:
        video_id = video['id']  # extract the video ID
        # Display the video thumbnail
        # st.image(video['thumbnails'][0]['url'])
        # Display the video title
        # st.write(f"**{video['title']}**")
        try:
            # Fetch the transcript for the video ID
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
            # Concatenate all text from the transcript
            transcript_text = "\n".join([t['text'] for t in transcript_list])
            all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
        except Exception:
            error_message = "Transcript not available or error in fetching transcript."
            all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{error_message}\n---\n"

# At this point all_video_transcripts contains the transcripts of all videos as a single string.
video_id = ""
if all_video_transcripts:
    # st.text_area("All Video Transcripts", all_video_transcripts, height=300)
    prompt = all_video_transcripts
    content = "write a one sentence summary for each of the given videos and always preserve and include the video_id : "
    compressed_transcripts = groq_response(content, prompt)

    prompt = compressed_transcripts
    content = "give me the best video with the most content and the best keywords from the transcripts, and always preserve and include the video_id : "
    chat_completion = groq_response(content, prompt)
    video_id_fetch = chat_completion
    # st.write(video_id_fetch)

    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': video_id_fetch}],
        functions=video_custom_functions,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    video_id = data['video_id']
    st.video(f"https://www.youtube.com/watch?v={video_id}")

# Chat area
messages = st.container(height=630)

if image_variable:
    messages.chat_message("assistant").write(
        f"When you react {reactant_1} with {reactant_2} using {reagent_3}, "
        f"you get {product_4} and {product_5}. Here is the reaction in 2D bond representation:"
    )
    messages.image(image_variable)

if check_box:
    messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
    with messages.chat_message("assistant"):
        components.iframe(
            "https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org",
            height=600,
        )

prompt_sidebar = st.chat_input("Say something")
if prompt_sidebar:
    messages.chat_message("user").write(prompt_sidebar)
    prompt = prompt_sidebar
    sidebar_chat = groq_response("please answer this query : ", prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': prompt_sidebar}],
        functions=molecule_custom_functions,
        function_call='auto'
    )
    try:
        arguments = response_functions.choices[0].message.function_call.arguments
        if arguments is not None:
            data = json.loads(arguments)
            molecule_name = data.get('molecule_name', '')
            if molecule_name:
                # Resolve the molecule name to a PubChem CID and embed an interactive 3D view
                response = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{molecule_name}/cids/TXT")
                cid = response.text.strip()
                with messages.chat_message("assistant"):
                    st.write(f"Here is the molecule {molecule_name} in 3D, you can interact with it too 😉:")
                    components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")
            messages.chat_message("assistant").write(sidebar_chat)
        else:
            # No function call was returned, so just show the chat reply
            messages.chat_message("assistant").write(sidebar_chat)
            data = {}
    except (IndexError, KeyError, TypeError):
        # print(f"Error accessing the data: {e}")
        messages.chat_message("assistant").write(sidebar_chat)
        data = {}
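
# Scenario generation: for the main prompt, Groq writes four scenario-based stereochemistry
# questions, OpenAI function calling splits them into the individual scenarios, and each scenario
# gets its own tab with sub-questions, a hint expander, an answer box, and a PDF upload.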
if st.session_state.prompt:
    prompt = st.session_state.prompt
    selected_options = " ".join(st.session_state.selected_options)
    # Named scenario_messages so it does not shadow the chat container above
    scenario_messages = [
        {
            "role": "user",
            "content": f"create a {selected_options} scenario-based task question for learning stereochemistry, "
                       f"create 4 scenarios each time and number them: {prompt}",
        },
    ]
    chat_completion = client_groq.chat.completions.create(
        messages=scenario_messages,
        model="mixtral-8x7b-32768",
    )
    response = chat_completion.choices[0].message.content

    if response:
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=scenario_custom_functions,
            function_call='auto'
        )
        data = json.loads(response_functions.choices[0].message.function_call.arguments)

        # One tab per scenario
        scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
        tabs = st.tabs(scenario_tabs)
        for i, tab in enumerate(tabs):
            with tab:
                st.header(scenario_tabs[i])
                scenario_text = data[f'scenario_{i+1}']
                st.write(scenario_text)

                prompt = scenario_text
                content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
                scenario_generated = groq_response(content, prompt)
                st.write(scenario_generated)

                prompt = scenario_generated
                content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
                chat_completion_hint = groq_response(content, prompt)

                st.text_area("Enter your answer here", key=f'answer_{i}')
                with st.expander(f"See hint for answering question {i+1} 😀"):
                    st.write(chat_completion_hint)

                # Upload PDF button
                uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
                if uploaded_file is not None:
                    st.success("File uploaded successfully!")

                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("See explanation 3D"):
                        components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
                with col2:
                    with st.expander("See explanation 2D"):
                        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")

# Example of error handling with client_groq API calls
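# The sketch below is illustrative only and is not wired into the app: it shows one way to retry a
# Groq chat completion on rate limits, reusing the exception classes and error messages already
# handled in groq_response above. The retry count and delay are assumed values, not from the app.
import time


def groq_response_with_retry(content, prompt, retries=2, delay=5):
    # Retry the Groq call a few times on RateLimitError, pausing between attempts.
    for attempt in range(retries + 1):
        try:
            completion = client_groq.chat.completions.create(
                messages=[{"role": "user", "content": content + prompt}],
                model="mixtral-8x7b-32768",
            )
            return completion.choices[0].message.content
        except groq.RateLimitError:
            if attempt < retries:
                time.sleep(delay)  # brief pause before the next attempt
            else:
                st.error("You have exceeded the rate limit for the demo version, please try again in some time.")
        except groq.APIConnectionError:
            st.error("The server could not be reached, please try again later.")
            return None
    return None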