from groq import Groq
import groq
import streamlit as st
from openai import OpenAI
import json
import streamlit.components.v1 as components
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from youtubesearchpython import VideosSearch
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
import os
import queue
import re
import tempfile
import threading
from bs4 import BeautifulSoup
from embedchain import App
from embedchain.config import BaseLlmConfig
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
                                          generate)

client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))

link_custom_functions = [
    {
        'name': 'extract_website_url',
        'description': 'Get the website url',
        'parameters': {
            'type': 'object',
            'properties': {
                'link': {'type': 'string', 'description': 'website url'},
            }
        }
    }
]

def embedchain_bot(db_path, api_key):
    return App.from_config(
        config={
            "llm": {
                "provider": "openai",
                "config": {
                    "model": "gpt-3.5-turbo-1106",
                    "temperature": 0.5,
                    "max_tokens": 1000,
                    "top_p": 1,
                    "stream": True,
                    "api_key": api_key,
                },
            },
            "vectordb": {
                "provider": "chroma",
                "config": {"collection_name": "chat-pdf", "dir": db_path, "allow_reset": True},
            },
            "embedder": {"provider": "openai", "config": {"api_key": api_key}},
            "chunker": {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"},
        }
    )

def get_db_path():
    tmpdirname = tempfile.mkdtemp()
    return tmpdirname


def get_ec_app(api_key):
    if "app" in st.session_state:
        print("Found app in session state")
        app = st.session_state.app
    else:
        print("Creating app")
        db_path = get_db_path()
        app = embedchain_bot(db_path, api_key)
        st.session_state.app = app
    return app
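
# Note: `get_ec_app` is not called anywhere else in this file. A minimal usage sketch
# (assuming embedchain's standard `App.add` / `App.chat` API; the PDF path below is
# hypothetical) is shown commented out so it does not change the app's behavior:
#
# app = get_ec_app(os.getenv('OPENAI_API'))
# app.add("my_notes.pdf", data_type="pdf_file")    # index a document
# answer = app.chat("Summarize the uploaded PDF")  # query the indexed content
# st.write(answer)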

def groq_response(content, prompt):
    try:
        response = client_groq.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content + prompt,
                }
            ],
            model="mixtral-8x7b-32768",
        )
        return response.choices[0].message.content
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in some time.")

# Initialize your clients with API keys
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))
client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_groq_one = Groq(api_key=os.getenv('GROQ_API'))

# Define your custom functions for OpenAI
scenario_custom_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'scenario_1': {'type': 'string', 'description': 'scenario number 1 full text'},
                'scenario_2': {'type': 'string', 'description': 'scenario number 2 full text'},
                'scenario_3': {'type': 'string', 'description': 'scenario number 3 full text'},
                'scenario_4': {'type': 'string', 'description': 'scenario number 4 full text'},
            }
        }
    }
]

scenario_keyword_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'keyword_1': {'type': 'string', 'description': 'keyword 1'},
                'keyword_2': {'type': 'string', 'description': 'keyword 2'},
                'keyword_3': {'type': 'string', 'description': 'keyword 3'},
                'keyword_4': {'type': 'string', 'description': 'keyword 4'},
            }
        }
    }
]

video_custom_functions = [
    {
        'name': 'extract_video_id',
        'description': 'Get the video ID',
        'parameters': {
            'type': 'object',
            'properties': {
                'video_id': {'type': 'string', 'description': 'video ID'},
            }
        }
    }
]

# Initialize a string to store all transcripts
all_video_transcripts = ""

molecule_custom_functions = [
    {
        'name': 'extract_molecule_info',
        'description': 'Get the molecule name',
        'parameters': {
            'type': 'object',
            'properties': {
                'molecule_name': {'type': 'string', 'description': 'name of the molecule'},
            }
        }
    }
]

keyword_custom_functions = [
    {
        'name': 'extract_keyword_info',
        'description': 'Get the search query keyword',
        'parameters': {
            'type': 'object',
            'properties': {
                'keyword': {'type': 'string', 'description': 'keyword of the search query'},
            }
        }
    }
]

# Example SMILES strings for each component - replaced below with the actual values retrieved from the PubChem API calls
reactant_1_smiles = 'your_reactant_1_smiles_here'
reactant_2_smiles = 'your_reactant_2_smiles_here'  # This might be an empty string if not present
reagent_3_smiles = 'your_reagent_3_smiles_here'
product_4_smiles = 'your_product_4_smiles_here'
product_5_smiles = 'your_product_5_smiles_here'

molecule_custom_functions_reaction = [
    {
        'name': 'extract_molecules_info',
        'description': 'Get the name of the individual molecules',
        'parameters': {
            'type': 'object',
            'properties': {
                'reactant_1': {'type': 'string', 'description': 'reactant number 1'},
                'reactant_2': {'type': 'string', 'description': 'reactant number 2'},
                'reagent_3': {'type': 'string', 'description': 'reagent number 1'},
                'product_4': {'type': 'string', 'description': 'product number 1'},
                'product_5': {'type': 'string', 'description': 'product number 2'},
            }
        }
    }
]

# Streamlit UI
st.title("Stereo World Updated")
image_variable = None

# Session states initialization
if 'prompt' not in st.session_state:
    st.session_state.prompt = ''
if 'selected_options' not in st.session_state:
    st.session_state.selected_options = []
if 'selected_options_reaction' not in st.session_state:
    st.session_state.selected_options_reaction = []

# User inputs
st.session_state.selected_options = st.multiselect("Select options", ["fun based", "context based", "real world based", "conceptual textbook based"])
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")

with st.sidebar:
    st.sidebar.title("Chat with the assistant 🤖")

    # Input for search query
    search_query = st.sidebar.text_input("Enter your video search query")
    reaction_query = st.sidebar.text_input("Enter your reaction search query")
    name_reaction = st.checkbox("I am searching a name reaction")

    if reaction_query:
        prompt = reaction_query
        content = "please give the complete step-by-step reaction along with the complete names of the molecules for the reaction, the requested reaction is: "
        response = groq_response(content, prompt)
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=molecule_custom_functions_reaction,
            function_call='auto'
        )
        data = json.loads(response_functions.choices[0].message.function_call.arguments)
        reactant_1 = data.get('reactant_1', '')
        reactant_2 = data.get('reactant_2', '')
        reagent_3 = data.get('reagent_3', '')
        product_4 = data.get('product_4', '')
        product_5 = data.get('product_5', '')

        def fetch_canonical_smiles(name):
            # Look up the canonical SMILES for a compound name on PubChem (one request per compound).
            # Strip the trailing newline the TXT endpoint returns so the SMILES can be concatenated safely.
            if not name:
                return ''
            r = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{name}/property/CanonicalSMILES/TXT")
            return r.text.strip() if r.status_code == 200 else ''

        reactant_1_smiles = fetch_canonical_smiles(reactant_1)
        reactant_2_smiles = fetch_canonical_smiles(reactant_2)
        reagent_3_smiles = fetch_canonical_smiles(reagent_3)
        product_4_smiles = fetch_canonical_smiles(product_4)
        product_5_smiles = fetch_canonical_smiles(product_5)

        #st.write("Reactant 1: ", reactant_1_smiles)
        #st.write("Reactant 2: ", reactant_2_smiles)
        #st.write("Reagent 3: ", reagent_3_smiles)
        #st.write("Product 4: ", product_4_smiles)
        #st.write("Product 5: ", product_5_smiles)

        # Building the reaction SMILES string dynamically based on available components
        reaction_components = []

        # Adding reactants
        reactants = [reactant for reactant in [reactant_1_smiles, reactant_2_smiles] if reactant]
        if reactants:
            reaction_components.append('.'.join(reactants))
        else:
            reaction_components.append('')

        # Adding reagents
        reagents = [reagent for reagent in [reagent_3_smiles] if reagent]
        if reagents:
            reaction_components.append('.'.join(reagents))
        else:
            reaction_components.append('')

        # Adding products
        products = [product for product in [product_4_smiles, product_5_smiles] if product]
        if products:
            reaction_components.append('.'.join(products))
        else:
            reaction_components.append('')

        reaction_smiles = '>'.join(reaction_components)
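
        # For reference, a reaction SMILES string has the form "reactants>reagents>products",
        # with multiple species in one section joined by ".". A hedged, hypothetical example
        # (Fischer esterification of acetic acid with ethanol over sulfuric acid) would be:
        #
        #   CC(=O)O.CCO>OS(=O)(=O)O>CC(=O)OCC.O
        #
        # which is the format expected by AllChem.ReactionFromSmarts(..., useSmiles=True) below.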

        try:
            # Generate the reaction from SMILES
            rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)
            # Draw the reaction
            d2d = Draw.MolDraw2DCairo(800, 300)  # Adjust size as needed
            d2d.DrawReaction(rxn)
            png = d2d.GetDrawingText()
            # Save the drawing to a file
            with open('reaction_image.png', 'wb+') as f:
                f.write(png)
            image_variable = png
            #st.image('reaction_image.png')
        except Exception as e:
            st.write(f"An error occurred: {e}")

    if search_query:
        prompt = search_query
        content = "please correct the spelling and give exactly one precise search keyword, only the keyword and nothing else: "
        response = groq_response(content, prompt)
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=keyword_custom_functions,
            function_call='auto'
        )
        data = json.loads(response_functions.choices[0].message.function_call.arguments)
        keyword = data['keyword']
        st.sidebar.write(keyword)

        # Perform the search
        videosSearch = VideosSearch(search_query, limit=3)
        video_one = VideosSearch(search_query, limit=1)
        for video in video_one.result()['result']:
            video_one_id = video['id']
        for video in videosSearch.result()['result']:
            video_id = video['id']  # Extract video ID
            # Display the video thumbnail
            #st.image(video['thumbnails'][0]['url'])
            # Display the video title
            #st.write(f"**{video['title']}**")
            try:
                # Fetch the transcript for the video ID
                transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
                # Concatenating all text from the transcript
                transcript_text = "\n".join([t['text'] for t in transcript_list])
                # Concatenate the transcript to the all_video_transcripts variable
                all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
            except Exception as e:
                error_message = "Transcript not available or error in fetching transcript."
                # Concatenate the error message to the all_video_transcripts variable
                all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{error_message}\n---\n"

    # At this point, all_video_transcripts contains transcripts for all videos concatenated as a single string.
    # You can display it or process it as needed.
    # Here's an example of displaying the combined transcripts (kept commented out below):
    video_id = ""
    if all_video_transcripts:
        #st.text_area("All Video Transcripts", all_video_transcripts, height=300)
        prompt = all_video_transcripts
        content = "write a one-sentence summary for each of the given videos and always preserve and include the video_id: "
        video_compression = groq_response(content, prompt)
        compressed_transcripts = video_compression
        prompt = compressed_transcripts
        content = "give me the best video with the maximum content and the best keywords from the transcript, and always preserve and include the video_id: "
        chat_completion = groq_response(content, prompt)
        #st.write(chat_completion)
        video_id_fetch = chat_completion
        #st.write(video_id_fetch)
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': video_id_fetch}],
            functions=video_custom_functions,
            function_call='auto'
        )
        data = json.loads(response_functions.choices[0].message.function_call.arguments)
        video_id = data['video_id']
        st.video(f"https://www.youtube.com/watch?v={video_id}")

    messages = st.container(height=630)
    if image_variable:
        messages.chat_message("assistant").write(f"When you react {reactant_1} with {reactant_2} using {reagent_3}, you get {product_4} and {product_5}. Here is the reaction in 2D bond representation:")
        messages.image(image_variable)
    if check_box:
        messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
        with messages.chat_message("assistant"):
            components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org", height=600)

    prompt_sidebar = st.chat_input("Say something")
    if prompt_sidebar:
        messages.chat_message("user").write(prompt_sidebar)
        prompt = prompt_sidebar
        sidebar_chat = groq_response("please answer this query: ", prompt)
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': prompt_sidebar}],
            functions=molecule_custom_functions,
            function_call='auto'
        )
        try:
            arguments = response_functions.choices[0].message.function_call.arguments
            data = json.loads(arguments) if arguments is not None else {}
            molecule_name = data.get('molecule_name', '')
            if molecule_name:
                # Look up the PubChem CID for the molecule and embed an interactive 3D viewer
                response = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{molecule_name}/cids/TXT")
                cid = response.text.strip()
                with messages.chat_message("assistant"):
                    st.write(f"Here is the molecule {molecule_name} in 3D, you can interact with it too:")
                    components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")
            messages.chat_message("assistant").write(sidebar_chat)
        except (IndexError, KeyError, TypeError, AttributeError):
            # No usable function-call arguments were returned; just show the plain chat reply
            messages.chat_message("assistant").write(sidebar_chat)
            data = {}

if st.session_state.prompt:
    prompt = st.session_state.prompt
    selected_options = " ".join(st.session_state.selected_options)
    # Use a separate variable name so the `messages` chat container above is not overwritten
    scenario_messages = [
        {"role": "user", "content": f"create a {selected_options} scenarios based task question for learning stereochemistry, create 4 scenarios each time and number them: {prompt}"},
    ]
    chat_completion = client_groq.chat.completions.create(
        messages=scenario_messages,
        model="mixtral-8x7b-32768",
    )
    response = chat_completion.choices[0].message.content
    if response:
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=scenario_custom_functions,
            function_call='auto'
        )
        data = json.loads(response_functions.choices[0].message.function_call.arguments)

        # Tabs for scenarios
        scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
        tabs = st.tabs(scenario_tabs)
        for i, tab in enumerate(tabs):
            with tab:
                st.header(scenario_tabs[i])
                scenario_text = data.get(f'scenario_{i+1}', '')
                st.write(scenario_text)

                prompt = scenario_text
                content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
                chat_completion_subquestions = groq_response(content, prompt)
                scenario_generated = chat_completion_subquestions
                st.write(scenario_generated)

                prompt = scenario_generated
                content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
                chat_completion_hint = groq_response(content, prompt)

                st.text_area("Enter your answer here", key=f'answer_{i}')
                with st.expander("See hint for answering question " + str(i + 1)):
                    st.write(chat_completion_hint)

                # Upload PDF button
                uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
                if uploaded_file is not None:
                    st.success("File uploaded successfully!")

                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("See explanation 3D"):
                        components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
                with col2:
                    with st.expander("See explanation 2D"):
                        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")

# Example of error handling with client_groq API calls
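# The helper below is a hedged, standalone illustration (not wired into the app) of the same
# try/except pattern used in groq_response above, relying only on the exception classes the
# groq client already exposes (groq.APIConnectionError, groq.RateLimitError). The function
# name `safe_groq_call` is hypothetical.
def safe_groq_call(user_text):
    try:
        completion = client_groq.chat.completions.create(
            messages=[{"role": "user", "content": user_text}],
            model="mixtral-8x7b-32768",
        )
        return completion.choices[0].message.content
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
    except groq.RateLimitError:
        st.error("Rate limit exceeded, please try again in some time.")
    return None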