# NOTE(review): removed non-code residue captured from a HuggingFace Spaces
# page ("Spaces / Sleeping / Sleeping") — it was not valid Python.
from groq import Groq | |
import groq | |
import streamlit as st | |
from openai import OpenAI | |
import json | |
import streamlit.components.v1 as components | |
import requests | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from youtubesearchpython import VideosSearch | |
from rdkit import Chem | |
from rdkit.Chem import Draw, AllChem | |
import os | |
import queue | |
import re | |
import tempfile | |
import threading | |
import requests | |
from bs4 import BeautifulSoup | |
from embedchain import App | |
from embedchain.config import BaseLlmConfig | |
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield, | |
generate) | |
# API clients, keyed from environment variables (GROQ_API / OPENAI_API).
# NOTE(review): both clients are constructed again further down the file —
# the later assignments make these redundant.
client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))

# OpenAI function-calling schema: pull a single website URL out of free text.
# NOTE(review): does not appear to be referenced anywhere in this file.
link_custom_functions = [
    {
        'name': 'extract_website_url',
        'description': 'Get the website url',
        'parameters': {
            'type': 'object',
            'properties': {
                'link': {'type': 'string', 'description': 'website url'},
            }
        }
    }
]
def embedchain_bot(db_path, api_key):
    """Build an embedchain App for chat-over-PDF.

    Uses OpenAI (gpt-3.5-turbo-1106) for both the LLM and the embedder,
    with a Chroma vector store rooted at ``db_path``.
    """
    llm_config = {
        "model": "gpt-3.5-turbo-1106",
        "temperature": 0.5,
        "max_tokens": 1000,
        "top_p": 1,
        "stream": True,
        "api_key": api_key,
    }
    vectordb_config = {
        "collection_name": "chat-pdf",
        "dir": db_path,
        "allow_reset": True,
    }
    return App.from_config(
        config={
            "llm": {"provider": "openai", "config": llm_config},
            "vectordb": {"provider": "chroma", "config": vectordb_config},
            "embedder": {"provider": "openai", "config": {"api_key": api_key}},
            "chunker": {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"},
        }
    )
def get_db_path():
    """Create and return a fresh temporary directory for the vector DB."""
    return tempfile.mkdtemp()
def get_ec_app(api_key):
    """Return the embedchain App cached in Streamlit session state.

    On the first call of a session, creates the app (backed by a fresh
    temp-dir database) and caches it in ``st.session_state``.
    """
    if "app" not in st.session_state:
        print("Creating app")
        st.session_state.app = embedchain_bot(get_db_path(), api_key)
        return st.session_state.app
    print("Found app in session state")
    return st.session_state.app
def groq_response(content, prompt):
    """Send ``content + prompt`` to Groq (mixtral-8x7b-32768) and return the reply text.

    On connection or rate-limit errors a Streamlit error banner is shown and
    None is returned, so callers must be prepared for a None result.
    """
    try:
        response = client_groq.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content + prompt,
                }
            ],
            model="mixtral-8x7b-32768",
        )
        return response.choices[0].message.content
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in some time.")
    # The error paths previously fell off the end (implicit None); made explicit.
    return None
# NOTE(review): client_openai and client_groq are already constructed near the
# top of the file from the same environment variables; re-creating them here
# was redundant, so the existing instances are reused. client_groq_one is kept
# bound (as an alias) for backward compatibility with any code referencing it.
client_groq_one = client_groq
# Define your custom functions for OpenAI | |
# OpenAI function-calling schema: split a generated response into the full
# text of four numbered scenarios (used by the scenario-tab UI below).
scenario_custom_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'scenario_1': {'type': 'string', 'description': 'scenario number 1 full text'},
                'scenario_2': {'type': 'string', 'description': 'scenario number 2 full text'},
                'scenario_3': {'type': 'string', 'description': 'scenario number 3 full text'},
                'scenario_4': {'type': 'string', 'description': 'scenario number 4 full text'},
            }
        }
    }
]
# OpenAI function-calling schema: one keyword per scenario.
# NOTE(review): reuses the function name 'extract_scenario_info' from
# scenario_custom_functions above, and does not appear to be referenced
# anywhere in this file — possibly dead code or work in progress.
scenario_keyword_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                'keyword_1': {'type': 'string', 'description': 'keyword 1'},
                'keyword_2': {'type': 'string', 'description': 'keyword 2'},
                'keyword_3': {'type': 'string', 'description': 'keyword 3'},
                'keyword_4': {'type': 'string', 'description': 'keyword 4'},
            }
        }
    }
]
# OpenAI function-calling schema: extract a single YouTube video ID from
# model output (used to pick which video to embed below).
video_custom_functions = [
    {
        'name': 'extract_video_id',
        'description': 'Get the video ID',
        'parameters': {
            'type': 'object',
            'properties': {
                'video_id': {'type': 'string', 'description': 'video ID'},
            }
        }
    }
]

# Initialize a string to store all transcripts
# Accumulates every fetched transcript (or an error note) across the video
# search loop below, then gets summarized by the LLM.
all_video_transcripts = ""
# OpenAI function-calling schema: extract one molecule name from a chat
# message (used by the sidebar chat to show a 3D viewer).
molecule_custom_functions = [
    {
        'name': 'extract_molecule_info',
        'description': 'Get the molecule name',
        'parameters': {
            'type': 'object',
            'properties': {
                'molecule_name': {'type': 'string', 'description': 'name of the molecule'},
            }
        }
    }
]
# OpenAI function-calling schema: extract the single search keyword from a
# model response (used for the YouTube search query cleanup).
keyword_custom_functions = [
    {
        'name': 'extract_keyword_info',
        'description': 'Get the search query keyword',
        'parameters': {
            'type': 'object',
            'properties': {
                # BUG FIX: "teh" -> "the" — this description is sent to the
                # model and should be clean English.
                'keyword': {'type': 'string', 'description': 'keyword of the search query'},
            }
        }
    }
]
# Example SMILES strings for each component - replace these with the actual values retrieved from your API calls
# These placeholders are overwritten by PubChem lookups when a reaction query
# is submitted in the sidebar.
reactant_1_smiles = 'your_reactant_1_smiles_here'
reactant_2_smiles = 'your_reactant_2_smiles_here'  # This might be an empty string if not present
reagent_3_smiles = 'your_reagent_3_smiles_here'
product_4_smiles = 'your_product_4_smiles_here'
product_5_smiles = 'your_product_5_smiles_here'
# OpenAI function-calling schema: split a described reaction into up to two
# reactants, one reagent, and two products (by compound name).
molecule_custom_functions_reaction = [
    {
        'name': 'extract_molecules_info',
        'description': 'Get the name of the individual molecules',
        'parameters': {
            'type': 'object',
            'properties': {
                'reactant_1': {'type': 'string', 'description': 'reactant number 1 '},
                'reactant_2': {'type': 'string', 'description': 'reactant number 2 '},
                'reagent_3': {'type': 'string', 'description': 'reagent number 1 '},
                'product_4': {'type': 'string', 'description': 'product number 1'},
                'product_5': {'type': 'string', 'description': 'product number 2'},
            }
        }
    }
]
# Streamlit UI
# NOTE(review): "π" in the title looks like a mojibake'd emoji — confirm the
# intended character.
st.title("Stereo World Updated π")
# Holds the reaction-drawing PNG bytes once a reaction query has been drawn.
image_variable = None

# Session states initialization
if 'prompt' not in st.session_state:
    st.session_state.prompt = ''
if 'selected_options' not in st.session_state:
    st.session_state.selected_options = []
if 'selected_options_reaction' not in st.session_state:
    st.session_state.selected_options_reaction = []

# User inputs
st.session_state.selected_options = st.multiselect("Select options", ["fun based", "context based", "real world based", "conceptual textbook based"])
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")
with st.sidebar:
    # BUG FIX: the title previously rendered the mojibake sequence "π€"
    # (UTF-8 emoji bytes decoded as Latin-1); most likely the robot emoji
    # was intended for the assistant.
    st.sidebar.title("Chat with the assistant 🤖")

# Input for search query
search_query = st.sidebar.text_input("Enter your video search query")
reaction_query = st.sidebar.text_input("Enter your reaction search query")
name_reaction = st.checkbox("I am searching a name reaction")
def _fetch_canonical_smiles(name):
    """Look up a compound's canonical SMILES on PubChem by name.

    Returns '' when the name is empty or the lookup fails, so callers can
    filter missing components out of the reaction string.

    BUG FIX: the original issued every GET twice (once for the status code,
    once for the body) and queried PubChem even for empty names; it also kept
    the raw response text, whose trailing newline corrupted the joined
    reaction SMILES.
    """
    if not name:
        return ''
    resp = requests.get(
        f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{name}/property/CanonicalSMILES/TXT"
    )
    return resp.text.strip() if resp.status_code == 200 else ''

if reaction_query:
    # Ask Groq for a step-by-step reaction, then use OpenAI function calling
    # to pull out the individual molecule names.
    prompt = reaction_query
    content = "please give complete step by step reaction along with the complete name of the molecules for the reaction, the requested reaction is : "
    response = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': response}],
        functions=molecule_custom_functions_reaction,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    reactant_1 = data.get('reactant_1', '')
    reactant_2 = data.get('reactant_2', '')
    reagent_3 = data.get('reagent_3', '')
    product_4 = data.get('product_4', '')
    product_5 = data.get('product_5', '')

    # Resolve each name to SMILES via PubChem (one request per molecule).
    reactant_1_smiles = _fetch_canonical_smiles(reactant_1)
    reactant_2_smiles = _fetch_canonical_smiles(reactant_2)
    reagent_3_smiles = _fetch_canonical_smiles(reagent_3)
    product_4_smiles = _fetch_canonical_smiles(product_4)
    product_5_smiles = _fetch_canonical_smiles(product_5)

    # Build the reaction SMILES "reactants>reagents>products", dropping any
    # component that could not be resolved.
    reactants = '.'.join(s for s in (reactant_1_smiles, reactant_2_smiles) if s)
    reagents = '.'.join(s for s in (reagent_3_smiles,) if s)
    products = '.'.join(s for s in (product_4_smiles, product_5_smiles) if s)
    reaction_smiles = '>'.join([reactants, reagents, products])

    try:
        # Generate the reaction from SMILES and draw it to a PNG.
        rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)
        d2d = Draw.MolDraw2DCairo(800, 300)  # Adjust size as needed
        d2d.DrawReaction(rxn)
        png = d2d.GetDrawingText()
        # Save the drawing to a file and keep the bytes for the chat area.
        with open('reaction_image.png', 'wb+') as f:
            f.write(png)
        image_variable = png
    except Exception as e:
        st.write(f"An error occurred: {e}")
if search_query:
    # Clean the query into a single keyword via Groq + OpenAI function calling.
    # (Typos fixed in the prompt: "teh" -> "the", "other that that" -> "other than that".)
    prompt = search_query
    content = "please correct the spelling and write the precise one search keyword for and only give the keyword, only 1 and nothing else other than that : "
    response = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': response}],
        functions=keyword_custom_functions,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    keyword = data['keyword']
    st.sidebar.write(keyword)

    # Perform the search.
    # NOTE(review): the search still uses the raw search_query rather than the
    # cleaned-up keyword above — confirm which was intended.
    # BUG FIX: removed a second, single-result VideosSearch whose result
    # (video_one_id) was never used.
    videosSearch = VideosSearch(search_query, limit=3)
    for video in videosSearch.result()['result']:
        video_id = video['id']  # Extract video ID
        try:
            # Fetch the transcript for the video ID and concatenate its text.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
            transcript_text = "\n".join([t['text'] for t in transcript_list])
            all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
        except Exception:
            # Transcript unavailable (disabled, no English track, etc.) —
            # record the failure so the summarizer still sees this video.
            error_message = "Transcript not available or error in fetching transcript."
            all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{error_message}\n---\n"

# Summarize the collected transcripts and pick the best video to embed.
# (Prompt typos fixed: "the the" and "vido_id" — the schema field is video_id.)
video_id = ""
if all_video_transcripts:
    prompt = all_video_transcripts
    content = "write a one sentence summary for the given videos and always preserve and give me the video_id always "
    compressed_transcripts = groq_response(content, prompt)
    prompt = compressed_transcripts
    content = "give me the best video with maximum content and the best keywords from the transcript and always preserve and give me the video_id always "
    video_id_fetch = groq_response(content, prompt)
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': video_id_fetch}],
        functions=video_custom_functions,
        function_call='auto'
    )
    data = json.loads(response_functions.choices[0].message.function_call.arguments)
    video_id = data['video_id']
    st.video(f"https://www.youtube.com/watch?v={video_id}")
# Main chat area: a fixed-height container that all assistant/user messages
# go into.
messages = st.container(height=630)

# If a reaction image was generated above, show it with a summary sentence.
# (reactant_1 etc. exist whenever image_variable was set.)
if image_variable:
    messages.chat_message("assistant").write(f"When you react {reactant_1} with {reactant_2} using {reagent_3}, you get {product_4} and {product_5}" + " here is the reaction in 2D bond representation:")
    messages.image(image_variable)

# Optional embedded Marvin JS sketcher for drawing molecules.
if check_box:
    messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
    with messages.chat_message("assistant"):
        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org", height=600)
prompt_sidebar = st.chat_input("Say something")
if prompt_sidebar:
    messages.chat_message("user").write(prompt_sidebar)
    prompt = prompt_sidebar
    # Typo fixed in the prompt: "thsi" -> "this".
    sidebar_chat = groq_response("please answer this query : ", prompt)
    # In parallel, try to extract a molecule name from the user's message so
    # we can embed an interactive 3D viewer.
    response_functions = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': prompt_sidebar}],
        functions=molecule_custom_functions,
        function_call='auto'
    )
    try:
        # BUG FIX: message.function_call can be None when the model answers in
        # plain text; dereferencing .arguments on it raised AttributeError,
        # which the original except clause did not catch.
        function_call = response_functions.choices[0].message.function_call
        arguments = function_call.arguments if function_call is not None else None
        data = json.loads(arguments) if arguments else {}
        # BUG FIX: the original did data['molecule_name',''] — indexing with a
        # TUPLE key — which always raised KeyError, so the 3D viewer never
        # appeared. Both the if and else branches were also identical; merged.
        molecule_name = data.get('molecule_name', '')
        if molecule_name:
            response = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{molecule_name}/cids/TXT")
            # .strip(): PubChem's TXT endpoint returns a trailing newline that
            # would otherwise be embedded in the iframe URL.
            cid = response.text.strip()
            with messages.chat_message("assistant"):
                # NOTE(review): "π" looks like a mojibake'd emoji — confirm
                # the intended character.
                st.write(f"Here is the molecule {molecule_name} in 3D you can interact with it too π:")
                components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")
        messages.chat_message("assistant").write(sidebar_chat)
    except (IndexError, KeyError, TypeError, AttributeError, json.JSONDecodeError):
        # Extraction failed — still show the plain chat answer.
        messages.chat_message("assistant").write(sidebar_chat)
        data = {}
if st.session_state.prompt:
    prompt = st.session_state.prompt
    selected_options = " ".join(st.session_state.selected_options)
    # BUG FIX: this list was named `messages`, shadowing the chat container
    # created above; renamed to avoid confusion.
    scenario_messages = [
        {"role": "user", "content": f"create a {selected_options} scenarios based task question for learning stereochemistry, create 4 scenarios each time and number them: {prompt}"},
    ]
    chat_completion = client_groq.chat.completions.create(
        messages=scenario_messages,
        model="mixtral-8x7b-32768",
    )
    response = chat_completion.choices[0].message.content
    if response:
        # Split the generated text into four scenarios via function calling.
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=scenario_custom_functions,
            function_call='auto'
        )
        data = json.loads(response_functions.choices[0].message.function_call.arguments)
        # Tabs for scenarios
        scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
        tabs = st.tabs(scenario_tabs)
        for i, tab in enumerate(tabs):
            with tab:
                st.header(scenario_tabs[i])
                # BUG FIX: .get() guards against the model returning fewer
                # than 4 scenarios (direct indexing raised KeyError).
                scenario_text = data.get(f'scenario_{i+1}', '')
                st.write(scenario_text)
                prompt = scenario_text
                content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
                scenario_generated = groq_response(content, prompt)
                st.write(scenario_generated)
                prompt = scenario_generated
                content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
                chat_completion_hint = groq_response(content, prompt)
                st.text_area("Enter your answer here", key=f'answer_{i}')
                # NOTE(review): "π" looks like a mojibake'd emoji — confirm
                # the intended character.
                with st.expander("See hint for answering the question" + str(i+1) + "π"):
                    st.write(chat_completion_hint)
                # Upload PDF button
                uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
                if uploaded_file is not None:
                    st.success("File uploaded successfully!")
                # Fixed 3D/2D explanation viewers side by side.
                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("See explanation 3D"):
                        components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
                with col2:
                    with st.expander("See explanation 2D"):
                        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")
# Example of error handling with client_groq API calls |