# scrapitout128/app.py
import os
import time

import streamlit as st
from bs4 import BeautifulSoup as Soup
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
st.sidebar.title("OpenRAG")
st.sidebar.markdown(
    """
    OpenRAG speeds up retrieving information from educational websites. Its
    "Scrap it out" component crawls the site you provide and gives you quick
    access to precise answers drawn from its pages.
    """
)
st.sidebar.markdown(
    """
    Whether for academic research, professional inquiries, or personal curiosity,
    Scrap it out changes the way you engage with online educational resources:
    rapid, reliable information retrieval from a single URL.
    """
)
st.sidebar.markdown(
    """
    Enjoy using Scrap it out!
    """
)
st.title("Scrap it out πŸ¦…")
st.text("")
url_link = st.text_input("Enter the website URL to scrape")
# Build (or rebuild) the vector index on the initial load or when a new URL is entered.
if url_link and ("vectors" not in st.session_state or url_link != st.session_state.get("loaded_url")):
    with st.spinner("Loading..."):
        # Embed chunks with a small sentence-transformers model.
        st.session_state.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # Recursively crawl the site (up to 10 links deep) and strip the HTML to plain text.
        loader = RecursiveUrlLoader(url=url_link, max_depth=10, extractor=lambda x: Soup(x, "html.parser").text)
        docs = loader.load()
        # Split pages into overlapping chunks so each fits comfortably in the prompt context.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        final_documents = text_splitter.split_documents(docs)
        # Index the chunks in an in-memory FAISS vector store.
        st.session_state.vectors = FAISS.from_documents(final_documents, st.session_state.embeddings)
        st.session_state["loaded_url"] = url_link  # Remember which URL is currently indexed
    st.success("Loaded!")
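# Optional: the FAISS index above lives only in memory, so it is rebuilt whenever the
# session restarts. A minimal sketch of persisting it to disk (the "faiss_index" folder
# name is illustrative, not part of the original app):
#
#     st.session_state.vectors.save_local("faiss_index")
#     # ...and later, to reload instead of re-crawling:
#     # FAISS.load_local("faiss_index", st.session_state.embeddings,
#     #                  allow_dangerous_deserialization=True)
#
# Note: recent langchain_community versions require allow_dangerous_deserialization=True
# when loading a locally pickled index.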
# LLM and prompt setup.
# The Groq API key is read from the environment rather than hard-coded; set GROQ_API_KEY
# before running the app.
llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=os.environ.get("GROQ_API_KEY"))
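# Optional: Streamlit re-runs this script on every interaction, so the Groq client is
# re-created each time. If that ever matters, st.cache_resource can memoize it. A minimal
# sketch (get_llm is a hypothetical helper, not part of the original app):
#
#     @st.cache_resource
#     def get_llm():
#         return ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=os.environ.get("GROQ_API_KEY"))
#
#     llm = get_llm()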
prompt = ChatPromptTemplate.from_template(
    """
    Answer the questions based on the provided context only.
    Please provide the most accurate response to the question.
    <context>
    {context}
    </context>
    Question: {input}
    """
)
# Build the retrieval chain and answer questions once a site has been indexed.
if url_link and "vectors" in st.session_state:
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    st.text("")
    query = st.text_input("Input your question here")
    if query:
        # Use a wall-clock timer so the reported time includes network latency.
        start = time.perf_counter()
        response = retrieval_chain.invoke({"input": query})
        elapsed = time.perf_counter() - start
        st.write(response["answer"])
        st.write("Response time: ", elapsed)
        # Show the retrieved chunks so users can see what the answer was grounded in.
        with st.expander("NOT THE EXPECTED RESPONSE? CHECK OUT HERE"):
            for doc in response["context"]:
                st.write(doc.page_content)
                st.write("----------------------------------")