|
import os |
|
import gradio as gr |
|
from langchain.chains import RetrievalQA |
|
from langchain_community.document_loaders import TextLoader |
|
from langchain_community.document_loaders import UnstructuredExcelLoader |
|
from langchain.indexes import VectorstoreIndexCreator |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
from langchain_community.llms import HuggingFaceEndpoint |
|
from langchain.memory import ConversationBufferMemory |
|
from langchain.chains import ConversationalRetrievalChain |
|
|
|
from dotenv import find_dotenv, load_dotenv |
|
|
|
from langchain.chains import create_retrieval_chain, RetrievalQA |
|
from langchain_community.vectorstores import FAISS |
|
|
|
# Pull credentials (HUGGINGFACEHUB_API_TOKEN in particular) from a local
# .env file into the process environment before any HF client is built.
_ = load_dotenv(find_dotenv())

# NOTE(review): hf_api is read here but the langchain HF integrations pick
# the token up from the environment themselves — kept for compatibility.
hf_api = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|
|
|
def indexdocs(file_path, progress=gr.Progress()):
    """Build a conversational RAG chain from the uploaded Excel files.

    Args:
        file_path: list of paths to the uploaded .xls/.xlsx files.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        A ``(qa_chain, None)`` tuple: the ready ConversationalRetrievalChain
        for the ``qa_chain`` state, and ``None`` to clear the chatbot.
    """
    progress(0.1, desc="Loading documents...")

    # mode="elements" keeps per-element metadata (filename, page_name)
    # which chat() later surfaces as the answer's source attribution.
    loaders = [UnstructuredExcelLoader(file, mode="elements") for file in file_path]
    documents = []
    for loader in loaders:
        documents.extend(loader.load())

    progress(0.3, desc="Splitting documents...")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
    pages = text_splitter.split_documents(documents)
    embedding = HuggingFaceEmbeddings()

    progress(0.5, desc="Creating vectorstore...")

    vector = FAISS.from_documents(documents=pages, embedding=embedding)
    retriever = vector.as_retriever()

    progress(0.8, desc="Setting up language model...")

    # output_key="answer" tells the memory which of the chain's outputs to
    # store, since the chain also returns source_documents.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        return_messages=True,
    )

    # FIX: Hugging Face Hub repo ids are case-sensitive; the organization
    # is "mistralai" — the original "Mistralai/..." 404s on the Hub.
    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.1,
        max_new_tokens=200,
        top_k=1,
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        return_source_documents=True,
        verbose=False,
    )

    # Signal completion so the progress bar doesn't stall at 80%.
    progress(1.0, desc="Done.")
    return qa_chain, None
|
|
|
def format_chat_history(chat_history):
    """Flatten (user, bot) message pairs into alternating labeled lines."""
    return [
        line
        for user_message, bot_message in chat_history
        for line in (f"User: {user_message}", f"Assistant: {bot_message}")
    ]
|
|
|
def chat(qa_chain, msg, history):
    """Answer one user message with the retrieval chain and update the UI.

    Args:
        qa_chain: the ConversationalRetrievalChain built by indexdocs().
        msg: the user's new prompt.
        history: list of (user, bot) tuples currently shown in the chatbot.

    Returns:
        Updated values for (qa_chain state, prompt textbox, chatbot
        history, source filename textbox, source sheet textbox).
    """
    formatted_chat_history = format_chat_history(history)
    response = qa_chain.invoke({"question": msg, "chat_history": formatted_chat_history})
    response_answer = response["answer"]

    # FIX: guard against an empty retrieval result — the original indexed
    # source_documents[0] unconditionally, which raises IndexError when no
    # document is retrieved; metadata keys are also fetched defensively.
    response_sources = response.get("source_documents") or []
    if response_sources:
        metadata = response_sources[0].metadata
        response_source1 = metadata.get("filename", "")
        response_source_sheet = metadata.get("page_name", "")
    else:
        response_source1 = ""
        response_source_sheet = ""

    new_history = history + [(msg, response_answer)]
    # gr.update(value="") clears the prompt box after each submission.
    return qa_chain, gr.update(value=""), new_history, response_source1, response_source_sheet
|
|
|
with gr.Blocks() as demo:
    # Session state holding the ConversationalRetrievalChain from indexdocs().
    qa_chain = gr.State()

    gr.Markdown(
        """
    # MS Excel Knowledge Base QA using RAG
    """
    )
    with gr.Column():
        # FIX: file_types takes one extension per list element; the original
        # passed the single string '.xls,.xlsx', which matches neither.
        file_list = gr.File(
            label='Upload your MS Excel files...',
            file_count='multiple',
            file_types=['.xls', '.xlsx'],
        )
        fileuploadbtn = gr.Button("Index Documents and Start Chatting")
    with gr.Row():
        chatbot = gr.Chatbot(height=300)
    with gr.Row():
        source = gr.Textbox(info="Source", container=False, scale=4)
        source_page = gr.Textbox(info="Sheet", container=False, scale=1)
    with gr.Row():
        # Prompt controls start disabled until documents are indexed.
        prompt = gr.Textbox(placeholder="Please enter your prompt...", container=False, scale=4, visible=True, interactive=False)
        promptsubmit = gr.Button("Submit", scale=1, visible=True, interactive=False)
    gr.Markdown(
        """
    # Responsible AI Usage
    Your documents uploaded to the system or interactions with the chatbot are not saved.
    """
    )

    # Index the uploads, then unlock the prompt box and submit button.
    fileuploadbtn.click(
        fn=indexdocs, inputs=[file_list], outputs=[qa_chain, chatbot]
    ).then(
        lambda: [gr.Textbox(interactive=True), gr.Button(interactive=True)],
        inputs=None,
        outputs=[prompt, promptsubmit],
        queue=False,
    )
    # Both the Submit button and pressing Enter in the textbox send the prompt.
    promptsubmit.click(
        fn=chat,
        inputs=[qa_chain, prompt, chatbot],
        outputs=[qa_chain, prompt, chatbot, source, source_page],
        queue=False,
    )
    prompt.submit(
        fn=chat,
        inputs=[qa_chain, prompt, chatbot],
        outputs=[qa_chain, prompt, chatbot, source, source_page],
        queue=False,
    )
|
|
|
# Launch the Gradio server only when run as a script, not on import.
if __name__ == "__main__":

    demo.launch()