import os

import gradio as gr
from dotenv import find_dotenv, load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredExcelLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS

# Load the Hugging Face API token from a local .env file.
_ = load_dotenv(find_dotenv())
hf_api = os.getenv("HUGGINGFACEHUB_API_TOKEN")


def indexdocs(file_path, progress=gr.Progress()):
    """Index the uploaded Excel files and build the conversational RAG chain."""
    progress(0.1, desc="Loading documents...")
    # mode="elements" keeps per-sheet metadata (filename, page_name) on each chunk.
    loaders = [UnstructuredExcelLoader(file, mode="elements") for file in file_path]
    documents = []
    for loader in loaders:
        documents.extend(loader.load())

    progress(0.3, desc="Splitting documents...")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
    pages = text_splitter.split_documents(documents)

    progress(0.5, desc="Creating vectorstore...")
    embedding = HuggingFaceEmbeddings()
    vector = FAISS.from_documents(documents=pages, embedding=embedding)
    retriever = vector.as_retriever()

    progress(0.8, desc="Setting up language model...")
    # output_key="answer" tells the memory which chain output to store,
    # since the chain also returns source documents.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        return_messages=True,
    )
    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        huggingfacehub_api_token=hf_api,
        temperature=0.1,
        max_new_tokens=200,
        top_k=1,
    )
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        return_source_documents=True,
        verbose=False,
    )
    # The second value resets the chatbot component in the UI.
    return qa_chain, None


def format_chat_history(chat_history):
    """Flatten Gradio's (user, bot) message tuples into plain strings."""
    formatted_chat_history = []
    for user_message, bot_message in chat_history:
        formatted_chat_history.append(f"User: {user_message}")
        formatted_chat_history.append(f"Assistant: {bot_message}")
    return formatted_chat_history


def chat(qa_chain, msg, history):
    formatted_chat_history = format_chat_history(history)
    response = qa_chain.invoke({"question": msg, "chat_history": formatted_chat_history})
    response_answer = response["answer"]
    response_sources = response["source_documents"]
    print(response_sources[0])  # debug: inspect the top-ranked source chunk
    # Surface where the answer came from: the workbook and the sheet.
    response_source1 = response_sources[0].metadata["filename"]
    response_source_sheet = response_sources[0].metadata["page_name"]
    new_history = history + [(msg, response_answer)]
    # Clear the prompt box and return the updated chat plus source info.
    return qa_chain, gr.update(value=""), new_history, response_source1, response_source_sheet


with gr.Blocks() as demo:
    # The chain lives in per-session state so concurrent users don't share it.
    qa_chain = gr.State()
    gr.Markdown("""
# MS Excel Knowledge Base QA using RAG
""")
    with gr.Column():
        file_list = gr.File(
            label="Upload your MS Excel files...",
            file_count="multiple",
            file_types=[".xls", ".xlsx"],
        )
        fileuploadbtn = gr.Button("Index Documents and Start Chatting")
    with gr.Row():
        chatbot = gr.Chatbot(height=300)
    with gr.Row():
        source = gr.Textbox(info="Source", container=False, scale=4)
        source_page = gr.Textbox(info="Sheet", container=False, scale=1)
    with gr.Row():
        prompt = gr.Textbox(
            placeholder="Please enter your prompt...",
            container=False,
            scale=4,
            visible=True,
            interactive=False,
        )
        promptsubmit = gr.Button("Submit", scale=1, visible=True, interactive=False)
    gr.Markdown("""
# Responsible AI Usage
Documents you upload and your interactions with the chatbot are not saved.
""")

    # Indexing populates the session state, then unlocks the prompt controls.
    fileuploadbtn.click(
        fn=indexdocs,
        inputs=[file_list],
        outputs=[qa_chain, chatbot],
    ).then(
        lambda: [gr.Textbox(interactive=True), gr.Button(interactive=True)],
        inputs=None,
        outputs=[prompt, promptsubmit],
        queue=False,
    )
    promptsubmit.click(
        fn=chat,
        inputs=[qa_chain, prompt, chatbot],
        outputs=[qa_chain, prompt, chatbot, source, source_page],
        queue=False,
    )
    prompt.submit(
        fn=chat,
        inputs=[qa_chain, prompt, chatbot],
        outputs=[qa_chain, prompt, chatbot, source, source_page],
        queue=False,
    )

if __name__ == "__main__":
    demo.launch()