Qazi-Mudassar-Ilyas committed on
Commit
397ebce
1 Parent(s): 9c40c61

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain.chains import RetrievalQA
4
+ from langchain_community.document_loaders import TextLoader
5
+ from langchain_community.document_loaders import UnstructuredExcelLoader
6
+ from langchain.indexes import VectorstoreIndexCreator
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+ from langchain_community.llms import HuggingFaceEndpoint
10
+ from langchain.memory import ConversationBufferMemory
11
+ from langchain.chains import ConversationalRetrievalChain
12
+
13
+ from dotenv import find_dotenv, load_dotenv
14
+
15
+ from langchain.chains import create_retrieval_chain, RetrievalQA
16
+ from langchain_community.vectorstores import FAISS
17
+
18
# Load variables from a .env file (located via find_dotenv) into the process
# environment; the boolean result is intentionally discarded.
_ = load_dotenv(find_dotenv())

# Hugging Face Hub API token taken from the environment (None when unset).
hf_api = os.getenv("HUGGINGFACEHUB_API_TOKEN")
20
+
21
def indexdocs(file_path, progress=gr.Progress()):
    """Index the uploaded Excel files and build a conversational retrieval chain.

    The files are loaded with ``UnstructuredExcelLoader``, split into
    overlapping chunks, embedded into a FAISS vector store, and wrapped in a
    ``ConversationalRetrievalChain`` backed by a Hugging Face endpoint.

    Args:
        file_path: Iterable of uploaded files; each item is passed as-is to
            ``UnstructuredExcelLoader``.
        progress: Gradio progress tracker. Gradio supplies it through this
            default value, so the default must stay a ``gr.Progress()`` instance.

    Returns:
        A ``(qa_chain, None)`` tuple. The click wiring in this file sends the
        second value to the chatbot component.

    Raises:
        gr.Error: If no files were provided or no content could be extracted.
    """
    # Without this guard, iterating a None/empty upload fails with a bare
    # TypeError (or an opaque FAISS error further down).
    if not file_path:
        raise gr.Error("Please upload at least one MS Excel file before indexing.")

    progress(0.1, desc="Loading documents...")

    loaders = [UnstructuredExcelLoader(file, mode="elements") for file in file_path]
    documents = []
    for loader in loaders:
        documents.extend(loader.load())

    progress(0.3, desc="Splitting documents...")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
    pages = text_splitter.split_documents(documents)
    if not pages:
        # An empty corpus cannot be turned into a vector store.
        raise gr.Error("No content could be extracted from the uploaded files.")
    embedding = HuggingFaceEmbeddings()

    progress(0.5, desc="Creating vectorstore...")

    vector = FAISS.from_documents(documents=pages, embedding=embedding)
    retriever = vector.as_retriever()

    progress(0.8, desc="Setting up language model...")

    # output_key='answer' tells the memory which chain output to record; the
    # chain also returns 'source_documents' (see return_source_documents below).
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key='answer',
        return_messages=True,
    )

    llm = HuggingFaceEndpoint(
        repo_id="Mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.1,
        max_new_tokens=200,
        top_k=1,
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        return_source_documents=True,
        verbose=False,
    )
    return qa_chain, None
65
+
66
def format_chat_history(chat_history):
    """Flatten (user, assistant) message pairs into labelled text lines.

    Args:
        chat_history: Iterable of two-item ``(user_message, bot_message)``
            pairs, or ``None``/empty when there is no conversation yet.

    Returns:
        A list of strings alternating ``"User: ..."`` and ``"Assistant: ..."``
        in conversation order; an empty list when there is no history.
    """
    formatted_chat_history = []
    # `or []` lets a missing (None) history behave like an empty one.
    for user_message, bot_message in chat_history or []:
        formatted_chat_history.append(f"User: {user_message}")
        formatted_chat_history.append(f"Assistant: {bot_message}")
    return formatted_chat_history
72
+
73
def chat(qa_chain, msg, history):
    """Answer a user message with the retrieval chain and report its top source.

    Args:
        qa_chain: The chain object produced by the indexing step; it must
            support ``invoke`` with ``question`` and ``chat_history`` keys.
        msg: The user's prompt text.
        history: List of ``(user_message, bot_message)`` pairs shown in the
            chatbot so far (``None`` is treated as empty).

    Returns:
        A 5-tuple ``(qa_chain, gr.update(value=""), new_history, source,
        sheet)``: the unchanged chain, an update that clears the prompt box,
        the history extended with this exchange, and the ``filename`` and
        ``page_name`` metadata of the first source document (empty strings
        when unavailable).

    Raises:
        gr.Error: If no chain has been built yet.
    """
    if qa_chain is None:
        raise gr.Error("Please index your documents before chatting.")

    history = history or []
    formatted_chat_history = format_chat_history(history)
    response = qa_chain.invoke({"question": msg, "chat_history": formatted_chat_history})
    response_answer = response["answer"]
    response_sources = response["source_documents"]

    # The retriever may return no documents, and a document's metadata may
    # lack these keys; fall back to empty strings instead of raising.
    response_source1 = ""
    response_source_sheet = ""
    if response_sources:
        top_metadata = response_sources[0].metadata
        response_source1 = top_metadata.get("filename", "")
        response_source_sheet = top_metadata.get("page_name", "")

    new_history = history + [(msg, response_answer)]
    return qa_chain, gr.update(value=""), new_history, response_source1, response_source_sheet
83
+
84
# Gradio UI: upload Excel files, index them, then chat against the index.
# NOTE(review): the layout nesting below is reconstructed; the original
# indentation was not available — confirm against the intended design.
with gr.Blocks() as demo:
    # Per-session holder for the chain returned by indexdocs.
    qa_chain = gr.State()

    gr.Markdown(
        """
        # MS Excel Knowledge Base QA using RAG
        """
    )
    with gr.Column():
        # Each accepted extension must be its own list entry.
        file_list = gr.File(
            label='Upload your MS Excel files...',
            file_count='multiple',
            file_types=['.xls', '.xlsx'],
        )
        fileuploadbtn = gr.Button("Index Documents and Start Chatting")
    with gr.Row():
        chatbot = gr.Chatbot(height=300)
    with gr.Row():
        source = gr.Textbox(info="Source", container=False, scale=4)
        source_page = gr.Textbox(info="Sheet", container=False, scale=1)
    with gr.Row():
        # Prompt controls start disabled and are enabled once indexing succeeds.
        prompt = gr.Textbox(
            placeholder="Please enter your prompt...",
            container=False,
            scale=4,
            visible=True,
            interactive=False,
        )
        promptsubmit = gr.Button("Submit", scale=1, visible=True, interactive=False)
    gr.Markdown(
        """
        # Responsible AI Usage
        Your documents uploaded to the system or interactions with the chatbot are not saved.
        """
    )

    # Use .success (not .then) so the prompt is only unlocked when indexing
    # actually produced a chain.
    fileuploadbtn.click(
        fn=indexdocs,
        inputs=[file_list],
        outputs=[qa_chain, chatbot],
    ).success(
        lambda: [gr.Textbox(interactive=True), gr.Button(interactive=True)],
        inputs=None,
        outputs=[prompt, promptsubmit],
        queue=False,
    )
    # The Submit button and pressing Enter in the prompt box trigger the same handler.
    promptsubmit.click(
        fn=chat,
        inputs=[qa_chain, prompt, chatbot],
        outputs=[qa_chain, prompt, chatbot, source, source_page],
        queue=False,
    )
    prompt.submit(
        fn=chat,
        inputs=[qa_chain, prompt, chatbot],
        outputs=[qa_chain, prompt, chatbot, source, source_page],
        queue=False,
    )
114
+
115
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()