Shubhamtribhuwan17 committed on
Commit
b684da5
•
1 Parent(s): 0ed9814

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -2
app.py CHANGED
@@ -1,3 +1,121 @@
1
- import gradio as gr
2
 
3
- gr.load("models/mistralai/Mistral-7B-v0.1").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import gradio as gr
2
 
3
+ # gr.load("models/mistralai/Mistral-7B-v0.1").launch()
4
+
5
+ import os
6
+ import streamlit as st
7
+ from dotenv import load_dotenv
8
+ from PyPDF2 import PdfReader
9
+ from langchain.text_splitter import CharacterTextSplitter
10
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
11
+ from langchain.vectorstores import FAISS
12
+ from langchain.chat_models import ChatOpenAI
13
+ from langchain.memory import ConversationBufferMemory
14
+ from langchain.chains import ConversationalRetrievalChain
15
+ from htmlTemplates import css, bot_template, user_template
16
+ from langchain.llms import HuggingFaceHub
17
+
18
# Export the Hugging Face token from Streamlit's secrets store into the
# process environment under the name HUGGINGFACEHUB_API_TOKEN
# (presumably where the HuggingFaceHub client looks it up -- confirm).
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["huggingface_token"]
20
+
21
def get_pdf_text(pdf_docs: list) -> str:
    """Extract and concatenate the text of every page of every PDF.

    Args:
        pdf_docs: Iterable of items that ``PdfReader`` can open (in this app,
            the files returned by the Streamlit uploader). May be empty.

    Returns:
        The text of all pages, in document order then page order, joined
        with no separator. An empty string when ``pdf_docs`` is empty.
    """
    page_texts = []
    for pdf in pdf_docs:
        for page in PdfReader(pdf).pages:
            # Fall back to "" when a page yields no text (None or empty) so
            # that one unreadable page cannot break the concatenation.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += inside the loop.
    return "".join(page_texts)
28
+
29
+
30
def get_text_chunks(text: str) -> list:
    """Split ``text`` into overlapping chunks.

    The splitter breaks on newline characters and is configured with
    ``chunk_size=1500`` and ``chunk_overlap=300``, both measured with
    ``len`` (i.e. in characters).

    Args:
        text: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1500,
        chunk_overlap=300,
        length_function=len,
    )
    return splitter.split_text(text)
36
+
37
+
38
def get_vectorstore(text_chunks: list) -> FAISS:
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: The chunk strings to embed and index.

    Returns:
        A ``FAISS`` store built from ``text_chunks`` with the embedding
        model configured below.
    """
    model = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    embeddings = HuggingFaceBgeEmbeddings(
        model_name=model,
        model_kwargs={"device": "cpu"},
        # Normalize the vectors so similarity scores behave like cosine
        # similarity.
        encode_kwargs={"normalize_embeddings": True},
        # The BGE wrapper prepends a BGE-specific English retrieval
        # instruction to every query by default. This model is not a BGE
        # model, so that prefix would make query embeddings inconsistent
        # with the (un-prefixed) document embeddings. Disable it.
        query_instruction="",
    )
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
48
+
49
+
50
def get_conversation_chain(vectorstore: FAISS) -> ConversationalRetrievalChain:
    """Build the conversational retrieval chain over ``vectorstore``.

    Args:
        vectorstore: The store whose ``as_retriever()`` supplies the
            chain's retriever.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a Hugging Face Hub LLM
        and a buffer memory stored under the ``"chat_history"`` key.
    """
    # Previously commented-out alternatives in this spot:
    #   ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
    #   repo_id="TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"
    hub_llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.5, "max_length": 1048},
    )
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=hub_llm,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
63
+
64
+
65
def handle_userinput(user_question: str) -> None:
    """Send the question to the conversation chain and render the chat.

    The chain's ``"chat_history"`` is stored in
    ``st.session_state.chat_history`` and then written to the page, one
    ``st.write`` call per message.

    Args:
        user_question: The text the user typed into the question box.
    """
    conversation = st.session_state.conversation
    if conversation is None:
        # No documents have been processed yet, so there is no chain to
        # call; calling None would raise TypeError. Tell the user instead.
        st.warning(
            "Please upload your PDFs and click 'Process' before asking a question."
        )
        return

    response = conversation({"question": user_question})
    st.session_state.chat_history = response["chat_history"]

    # Even positions are rendered as the user, odd positions as the bot.
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(" Usuario: " + message.content)
        else:
            st.write("🤖 ChatBot: " + message.content)
74
+
75
+
76
def _process_documents(pdf_docs) -> None:
    """Build the conversation chain from the uploaded PDFs, if possible.

    Stores the chain in ``st.session_state.conversation``. When nothing was
    uploaded, or no text could be extracted, shows a message and leaves the
    session state untouched.
    """
    if not pdf_docs:
        st.warning("Please upload at least one PDF before clicking 'Process'.")
        return

    with st.spinner("Processing"):
        # get pdf text
        raw_text = get_pdf_text(pdf_docs)

        # get the text chunks
        text_chunks = get_text_chunks(raw_text)
        if not text_chunks:
            # Nothing to index (e.g. PDFs without extractable text); a
            # vector store cannot be built from an empty chunk list.
            st.error("No text could be extracted from the uploaded PDFs.")
            return

        # create vector store
        vectorstore = get_vectorstore(text_chunks)

        # create conversation chain
        st.session_state.conversation = get_conversation_chain(vectorstore)


def main():
    """Render the Streamlit page: the question box and the upload sidebar."""
    st.set_page_config(
        page_title="Chat with a Bot that tries to answer questions about multiple PDFs",
        page_icon=":books:",
    )

    st.markdown("# Chat with a Bot")
    st.markdown(
        "This bot tries to answer questions about multiple PDFs. "
        "Let the processing of the PDF finish before adding your question. 🙏🏾"
    )

    st.write(css, unsafe_allow_html=True)

    # Create the session keys on first run so later reads never fail.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header(
        "Chat with a Bot 🤖🦾 that tries to answer questions about multiple PDFs :books:"
    )
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            # Only PDFs can be parsed downstream, so reject other file types
            # at upload time.
            type=["pdf"],
            accept_multiple_files=True,
        )
        if st.button("Process"):
            _process_documents(pdf_docs)


if __name__ == "__main__":
    main()