Brahmadev619 committed on
Commit
bd1030b
·
verified ·
1 Parent(s): 45fd21c

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +103 -0
  2. htmlTemplates.py +44 -0
  3. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ # from langchain_community.vectorstores import FAISS
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from htmlTemplates import css, bot_template, user_template
13
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
14
+ from langchain.llms import HuggingFaceHub
15
+
16
def get_pdf_text(pdf_doc):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_doc: Iterable of PDF sources that ``PdfReader`` can open (in this
            app, the files returned by the sidebar uploader). ``None`` or an
            empty iterable is treated as "no documents".

    Returns:
        One string holding the text of all pages, in document order and then
        page order. Empty when there are no documents or no extractable text.
    """
    page_texts = []
    for pdf in pdf_doc or []:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # NOTE(review): some PyPDF2 releases may hand back None/"" for a
            # page without a text layer (e.g. a scanned image); fall back to
            # "" so one such page cannot break the whole extraction.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
23
+
24
+
25
def get_text_chunk(row_text):
    """Split the raw document text into chunks for embedding.

    The text is handed to a ``CharacterTextSplitter`` configured with a
    newline separator, ``chunk_size=1000``, ``chunk_overlap=200`` and ``len``
    as the length function.

    Args:
        row_text: The full text extracted from the uploaded documents.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for that text.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(row_text)
34
+
35
+
36
def get_vectorstore(text_chunk):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunk: The chunk strings to embed.

    Returns:
        The store produced by ``FAISS.from_texts`` using the
        "hkunlp/instructor-xl" Instruct embedding model.
    """
    # Alternative embedding backend that was tried here: OpenAIEmbeddings()
    instructor = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    return FAISS.from_texts(text_chunk, instructor)
41
+
42
+
43
def get_conversation_chain(vectorstores):
    """Create the retrieval chat chain backed by the given vector store.

    Args:
        vectorstores: Vector store whose ``as_retriever()`` supplies the
            retriever for the chain.

    Returns:
        A ``ConversationalRetrievalChain`` built from the
        "google/flan-t5-base" hub model, the store's retriever, and a
        ``ConversationBufferMemory`` stored under the "chat_history" key.
    """
    # Alternative LLM backend that was tried here: ChatOpenAI()
    hub_llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",
        model_kwargs={"temperature": 0.5, "max_length": 512},
    )
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=hub_llm,
        retriever=vectorstores.as_retriever(),
        memory=history,
    )
51
+
52
+
53
def user_input(user_question):
    """Ask the conversation chain a question and render the chat history.

    If no conversation chain has been built yet (no documents processed),
    a warning is shown and nothing else happens.

    Args:
        user_question: The question text typed by the user.
    """
    import html  # local import: only needed to escape the rendered messages

    conversation = st.session_state.conversation
    if conversation is None:
        # Asking before "Process" would otherwise try to call None.
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = conversation({"question": user_question})
    st.session_state.chat_history = response["chat_history"]

    for indx, msg in enumerate(st.session_state.chat_history):
        # Even positions are rendered with the user template, odd ones with
        # the bot template.
        template = user_template if indx % 2 == 0 else bot_template
        # The snippet is rendered with unsafe_allow_html=True, so the message
        # text must be escaped to keep user/model text from being interpreted
        # as markup.
        st.write(
            template.replace("{{MSG}}", html.escape(msg.content)),
            unsafe_allow_html=True,
        )
62
+
63
+
64
+
65
def main():
    """Build the Streamlit page: question box plus a sidebar for uploads.

    The main area shows a text input whose question is passed to
    ``user_input``. The sidebar lets the user upload documents and press
    "Process", which extracts the text, chunks it, builds the vector store
    and stores the resulting conversation chain in
    ``st.session_state.conversation``.
    """
    # Load the secret keys from the environment / .env file.
    load_dotenv()

    # Configure the page.
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your docs")
    if user_question:
        user_input(user_question)

    # Create the sidebar.
    with st.sidebar:
        st.subheader("Your Documents")
        pdf_doc = st.file_uploader(
            label="Upload your documents",
            accept_multiple_files=True,
        )
        if st.button("Process"):
            if not pdf_doc:
                # Nothing uploaded: building a vector store from zero chunks
                # would fail, so stop with a clear message instead.
                st.warning("Please upload at least one PDF before processing.")
                return

            with st.spinner(text="Processing"):
                # Get the PDF text.
                row_text = get_pdf_text(pdf_doc)
                # Get the text chunks.
                text_chunk = get_text_chunk(row_text)
                if not text_chunk:
                    # e.g. PDFs without a text layer yield no chunks to embed.
                    st.error("No extractable text was found in the uploaded documents.")
                    return
                # Create the vector store.
                vectorstores = get_vectorstore(text_chunk)
                # Create the conversation chain.
                st.session_state.conversation = get_conversation_chain(vectorstores)
100
+
101
+
102
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
htmlTemplates.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet for the chat bubbles; app.py writes it to the page as raw HTML.
# The <style> element is explicitly closed so that markup rendered after it
# cannot be swallowed into the stylesheet.
css = '''
<style>
.chat-message {
padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
background-color: #2b313e
}
.chat-message.bot {
background-color: #475063
}
.chat-message .avatar {
width: 20%;
}
.chat-message .avatar img {
max-width: 78px;
max-height: 78px;
border-radius: 50%;
object-fit: cover;
}
.chat-message .message {
width: 80%;
padding: 0 1.5rem;
color: #fff;
}
</style>
'''
27
+
28
# HTML snippet for one bot chat bubble. "{{MSG}}" is a placeholder that the
# app replaces with the message text before rendering.
bot_template = '''
<div class="chat-message bot">
<div class="avatar">
<img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png">
</div>
<div class="message">{{MSG}}</div>
</div>
'''
36
+
37
# HTML snippet for one user chat bubble. "{{MSG}}" is a placeholder that the
# app replaces with the message text before rendering.
# NOTE(review): the avatar is hot-linked from source.unsplash.com — confirm
# that URL still resolves.
user_template = '''
<div class="chat-message user">
<div class="avatar">
<img src="https://source.unsplash.com/shallow-focus-photo-of-man-NR705beN_CU">
</div>
<div class="message">{{MSG}}</div>
</div>
'''
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ streamlit
3
+ faiss-cpu
4
+ huggingface-hub
5
+ InstructorEmbedding
6
+ langchain-openai
7
+ openai
8
+ PyPDF2
9
+ sentence-transformers
10
+ langchain-community