Atharva-28 committed on
Commit 958eb68
1 Parent(s): fe00c6c

Update app.py

Files changed (1)
  1. app.py +171 -64
app.py CHANGED
@@ -1,106 +1,213 @@
-import streamlit as st
-from PyPDF2 import PdfReader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-import os
-from langchain_google_genai import GoogleGenerativeAIEmbeddings
-import google.generativeai as genai
-from langchain.vectorstores import FAISS
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain.chains.question_answering import load_qa_chain
-from langchain.prompts import PromptTemplate
-from dotenv import load_dotenv

-load_dotenv()
-os.getenv("GOOGLE_API_KEY")
-genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


-def get_pdf_text(pdf_docs):
-    text=""
-    for pdf in pdf_docs:
-        pdf_reader= PdfReader(pdf)
-        for page in pdf_reader.pages:
-            text+= page.extract_text()
-    return text


-def get_text_chunks(text):
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
-    chunks = text_splitter.split_text(text)
-    return chunks


-def get_vector_store(text_chunks):
-    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
-    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
-    vector_store.save_local("faiss_index",allow_dangerous_deserialization=True)


-def get_conversational_chain():
-    prompt_template = """
-    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
-    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
-    Context:\n {context}?\n
-    Question: \n{question}\n

-    Answer:
-    """

-    model = ChatGoogleGenerativeAI(model="gemini-pro",
-                                   temperature=0.3)

-    prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
-    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

-    return chain


-def user_input(user_question):
-    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

-    new_db = FAISS.load_local("faiss_index", embeddings)
-    docs = new_db.similarity_search(user_question)

-    chain = get_conversational_chain()

-    response = chain(
-        {"input_documents":docs, "question": user_question}
-        , return_only_outputs=True)

-    print(response)
-    st.write("Reply: ", response["output_text"])


-def main():
-    st.set_page_config("Chat PDF")
-    st.header("Chat with PDF using Gemini💁")

-    user_question = st.text_input("Ask a Question from the PDF Files")

     if user_question:
-        user_input(user_question)

     with st.sidebar:
-        st.title("Menu:")
-        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
-        if st.button("Submit & Process"):
-            with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)
                 text_chunks = get_text_chunks(raw_text)
-                get_vector_store(text_chunks)
-                st.success("Done")


-if __name__ == "__main__":
     main()
 
+# import streamlit as st
+# from PyPDF2 import PdfReader
+# from langchain.text_splitter import RecursiveCharacterTextSplitter
+# import os
+# from langchain_google_genai import GoogleGenerativeAIEmbeddings
+# import google.generativeai as genai
+# from langchain.vectorstores import FAISS
+# from langchain_google_genai import ChatGoogleGenerativeAI
+# from langchain.chains.question_answering import load_qa_chain
+# from langchain.prompts import PromptTemplate
+# from dotenv import load_dotenv

+# load_dotenv()
+# os.getenv("GOOGLE_API_KEY")
+# genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


+# def get_pdf_text(pdf_docs):
+#     text=""
+#     for pdf in pdf_docs:
+#         pdf_reader= PdfReader(pdf)
+#         for page in pdf_reader.pages:
+#             text+= page.extract_text()
+#     return text


+# def get_text_chunks(text):
+#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+#     chunks = text_splitter.split_text(text)
+#     return chunks


+# def get_vector_store(text_chunks):
+#     embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
+#     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+#     vector_store.save_local("faiss_index",allow_dangerous_deserialization=True)


+# def get_conversational_chain():
+#     prompt_template = """
+#     Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
+#     provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
+#     Context:\n {context}?\n
+#     Question: \n{question}\n

+#     Answer:
+#     """

+#     model = ChatGoogleGenerativeAI(model="gemini-pro",
+#                                    temperature=0.3)

+#     prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
+#     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

+#     return chain


+# def user_input(user_question):
+#     embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

+#     new_db = FAISS.load_local("faiss_index", embeddings)
+#     docs = new_db.similarity_search(user_question)

+#     chain = get_conversational_chain()

+#     response = chain(
+#         {"input_documents":docs, "question": user_question}
+#         , return_only_outputs=True)

+#     print(response)
+#     st.write("Reply: ", response["output_text"])


+# def main():
+#     st.set_page_config("Chat PDF")
+#     st.header("Chat with PDF using Gemini💁")
+
+#     user_question = st.text_input("Ask a Question from the PDF Files")
+
+#     if user_question:
+#         user_input(user_question)
+
+#     with st.sidebar:
+#         st.title("Menu:")
+#         pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
+#         if st.button("Submit & Process"):
+#             with st.spinner("Processing..."):
+#                 raw_text = get_pdf_text(pdf_docs)
+#                 text_chunks = get_text_chunks(raw_text)
+#                 get_vector_store(text_chunks)
+#                 st.success("Done")
+
+
+# if __name__ == "__main__":
+#     main()
+
+import streamlit as st
+from dotenv import load_dotenv
+# import PyPDF2
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain.vectorstores import FAISS
+from langchain.chat_models import ChatOpenAI
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from htmlTemplates import css, bot_template, user_template
+from langchain.llms import HuggingFaceHub
+
+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+
+
+def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+

+def get_vectorstore(text_chunks):
+    embeddings = OpenAIEmbeddings()
+    # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    return vectorstore

+
+def get_conversation_chain(vectorstore):
+    llm = ChatOpenAI()
+    # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
+
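+    # buffer memory keeps the running chat history so the chain can resolve follow-up questions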
+    memory = ConversationBufferMemory(
+        memory_key='chat_history', return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        memory=memory
+    )
+    return conversation_chain
+
+
+def handle_userinput(user_question):
+    response = st.session_state.conversation({'question': user_question})
+    st.session_state.chat_history = response['chat_history']
+
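+    # chat_history alternates turns: even indices are user messages, odd indices are bot replies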
+    for i, message in enumerate(st.session_state.chat_history):
+        if i % 2 == 0:
+            st.write(user_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
+        else:
+            st.write(bot_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
+
+
+def main():
+    load_dotenv()
+    st.set_page_config(page_title="Chat with multiple PDFs",
+                       page_icon=":books:")
+    st.write(css, unsafe_allow_html=True)
+
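+    # st.session_state keeps the chain and history alive across Streamlit reruns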
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = None
+
+    st.header("Chat with multiple PDFs :books:")
+    user_question = st.text_input("Ask a question about your documents:")
     if user_question:
+        handle_userinput(user_question)

     with st.sidebar:
+        st.subheader("Your documents")
+        pdf_docs = st.file_uploader(
+            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+        if st.button("Process"):
+            with st.spinner("Processing"):
+                # get pdf text
                 raw_text = get_pdf_text(pdf_docs)
+
+                # get the text chunks
                 text_chunks = get_text_chunks(raw_text)

+                # create vector store
+                vectorstore = get_vectorstore(text_chunks)
+
+                # create conversation chain
+                st.session_state.conversation = get_conversation_chain(
+                    vectorstore)


+if __name__ == '__main__':
     main()