Atharva-28 committed
Commit ea1518c
Parent: 958eb68

Delete app.py

Files changed (1)
  1. app.py +0 -213
app.py DELETED
@@ -1,213 +0,0 @@
- # import streamlit as st
- # from PyPDF2 import PdfReader
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
- # import os
- # from langchain_google_genai import GoogleGenerativeAIEmbeddings
- # import google.generativeai as genai
- # from langchain.vectorstores import FAISS
- # from langchain_google_genai import ChatGoogleGenerativeAI
- # from langchain.chains.question_answering import load_qa_chain
- # from langchain.prompts import PromptTemplate
- # from dotenv import load_dotenv
-
- # load_dotenv()
- # os.getenv("GOOGLE_API_KEY")
- # genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
-
-
-
-
-
-
- # def get_pdf_text(pdf_docs):
- #     text = ""
- #     for pdf in pdf_docs:
- #         pdf_reader = PdfReader(pdf)
- #         for page in pdf_reader.pages:
- #             text += page.extract_text()
- #     return text
-
-
-
- # def get_text_chunks(text):
- #     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
- #     chunks = text_splitter.split_text(text)
- #     return chunks
-
-
- # def get_vector_store(text_chunks):
- #     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
- #     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
- #     vector_store.save_local("faiss_index")
-
-
- # def get_conversational_chain():
-
- #     prompt_template = """
- #     Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
- #     provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
- #     Context:\n {context}?\n
- #     Question: \n{question}\n
-
- #     Answer:
- #     """
-
- #     model = ChatGoogleGenerativeAI(model="gemini-pro",
- #                                    temperature=0.3)
-
- #     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
- #     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-
- #     return chain
-
-
-
- # def user_input(user_question):
- #     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
-
- #     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
- #     docs = new_db.similarity_search(user_question)
-
- #     chain = get_conversational_chain()
-
-
- #     response = chain(
- #         {"input_documents": docs, "question": user_question},
- #         return_only_outputs=True)
-
- #     print(response)
- #     st.write("Reply: ", response["output_text"])
-
-
-
-
- # def main():
- #     st.set_page_config("Chat PDF")
- #     st.header("Chat with PDF using Gemini💁")
-
- #     user_question = st.text_input("Ask a Question from the PDF Files")
-
- #     if user_question:
- #         user_input(user_question)
-
- #     with st.sidebar:
- #         st.title("Menu:")
- #         pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
- #         if st.button("Submit & Process"):
- #             with st.spinner("Processing..."):
- #                 raw_text = get_pdf_text(pdf_docs)
- #                 text_chunks = get_text_chunks(raw_text)
- #                 get_vector_store(text_chunks)
- #                 st.success("Done")
-
-
-
- # if __name__ == "__main__":
- #     main()
-
- import streamlit as st
- from dotenv import load_dotenv
- # import PyPDF2
- from PyPDF2 import PdfReader
- from langchain.text_splitter import CharacterTextSplitter
- from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
- from langchain_community.document_loaders import PyMuPDFLoader
- from langchain.vectorstores import FAISS
- from langchain.chat_models import ChatOpenAI
- from langchain.memory import ConversationBufferMemory
- from langchain.chains import ConversationalRetrievalChain
- from htmlTemplates import css, bot_template, user_template
- from langchain.llms import HuggingFaceHub
-
- def get_pdf_text(pdf_docs):
-     text = ""
-     for pdf in pdf_docs:
-         pdf_reader = PdfReader(pdf)
-         for page in pdf_reader.pages:
-             text += page.extract_text()
-     return text
-
-
- def get_text_chunks(text):
-     text_splitter = CharacterTextSplitter(
-         separator="\n",
-         chunk_size=1000,
-         chunk_overlap=200,
-         length_function=len
-     )
-     chunks = text_splitter.split_text(text)
-     return chunks
-
-
- def get_vectorstore(text_chunks):
-     embeddings = OpenAIEmbeddings()
-     # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
-     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-     return vectorstore
-
-
- def get_conversation_chain(vectorstore):
-     llm = ChatOpenAI()
-     # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
-
-     memory = ConversationBufferMemory(
-         memory_key='chat_history', return_messages=True)
-     conversation_chain = ConversationalRetrievalChain.from_llm(
-         llm=llm,
-         retriever=vectorstore.as_retriever(),
-         memory=memory
-     )
-     return conversation_chain
-
-
- def handle_userinput(user_question):
-     response = st.session_state.conversation({'question': user_question})
-     st.session_state.chat_history = response['chat_history']
-
-     for i, message in enumerate(st.session_state.chat_history):
-         if i % 2 == 0:
-             st.write(user_template.replace(
-                 "{{MSG}}", message.content), unsafe_allow_html=True)
-         else:
-             st.write(bot_template.replace(
-                 "{{MSG}}", message.content), unsafe_allow_html=True)
-
-
- def main():
-     load_dotenv()
-     st.set_page_config(page_title="Chat with multiple PDFs",
-                        page_icon=":books:")
-     st.write(css, unsafe_allow_html=True)
-
-     if "conversation" not in st.session_state:
-         st.session_state.conversation = None
-     if "chat_history" not in st.session_state:
-         st.session_state.chat_history = None
-
-     st.header("Chat with multiple PDFs :books:")
-     user_question = st.text_input("Ask a question about your documents:")
-     if user_question:
-         handle_userinput(user_question)
-
-     with st.sidebar:
-         st.subheader("Your documents")
-         pdf_docs = st.file_uploader(
-             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
-         if st.button("Process"):
-             with st.spinner("Processing"):
-                 # get pdf text
-                 raw_text = get_pdf_text(pdf_docs)
-
-                 # get the text chunks
-                 text_chunks = get_text_chunks(raw_text)
-
-                 # create vector store
-                 vectorstore = get_vectorstore(text_chunks)
-
-                 # create conversation chain
-                 st.session_state.conversation = get_conversation_chain(
-                     vectorstore)
-
-
- if __name__ == '__main__':
-     main()
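For reference, the deleted app.py wired up a standard retrieval-augmented chat loop: extract text from the uploaded PDFs, split it into overlapping chunks, embed the chunks into an in-memory FAISS index, and answer questions through a ConversationalRetrievalChain backed by buffer memory. Below is a minimal sketch of that same pipeline outside Streamlit, assuming the pre-0.1 LangChain and PyPDF2 APIs the file imported and an OPENAI_API_KEY in the environment; the build_chain helper and the sample path are illustrative, not part of the original file.

from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

def build_chain(pdf_paths):
    # Extract raw text from every page of every PDF.
    text = ""
    for path in pdf_paths:
        for page in PdfReader(path).pages:
            text += page.extract_text() or ""  # extract_text() can return None

    # Split into overlapping chunks so each retrieved piece stays small.
    chunks = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    ).split_text(text)

    # Embed the chunks and index them in FAISS for similarity search.
    vectorstore = FAISS.from_texts(texts=chunks, embedding=OpenAIEmbeddings())

    # Wrap the retriever and a chat model in a conversational chain with
    # buffer memory, mirroring get_conversation_chain() from the deleted file.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(), retriever=vectorstore.as_retriever(), memory=memory
    )

chain = build_chain(["sample.pdf"])  # illustrative path
print(chain({"question": "What is this document about?"})["answer"])

The chain's buffer memory carries chat_history across calls, which is what let the Streamlit version keep follow-up questions in context between handle_userinput() invocations.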