maitykritadhi committed
Commit 0f39449 (1 parent: 63da8c0)

Upload app.py

Files changed (1): app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
+ import os
+ import shutil
+ import streamlit as st
+ import chromadb
+ import config as cf
+
+ from langchain.chains.question_answering import load_qa_chain
+ from sentence_transformers import SentenceTransformer
+ from langchain_groq import ChatGroq
+ from langchain.schema import Document
+ from source.utils.data_processing import ProcessDocs
+ from source.utils.store_data import save_uploaded_files
+
+ # Groq-hosted chat models selectable in the UI.
+ SUPPORTED_MODELS = ["llama3-8b-8192", "llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"]
+
+
+ def get_conversational_chain(model):
+     """Build a 'stuff' question-answering chain backed by the selected Groq model."""
+     if model not in SUPPORTED_MODELS:
+         raise ValueError(f"Unsupported model: {model}")
+     llm = ChatGroq(temperature=0, model_name=model)
+     # A custom PromptTemplate could be supplied via load_qa_chain(..., prompt=...);
+     # the chain falls back to LangChain's default QA prompt here.
+     chain = load_qa_chain(llm, chain_type="stuff")
+     return chain
+
+
+ def user_input(user_question, model):
+     """Legacy retrieval path: query ChromaDB directly and answer with a QA chain.
+
+     Kept for reference; main() now routes questions through ProcessDocs.retrieval_qa.
+     """
+     embedding_model = SentenceTransformer("all-mpnet-base-v2")
+     chain = get_conversational_chain(model)
+
+     # Embed the question and fetch the five nearest chunks from the persistent store.
+     input_embeddings = embedding_model.encode(user_question).tolist()
+     client = chromadb.PersistentClient("chromadb")
+     collection = client.get_collection("Chromadb_pdf")
+     results = collection.query(
+         query_embeddings=[input_embeddings],
+         n_results=5,
+         include=["distances", "metadatas", "documents"],
+     )
+
+     if results["documents"] and results["documents"][0]:
+         docs = []
+         pg_num = []
+         pdf_name = ""
+         for document, metadata in zip(results["documents"][0], results["metadatas"][0]):
+             pdf_name = metadata["pdf_name"]
+             page_number = metadata["page_number"]
+             docs.append(Document(
+                 page_content=document,
+                 metadata={"source": pdf_name, "page": page_number},
+             ))
+             pg_num.append(str(page_number))
+
+         response = chain(
+             {"input_documents": docs, "question": user_question},
+             return_only_outputs=False,
+         )
+
+         st.write("Reply:", response["output_text"])
+         st.write("Metadata:", f"PDF Name: {pdf_name}, Page Numbers: {','.join(pg_num)}")
+     else:
+         st.write("No results found.")
+
+
+ def main():
+     st.set_page_config("Chat PDF")
+     model = st.selectbox("Select Model", SUPPORTED_MODELS)
+     st.header("Chat with PDF after Uploading")
+
+     user_question = st.text_input("Ask a Question from the PDF Files")
+
+     if user_question:
+         db_obj = ProcessDocs(cf.db_collection_name)
+         response = db_obj.retrieval_qa(user_question, model)
+         st.write("Response:", response)
+
+     with st.sidebar:
+         st.title("Menu:")
+         pdf_docs = st.file_uploader(
+             "Upload your PDF Files and Click on the Submit & Process Button",
+             accept_multiple_files=True,
+         )
+         db_obj = ProcessDocs(cf.db_collection_name)
+
+         if st.button("Submit & Process"):
+             # file_uploader may return None or [] when nothing has been uploaded.
+             new_files = [doc.name for doc in pdf_docs] if pdf_docs else []
+             if new_files:
+                 # Start from a clean staging directory for this upload batch.
+                 if os.path.exists(cf.pdf_download_path):
+                     shutil.rmtree(cf.pdf_download_path)
+                 os.makedirs(cf.pdf_download_path)
+
+                 save_uploaded_files(pdf_docs, cf.pdf_download_path)
+
+                 with st.spinner("Processing..."):
+                     # Index only files that are not already in the vector store.
+                     new_unique_files = db_obj.identify_new_uploaded_files()
+                     loaded_docs = db_obj.create_pdf_docx_loader(new_unique_files, model)
+                     splits = db_obj.split_documents(loaded_docs)
+                     db_obj.vector_store(splits)
+
+                 st.success("Done")
+             else:
+                 st.success("No new files to process")
+
+
+ if __name__ == "__main__":
+     main()
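
For context, app.py imports a sibling config module and a ProcessDocs helper that are not part of this commit. The sketch below is an editor's guess at the minimal surface they would need to expose, inferred purely from the call sites above; every name, type hint, and default value here is an assumption, not the repository's actual code.

# config.py -- hypothetical sketch; app.py reads exactly two attributes.
db_collection_name = "Chromadb_pdf"  # assumed collection name (matches the legacy path)
pdf_download_path = "docs"           # assumed staging directory for uploaded files

# source/utils/data_processing.py -- interface implied by app.py's calls.
class ProcessDocs:
    def __init__(self, collection_name: str): ...

    def retrieval_qa(self, question: str, model: str) -> str:
        """Retrieve matching chunks and answer with the selected Groq model."""

    def identify_new_uploaded_files(self) -> list[str]:
        """Return staged files not yet indexed in the vector store."""

    def create_pdf_docx_loader(self, files: list[str], model: str) -> list: ...
    def split_documents(self, docs: list) -> list: ...
    def vector_store(self, splits: list) -> None: ...

Running the app also requires a GROQ_API_KEY in the environment, since ChatGroq reads it at construction time, e.g. GROQ_API_KEY=... streamlit run app.py.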