AbuZayDin commited on
Commit
8f3f965
1 Parent(s): 2517b91

Upload 7 files

Browse files
Files changed (7) hide show
  1. .env +1 -0
  2. Home.py +96 -0
  3. Login.py +42 -0
  4. Logo.jpg +0 -0
  5. README.md +4 -12
  6. app.py +216 -0
  7. download.png +0 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GOOGLE_API_KEY="AIzaSyD19vL5uui8kp0qw8WL4nOznZwLNOufGRw"
Home.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Libraries
2
+ import streamlit as st
3
+ from PIL import Image
4
+
5
+
6
+
7
+
8
+
9
# Landing page of the app: static header, introduction, feature list and
# methodology text rendered with Streamlit.

# Browser tab title/icon and wide layout.
st.set_page_config(page_title='Document Comparer App', page_icon=':bar_chart:', layout='wide')

# Header row: six equal columns, of which only the first two are used
# (logo on the left, page title next to it).
c1, c2, c3, c4, c5, c6 = st.columns(6)
c1.image(Image.open('download.png'))
c2.title('Document Comparer App')

# Introduction
st.subheader('Introduction')
st.write(
    """
Welcome to "Chat Your PDF"!

Unlock the power of conversation with your PDF documents like never before. Say goodbye to tedious scrolling and searching through pages of text. With "Chat Your PDF," you can effortlessly interact with your PDF files through simple, natural language.
"""
)

# Key features
st.subheader('Key Features')
st.write("""
Natural Language Interaction: Engage with your PDF documents using natural language commands and queries, making it easy to navigate, search, and extract information.

PDF Parsing: Seamlessly parse PDF files to extract text, images, and other relevant content, enabling efficient interaction with the document's contents.

Intelligent Search: Utilize advanced search capabilities to quickly find specific information within your PDF files, enhancing productivity and saving time.

Multi-format Support: Support for various file formats, including PDF, DOCX, and TXT, allowing users to upload and interact with documents in different formats.

Conversation History: Maintain a history of your interactions with each PDF document, enabling you to track your progress and revisit previous queries and commands.

Annotation and Highlighting: Annotate and highlight sections of your PDF documents directly within the chat interface, facilitating collaboration and knowledge sharing.

Personalization: Customize the chat interface and settings to suit your preferences, including font size, theme, and language options.

Responsive Design: Ensure compatibility across devices with a responsive web design that adapts to various screen sizes and orientations, providing a seamless user experience on desktops, tablets, and smartphones.

Secure Data Handling: Implement robust security measures to protect user data and ensure confidentiality when interacting with sensitive documents.

Integration with Cloud Storage: Integrate with popular cloud storage services such as Google Drive, Dropbox, and OneDrive, allowing users to access and interact with their PDF files directly from their cloud accounts.

Feedback and Support: Provide users with a feedback mechanism and access to customer support resources to address inquiries, resolve issues, and gather suggestions for future enhancements.

Accessibility Features: Incorporate accessibility features such as screen reader compatibility and keyboard navigation, ensuring inclusivity and usability for all users, including those with disabilities.
"""
)

# Methodology
# NOTE: the previous text ended with a "# [**GitHub Repository**](...)" line.
# A '#' inside a string literal does not comment anything out, so the link to
# an unrelated repository was emitted to the page; it has been removed.
st.subheader('Methodology')
st.write(
    """
Parse uploaded PDF files to extract text and metadata.

Utilize natural language processing (NLP) algorithms to interpret user queries and commands.

Implement search and navigation algorithms to locate relevant content within the documents.

Enable interactive chat-based communication for users to interact with PDF content seamlessly.
"""
)

# Divider
st.divider()
80
+
81
+
82
+
83
+ # st.page_link("http://localhost:8501/Login", label="Login", icon="2️⃣")
84
+ # st.sidebar.page_link("http://localhost:8501/Home", label="Manage users")
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
Login.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ # import second_page
3
+
4
+ from PyPDF2 import PdfReader
5
+ import docx2txt
6
+ import app as app
7
+
8
+ c1, c2 = st.columns([4, 7])
9
+
10
def initialize_session_state():
    """Make sure ``st.session_state["authenticated"]`` exists.

    The key is created with the value ``False`` only when it is absent, so an
    already stored authentication result is never overwritten.
    """
    if "authenticated" in st.session_state:
        return
    st.session_state["authenticated"] = False
14
def validate_credentials(password):
    """Return True when *password* matches the configured access code.

    The expected code is read from the ``APP_ACCESS_CODE`` environment
    variable; when that variable is not set, the previous built-in value is
    used so existing deployments keep working. This is still a placeholder
    check -- replace it with real authentication before production use.

    Args:
        password: The access code typed by the user.

    Returns:
        bool: True if the code matches, False otherwise (including for
        non-string input such as None).
    """
    import hmac
    import os

    if not isinstance(password, str):
        return False

    expected = os.environ.get("APP_ACCESS_CODE", "12345")
    # Constant-time comparison avoids leaking how many leading characters
    # matched; bytes are used because compare_digest rejects non-ASCII str.
    return hmac.compare_digest(password.encode("utf-8"), expected.encode("utf-8"))
19
+
20
def authenticate_user():
    """Show the login form until the user is authenticated, then run the app.

    While ``st.session_state["authenticated"]`` is False, a password field and
    an "Access Code" button are shown in the sidebar. Pressing the button with
    a non-empty password validates it and stores the result in session state.

    The main application (``app.main()``) is rendered only once the session is
    authenticated. The stored flag is checked on every run, so the app stays
    visible across Streamlit reruns and is never shown to an unauthenticated
    visitor.
    """
    initialize_session_state()

    if not st.session_state["authenticated"]:
        password = st.sidebar.text_input(label="Password", value="", key="passwd", type="password")
        if st.sidebar.button("Access Code") and password:
            authenticated = validate_credentials(password)
            st.session_state["authenticated"] = authenticated

            if authenticated:
                st.sidebar.success("Welcome, authenticated user!")
            else:
                st.sidebar.error("Invalid credentials. Please try again.")

    # Gate the protected content on the stored flag rather than on the button
    # press, which is only True for the single run in which it was clicked.
    if st.session_state["authenticated"]:
        st.markdown("#")
        app.main()


if __name__ == "__main__":
    authenticate_user()
Logo.jpg ADDED
README.md CHANGED
@@ -1,12 +1,4 @@
1
- ---
2
- title: Chat Your Pdfs
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.34.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Chat-your-Document-Using-Gemini-Pro-API-with-RAG
2
+ Chat Your File
3
+
4
+ This repository contains a web app that lets you upload a file and ask the model questions about its contents. It uses retrieval-augmented generation (RAG) with the Gemini Pro API.
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
+ import streamlit as st
5
+ import json
6
+ import os
7
+ from datetime import datetime, timedelta
8
+
9
+ import google.generativeai as genai
10
+ from langchain_community.vectorstores import Chroma
11
+ from langchain.chains.question_answering import load_qa_chain
12
+ from langchain import PromptTemplate
13
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
14
+ from langchain_community.vectorstores import FAISS
15
+ from langchain_google_genai import ChatGoogleGenerativeAI
16
+ from langchain.prompts import ChatPromptTemplate
17
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
18
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
19
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
20
+ from transformers import pipeline
21
+ from PyPDF2 import PdfReader
22
+ import docx2txt
23
+
24
# Configure the Google Generative AI SDK with the key taken from the
# GOOGLE_API_KEY environment variable. The call is made for its side effect
# only, so its return value is not printed.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Native Gemini chat session. The intermediate GenerativeModel is not bound to
# `model`, because `model` is (re)defined below as the LangChain chat model.
chat = genai.GenerativeModel("gemini-pro").start_chat(history=[])

# Per-session defaults.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

if "messages" not in st.session_state:
    st.session_state["messages"] = []

if 'level' not in st.session_state:
    st.session_state['level'] = 'Beginner'

# Add initial assistant message if chat history is empty
if not st.session_state["messages"]:
    st.session_state["messages"].append({"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"})


# Prompt used by the question-answering chain; {context} and {question} are
# filled in by the chain at call time.
prompt_template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n

Answer:
"""

# LangChain wrapper around Gemini used for answering questions.
model = ChatGoogleGenerativeAI(model="gemini-pro",
                               temperature=0.3)

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
# "stuff" chain: all retrieved documents are placed into a single prompt.
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
65
+
66
+
67
+
68
+
69
+
70
def get_file_text(files):
    """Extract text from uploaded PDF, DOCX and plain-text files.

    Args:
        files: Iterable of uploaded file objects. Each must expose a ``type``
            attribute holding its MIME type and behave like a binary file
            object. ``None`` is treated as "no files".

    Returns:
        list[str]: One entry per PDF page that yielded text, one entry per
        DOCX file and one entry per text file, in upload order. Files of any
        other type are reported with ``st.error`` and skipped.
    """
    text = []
    for file in files or []:
        if file.type == "application/pdf":  # Handle PDF files
            for page in PdfReader(file).pages:
                page_text = page.extract_text()
                # Pages without extractable text would otherwise add
                # None/empty entries to the result.
                if page_text:
                    text.append(page_text)
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":  # Handle DOCX files
            text.append(docx2txt.process(file))
        elif file.type == "text/plain":  # Handle text files
            # Uploaded files are in-memory binary streams and have no
            # .open() method: read the bytes and decode them directly.
            raw = file.read()
            if isinstance(raw, (bytes, bytearray)):
                raw = raw.decode("utf-8", errors="replace")
            text.append(raw)
        else:  # Handle unsupported file types
            st.error(f"Unsupported file type: {file.type}")
    return text
85
+
86
+
87
def format_timestamp(timestamp):
    """Render *timestamp* relative to the current local date.

    Returns ``"today"`` or ``"yesterday"`` when the timestamp falls on the
    current or the previous calendar day; otherwise returns the timestamp
    formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    current_day = datetime.now().date()
    stamp_day = timestamp.date()

    if stamp_day == current_day:
        return "today"
    if stamp_day == current_day - timedelta(days=1):
        return "yesterday"
    return timestamp.strftime('%Y-%m-%d %H:%M:%S')
95
+
96
+
97
+ # document1=st.sidebar.file_uploader("Document 1 (question)",accept_multiple_files=True,key="document1")
98
+ # document=get_file_text(document1)
99
+
100
+
101
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)
102
+ # context= ", ".join(map(str,document))
103
+
104
+
105
+
106
+ # text_chunks= text_splitter.split_text(context)
107
+
108
+
109
+
110
+
111
+
112
def _ensure_session_state():
    """Create the session-state keys used by main() when they are missing.

    Done inside main() rather than relying only on module-level code: when
    this module is imported by another script, module-level statements run
    once per process, not once per browser session.
    """
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []
    if "messages" not in st.session_state:
        st.session_state["messages"] = []
    if not st.session_state["messages"]:
        st.session_state["messages"].append({"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"})


def _render_message(index, msg):
    """Render one chat message as a styled bubble.

    The content is HTML-escaped because it is emitted with
    ``unsafe_allow_html=True`` and comes from the user or the model.
    """
    import html

    background = "#dae1e0" if msg["role"] == "user" else "#f4f4f4"
    content = html.escape(str(msg["content"]))
    st.markdown(f"<div id='{index}' style='text-align: left; color: black; background-color: {background}; padding: 10px; border-radius: 10px; margin: 10px 0;'>{content}</div>", unsafe_allow_html=True)


def _build_vector_store(uploaded_files):
    """Build an in-memory FAISS index over the uploaded files' text.

    Returns None when there is no text to index (nothing uploaded, or nothing
    could be extracted).
    """
    parts = [part for part in get_file_text(uploaded_files or []) if part]
    context = ", ".join(map(str, parts))

    # NOTE(review): chunk_size=100000 is kept from the original; it is much
    # larger than typical retrieval chunks -- confirm this is intended.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)
    text_chunks = text_splitter.split_text(context)
    if not text_chunks:
        return None

    embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')
    # Built once for all chunks (previously rebuilt once per chunk) and kept
    # in memory instead of round-tripping through a shared "faiss_index"
    # directory on disk, which could be missing or hold another upload's data.
    return FAISS.from_texts(text_chunks, embedding=embeddings)


def main():
    """Render the chat page: header, conversation, uploader and history.

    Flow on each run:
      1. Draw the logo/title and every stored message.
      2. Read the files from the sidebar uploader and index their text.
      3. If the user submitted a question, retrieve similar chunks, run the
         QA ``chain`` on them, store question and answer in session state and
         trigger a rerun so the new messages are drawn.
      4. List the user's past questions in the sidebar with a clear button.
    """
    _ensure_session_state()

    c1, c2, c3 = st.columns([1, 2, 1])
    c1.image("Logo.jpg", width=130)
    c2.title("Chat Your Pdfs")

    chat_container = st.container()
    with chat_container:
        for i, msg in enumerate(st.session_state["messages"]):
            _render_message(i, msg)

    document1 = st.sidebar.file_uploader("Document 1 (question)", accept_multiple_files=True, key="document1")
    vector_store = _build_vector_store(document1)

    c2.markdown('##')

    # `user_query` rather than `prompt`, to avoid shadowing the module-level
    # PromptTemplate of that name.
    if user_query := st.chat_input():
        if vector_store is None:
            st.warning("Please upload a document before asking a question.")
        else:
            docs = vector_store.similarity_search(user_query)
            response_text = chain(
                {"input_documents": docs, "question": user_query},
                return_only_outputs=False,
            )

            timestamp = datetime.now()
            st.session_state['chat_history'].append(("You", user_query, timestamp))
            st.session_state['chat_history'].append(("Bot", response_text['output_text'], timestamp))

            st.session_state["messages"].append({"role": "user", "content": user_query})
            st.session_state["messages"].append({"role": "assistant", "content": response_text["output_text"]})

            # Redraw so the new messages appear in the chat container.
            # st.rerun supersedes st.experimental_rerun; fall back for older
            # Streamlit versions that only provide the latter.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()

    st.sidebar.title("Chat History")
    for role, content, timestamp in st.session_state['chat_history']:
        if role == "You":
            st.sidebar.write(f"{content} ({format_timestamp(timestamp)})")

    st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
195
+
196
+
197
+
198
def clear_chat_history():
    """Reset the conversation to its initial state.

    Replaces the message list with the single assistant greeting and empties
    the sidebar chat history. Intended for use as a button ``on_click``
    callback: Streamlit reruns the script after a callback returns, so no
    explicit rerun is requested here (requesting one inside a callback has no
    effect and only produces a warning).
    """
    st.session_state["messages"] = [{"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"}]
    st.session_state["chat_history"] = []
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
216
+
download.png ADDED