Spaces:
Configuration error
Configuration error
Upload 7 files
Browse files
.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# SECURITY: a real API key was previously committed here and is publicly exposed —
# revoke/rotate it in Google AI Studio and supply the new key via Space secrets.
GOOGLE_API_KEY="<YOUR_GOOGLE_API_KEY>"
|
Home.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Libraries
import streamlit as st
from PIL import Image


# Global page configuration; must run before any other st.* call on this page.
st.set_page_config(page_title='Document Comparer App', page_icon=':bar_chart:', layout='wide')

# Six equal-width columns; only c1 (logo) and c2 (title) are used below.
c1, c2, c3, c4, c5, c6 = st.columns(6)

# NOTE(review): assumes 'download.png' exists in the app's working directory — confirm.
c1.image(Image.open('download.png'))


c2.title('Document Comparer App')

# Leftover CSS experiment for hiding the sidebar nav separator (never applied):
# .stSidebarNavSeparator {
# pointer-events: none;
# }

# Introduction
st.subheader('Introduction')
st.write(
"""
Welcome to "Chat Your PDF"!

Unlock the power of conversation with your PDF documents like never before. Say goodbye to tedious scrolling and searching through pages of text. With "Chat Your PDF," you can effortlessly interact with your PDF files through simple, natural language.
"""
)

st.subheader('Key Features')


st.write("""
Natural Language Interaction: Engage with your PDF documents using natural language commands and queries, making it easy to navigate, search, and extract information.

PDF Parsing: Seamlessly parse PDF files to extract text, images, and other relevant content, enabling efficient interaction with the document's contents.

Intelligent Search: Utilize advanced search capabilities to quickly find specific information within your PDF files, enhancing productivity and saving time.

Multi-format Support: Support for various file formats, including PDF, DOCX, and TXT, allowing users to upload and interact with documents in different formats.

Conversation History: Maintain a history of your interactions with each PDF document, enabling you to track your progress and revisit previous queries and commands.

Annotation and Highlighting: Annotate and highlight sections of your PDF documents directly within the chat interface, facilitating collaboration and knowledge sharing.

Personalization: Customize the chat interface and settings to suit your preferences, including font size, theme, and language options.

Responsive Design: Ensure compatibility across devices with a responsive web design that adapts to various screen sizes and orientations, providing a seamless user experience on desktops, tablets, and smartphones.

Secure Data Handling: Implement robust security measures to protect user data and ensure confidentiality when interacting with sensitive documents.

Integration with Cloud Storage: Integrate with popular cloud storage services such as Google Drive, Dropbox, and OneDrive, allowing users to access and interact with their PDF files directly from their cloud accounts.

Feedback and Support: Provide users with a feedback mechanism and access to customer support resources to address inquiries, resolve issues, and gather suggestions for future enhancements.

Accessibility Features: Incorporate accessibility features such as screen reader compatibility and keyboard navigation, ensuring inclusivity and usability for all users, including those with disabilities.
"""
)

# Methodology
st.subheader('Methodology')
st.write(
"""
Parse uploaded PDF files to extract text and metadata.

Utilize natural language processing (NLP) algorithms to interpret user queries and commands.

Implement search and navigation algorithms to locate relevant content within the documents.

Enable interactive chat-based communication for users to interact with PDF content seamlessly
# [**GitHub Repository**](https://github.com/alitaslimi/cross-chain-monitoring).
"""

)

# Divider
st.divider()



# Experiments with page links, kept for reference:
# st.page_link("http://localhost:8501/Login", label="Login", icon="2️⃣")
# st.sidebar.page_link("http://localhost:8501/Home", label="Manage users")
Login.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
# import second_page

from PyPDF2 import PdfReader
import docx2txt
# Importing app executes its module-level setup (Gemini config, session-state
# init, QA chain); authenticate_user() below calls app.main() to draw the UI.
import app as app

# Two-column layout; c1/c2 are not referenced again in this file.
c1, c2 = st.columns([4, 7])
9 |
+
|
10 |
+
def initialize_session_state():
    """Ensure st.session_state carries an 'authenticated' flag, defaulting to False."""
    st.session_state.setdefault("authenticated", False)
14 |
+
def validate_credentials(password):
    """Validate *password* against the dummy access code.

    Uses hmac.compare_digest for a constant-time comparison so the check
    cannot leak match-prefix information through timing. Replace this with
    real authentication logic before production use.

    Args:
        password: password string entered in the sidebar.

    Returns:
        bool: True when the password matches the access code.
    """
    import hmac  # stdlib; local import keeps the file's import block untouched
    return hmac.compare_digest(password, "12345")
19 |
+
|
20 |
+
def authenticate_user():
    """Handles user authentication and displays login interface if needed."""
    initialize_session_state()

    if not st.session_state["authenticated"]:

        # Sidebar login form: password field plus an "Access Code" submit button.
        password = st.sidebar.text_input(label="Password", value="", key="passwd", type="password")
        if st.sidebar.button("Access Code"):
            if password:
                authenticated = validate_credentials(password)
                st.session_state["authenticated"] = authenticated

                if not authenticated:
                    st.sidebar.error("Invalid credentials. Please try again.")
                else:
                    st.sidebar.success("Welcome, authenticated user!")

    # NOTE(review): app.main() runs unconditionally, so the chat UI is rendered
    # even when st.session_state["authenticated"] is False — confirm whether
    # this call should be gated on successful authentication.
    st.markdown("#")
    app.main()

if __name__ == "__main__":
    authenticate_user()
Logo.jpg
ADDED
![]() |
README.md
CHANGED
@@ -1,12 +1,4 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
colorTo: green
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.34.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# Chat-your-Document-Using-Gemini-Pro-API-with-RAG
|
2 |
+
Chat Your File
|
3 |
+
|
4 |
+
The repository contains a web app that enables one to upload a file and question the model based on the uploaded file. The technology used is RAG in line with the Gemini Pro API.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
load_dotenv()
|
3 |
+
|
4 |
+
import streamlit as st
|
5 |
+
import json
|
6 |
+
import os
|
7 |
+
from datetime import datetime, timedelta
|
8 |
+
|
9 |
+
import google.generativeai as genai
|
10 |
+
from langchain_community.vectorstores import Chroma
|
11 |
+
from langchain.chains.question_answering import load_qa_chain
|
12 |
+
from langchain import PromptTemplate
|
13 |
+
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
14 |
+
from langchain_community.vectorstores import FAISS
|
15 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
16 |
+
from langchain.prompts import ChatPromptTemplate
|
17 |
+
from langchain_community.document_loaders import PyPDFDirectoryLoader
|
18 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
19 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
20 |
+
from transformers import pipeline
|
21 |
+
from PyPDF2 import PdfReader
|
22 |
+
import docx2txt
|
23 |
+
|
24 |
+
# Configure the Gemini client from the GOOGLE_API_KEY env var (loaded by dotenv above).
# NOTE(review): genai.configure() returns None, so this print only ever emits "None".
print(genai.configure(api_key=os.getenv("GOOGLE_API_KEY")))

# Raw Gemini chat session (separate from the LangChain QA chain built below).
model = genai.GenerativeModel("gemini-pro")
chat = model.start_chat(history=[])

# Sidebar history: list of (role, text, timestamp) tuples.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

# Main transcript: list of {"role": ..., "content": ...} dicts.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# NOTE(review): 'level' is initialised here but never read in this file —
# confirm it is consumed elsewhere before keeping it.
if 'level' not in st.session_state:
    st.session_state['level'] = 'Beginner'

# Add initial assistant message if chat history is empty
if not st.session_state["messages"]:
    st.session_state["messages"].append({"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"})


# Prompt used by the QA chain; {context} and {question} are filled per query.
prompt_template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n

Answer:
"""

# NOTE(review): this rebinds the module-level name `model` (previously the
# genai.GenerativeModel above) to the LangChain wrapper — confirm intentional.
model = ChatGoogleGenerativeAI(model="gemini-pro",
                               temperature=0.3)

# "stuff" chain: all retrieved documents are stuffed into a single prompt.
prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
+
def get_file_text(files):
    """Extract plain text from a list of uploaded files.

    Args:
        files: iterable of Streamlit ``UploadedFile`` objects (or any
            file-like objects exposing ``.type`` and ``.read()``).

    Returns:
        list[str]: one entry per PDF page, per DOCX file, or per text file.
        Unsupported file types contribute nothing and are reported via
        ``st.error``.
    """
    text = []
    for file in files:
        if file.type == "application/pdf":  # Handle PDF files
            pdf_reader = PdfReader(file)
            for page in pdf_reader.pages:
                text.append(page.extract_text())
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":  # Handle DOCX files
            text.append(docx2txt.process(file))
        elif file.type == "text/plain":  # Handle text files
            # Bug fix: UploadedFile has no .open() method — it already IS a
            # binary file-like object, so read the bytes and decode them.
            text.append(file.read().decode("utf-8"))
        else:  # Handle unsupported file types
            st.error(f"Unsupported file type: {file.type}")
    return text
85 |
+
|
86 |
+
|
87 |
+
def format_timestamp(timestamp):
    """Render *timestamp* as 'today', 'yesterday', or a full date-time string."""
    age_days = (datetime.now().date() - timestamp.date()).days
    if age_days == 0:
        return "today"
    if age_days == 1:
        return "yesterday"
    return timestamp.strftime('%Y-%m-%d %H:%M:%S')
95 |
+
|
96 |
+
|
97 |
+
# document1=st.sidebar.file_uploader("Document 1 (question)",accept_multiple_files=True,key="document1")
|
98 |
+
# document=get_file_text(document1)
|
99 |
+
|
100 |
+
|
101 |
+
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)
|
102 |
+
# context= ", ".join(map(str,document))
|
103 |
+
|
104 |
+
|
105 |
+
|
106 |
+
# text_chunks= text_splitter.split_text(context)
|
107 |
+
|
108 |
+
|
109 |
+
|
110 |
+
|
111 |
+
|
112 |
+
def main():
    """Render the chat page: header, document upload + indexing, chat loop,
    and the sidebar history panel.

    Reads/writes st.session_state['messages'] and ['chat_history']; persists
    a FAISS index to the local 'faiss_index' directory on every rerun.
    """
    # Header row: logo on the left, page title in the middle column.
    c1, c2, c3 = st.columns([1, 2, 1])
    pth = "Logo.jpg"
    c1.image(pth, width=130)
    c2.title("Chat Your Pdfs")

    # Two stacked containers: transcript above, input area below.
    chat_container = st.container()
    input_container = st.container()

    with chat_container:
        # for msg in st.session_state.messages:
        # Render the transcript; user and assistant bubbles differ only in colour.
        for i, msg in enumerate(st.session_state.messages):

            if msg["role"] == "user":
                # st.markdown(f"<div style='text-align: left; color: black; bac kground-color: #90EE90; padding: 10px; border-radius: 10px; margin: 10px 0;'>{msg['content']}</div>", unsafe_allow_html=True)
                # st.markdown(f"<div style='text-align: right; color: black; background-color: #d3d3d3; padding: 10px; border-radius: 10px; margin: 10px 0;'>{msg['content']}</div>", unsafe_allow_html=True)
                st.markdown(f"<div id='{i}' style='text-align: left; color: black; background-color: #dae1e0; padding: 10px; border-radius: 10px; margin: 10px 0;'>{msg['content']}</div>", unsafe_allow_html=True)

            else:
                # st.markdown(f"<div style='text-align: left; color: black; background-color: #90EE90; padding: 10px; border-radius: 10px; margin: 10px 0;'>{msg['content']}</div>", unsafe_allow_html=True)
                # st.markdown(f"<div style='text-align: left; color: white; background-color: #1a73e8; padding: 10px; border-radius: 10px; margin: 10px 0;'>{msg['content']}</div>", unsafe_allow_html=True)
                st.markdown(f"<div id='{i}' style='text-align: left; color: black; background-color: #f4f4f4; padding: 10px; border-radius: 10px; margin: 10px 0;'>{msg['content']}</div>", unsafe_allow_html=True)

    # Sidebar uploader; every Streamlit rerun re-extracts and re-indexes the files.
    document1=st.sidebar.file_uploader("Document 1 (question)",accept_multiple_files=True,key="document1")
    document=get_file_text(document1)


    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)
    context= ", ".join(map(str,document))

    embeddings =SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')


    text_chunks= text_splitter.split_text(context)


    # NOTE(review): the loop variable 'chuck' is unused and FAISS.from_texts is
    # given the full text_chunks list on every iteration, so the index is
    # rebuilt len(text_chunks) times — one build outside the loop would
    # suffice. Confirm before changing.
    for chuck in text_chunks:
        if embeddings:

            vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
            vector_store.save_local("faiss_index")


    # embeddings =SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')
    c2.markdown('##')

    if prompt := st.chat_input():

        with input_container:
            if prompt:
                with chat_container:
                    # Re-render user bubbles so the new question appears immediately.
                    for i, msg in enumerate(st.session_state.messages):
                        if msg["role"] == "user":
                            st.markdown(f"<div id='{i}' style='text-align: left; color: black; background-color: #dae1e0; padding: 10px; border-radius: 10px; margin: 10px 0;'>{msg['content']}</div>", unsafe_allow_html=True)



                # Retrieve the most similar chunks from disk and run the QA chain.
                new_db = FAISS.load_local("faiss_index", embeddings)
                docs = new_db.similarity_search(prompt)

                response_text = chain(
                {"input_documents":docs, "question": prompt}
                , return_only_outputs=False)

                # Record the exchange in both history stores, then rerun so the
                # transcript above picks up the new messages.
                timestamp = datetime.now()
                st.session_state['chat_history'].append(("You", prompt, timestamp))
                st.session_state['chat_history'].append(("Bot", response_text['output_text'], timestamp))

                st.session_state["messages"].append({"role": "user", "content": prompt})
                st.session_state["messages"].append({"role": "assistant", "content": response_text["output_text"]})

                st.experimental_rerun()
    # Sidebar: list of past user questions with relative timestamps.
    st.sidebar.title("Chat History")
    for entry in st.session_state['chat_history']:
        role, content, timestamp = entry
        if role == "You":
            st.sidebar.write(f"{content} ({format_timestamp(timestamp)})")

    st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
195 |
+
|
196 |
+
|
197 |
+
|
198 |
+
def clear_chat_history():
    """Reset both message stores to their initial state and force a rerun."""
    greeting = {"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"}
    st.session_state.messages = [greeting]
    st.session_state['chat_history'] = []
    st.experimental_rerun()
|
202 |
+
|
203 |
+
|
204 |
+
|
205 |
+
|
206 |
+
|
207 |
+
|
208 |
+
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
|
download.png
ADDED
![]() |