Spaces: Nadaazakaria/DocWise
Commit: Upload 3 files
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+DALL·E[[:space:]]2025-01-26[[:space:]]11.43.33[[:space:]]-[[:space:]]A[[:space:]]futuristic[[:space:]]and[[:space:]]sleek[[:space:]]magical[[:space:]]animated[[:space:]]GIF-style[[:space:]]icon[[:space:]]design[[:space:]]for[[:space:]]'DocWise',[[:space:]]representing[[:space:]]knowledge,[[:space:]]documents,[[:space:]]and[[:space:]]wisdom.[[:space:]]The[[:space:]]design[[:space:]]includes[[:space:]]a[[:space:]]glow.jpg filter=lfs diff=lfs merge=lfs -text
DALL·E 2025-01-26 11.43.33 - A futuristic and sleek magical animated GIF-style icon design for 'DocWise', representing knowledge, documents, and wisdom. The design includes a glow.jpg
ADDED
(binary image, stored with Git LFS)
app.py
ADDED
@@ -0,0 +1,359 @@
import base64
import requests
import gradio as gr
import PyPDF2
import google.generativeai as genai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer, util
import numpy as np
import os
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.documents import Document

# Retrieve API keys from environment variables
google_api_key = os.getenv("GOOGLE_API_KEY")
tavily_api_key = os.getenv("TAVILY_API_KEY")
docusign_api_key = os.getenv("DOCUSIGN_API_KEY")

# Configure Google Generative AI
genai.configure(api_key=google_api_key)

# Create the Gemini model
generation_config = {
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 65536,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-thinking-exp-01-21",
    generation_config=generation_config,
)

chat_session = model.start_chat(history=[])

# Function to extract text from a PDF
def extract_text_from_pdf(file_path):
    try:
        with open(file_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            # extract_text() can return None for image-only pages; substitute ""
            text = "".join((page.extract_text() or "") for page in reader.pages)
        return text
    except Exception as e:
        return f"Error extracting text from PDF: {e}"

# Function to chunk the text
def chunk_text(text, chunk_size=500, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks

# Function to embed the chunks
def embed_chunks(chunks, model_name="all-MiniLM-L6-v2"):
    model = SentenceTransformer(model_name)
    embeddings = model.encode(chunks, convert_to_tensor=True)
    return embeddings, model

# Function to retrieve the chunks most similar to a query
def retrieve_relevant_chunks(query, chunks, embeddings, model, top_k=3):
    query_embedding = model.encode(query, convert_to_tensor=True)
    similarities = util.cos_sim(query_embedding, embeddings)[0]
    top_k = min(top_k, len(chunks))
    top_indices = np.argsort(similarities.cpu().numpy())[-top_k:][::-1]
    relevant_chunks = [chunks[i] for i in top_indices]
    return relevant_chunks

# Function to summarize the agreement using Gemini
def summarize_agreement_with_gemini(text):
    try:
        # Create a prompt for summarization
        prompt = f"Summarize the following text in 3-5 sentences:\n\n{text}\n\nSummary:"

        # Send the prompt to the Gemini model
        response = chat_session.send_message(prompt)

        return response.text
    except Exception as e:
        return f"Error summarizing text with Gemini: {e}"

# Configure Tavily API
os.environ["TAVILY_API_KEY"] = tavily_api_key
web_search_tool = TavilySearchResults(k=3)

def generate_response_with_rag(query, pdf_path, state):
    # Build the chunk/embedding index once per uploaded PDF and cache it in state
    if "chunks" not in state or "embeddings" not in state or "embedding_model" not in state:
        text = extract_text_from_pdf(pdf_path)
        chunks = chunk_text(text)
        embeddings, embedding_model = embed_chunks(chunks)
        state["chunks"] = chunks
        state["embeddings"] = embeddings
        state["embedding_model"] = embedding_model
    else:
        chunks = state["chunks"]
        embeddings = state["embeddings"]
        embedding_model = state["embedding_model"]

    # Retrieve relevant chunks based on the query (top_k=5 instead of the default 3)
    relevant_chunks = retrieve_relevant_chunks(query, chunks, embeddings, embedding_model, top_k=5)

    # Debug: print relevant chunks
    print(f"Relevant Chunks: {relevant_chunks}")

    # Combine the relevant chunks into a single context
    context = "\n\n".join(relevant_chunks)

    # Debug: print the context
    print(f"Context from PDF: {context}")

    # Create a prompt that instructs the model to answer only from the context
    prompt = f"""
    You are a helpful assistant that answers questions based on the provided context.
    Use the context below to answer the question. If the context does not contain enough information to answer the question, respond with "I don't know."

    **Context:**
    {context}

    **Question:**
    {query}

    **Answer:**
    """

    # Debug: print the prompt
    print(f"Prompt for Gemini: {prompt}")

    # Send the prompt to the Gemini model
    try:
        response = chat_session.send_message(prompt)
        initial_answer = response.text

        # Fall back to a web search when the model cannot answer from the PDF
        if "I don't know" in initial_answer or "i don't know" in initial_answer:
            print("Initial answer is 'I don't know'. Performing web search...")
            docs = web_search_tool.invoke({"query": query})
            web_results = "\n".join([d["content"] for d in docs])
            web_results = Document(page_content=web_results)

            # Debug: print web search results
            print(f"Web Search Results: {web_results.page_content}")

            # Create a prompt that instructs the model to answer from the web search results
            web_prompt = f"""
            You are a helpful assistant that answers questions based on the provided context.
            The context below is from a web search. Use the context to answer the question. If the context does not contain enough information to answer the question, respond with "I don't know."

            **Context:**
            {web_results.page_content}

            **Question:**
            {query}

            **Answer:**
            """

            # Debug: print the prompt
            print(f"Prompt for Gemini (Web Search): {web_prompt}")

            # Send the prompt to the Gemini model
            web_response = chat_session.send_message(web_prompt)
            # Add a note indicating the answer is based on a web search
            return f"{web_response.text}\n\n*Note: This answer is based on a web search.*"
        else:
            return initial_answer
    except Exception as e:
        return f"Error generating response: {e}"

# Function to send a document to DocuSign for signature
def send_to_docusign(file_path, recipient_email, recipient_name):
    account_id = "184d0409-2626-4c48-98b5-d383b9854a47"
    base_url = "https://demo.docusign.net/restapi"

    with open(file_path, "rb") as file:
        document_base64 = base64.b64encode(file.read()).decode()

    envelope_definition = {
        "emailSubject": "Please sign this document",
        "documents": [
            {
                "documentId": "1",
                "name": "document.pdf",
                "fileExtension": "pdf",
                "documentBase64": document_base64
            }
        ],
        "recipients": {
            "signers": [
                {
                    "email": recipient_email,
                    "name": recipient_name,
                    "recipientId": "1",
                    "tabs": {
                        "signHereTabs": [
                            {
                                "documentId": "1",
                                "pageNumber": "1",
                                "xPosition": "100",
                                "yPosition": "100"
                            }
                        ]
                    }
                }
            ]
        },
        "status": "sent"
    }

    headers = {
        "Authorization": f"Bearer {docusign_api_key}",
        "Content-Type": "application/json"
    }
    try:
        response = requests.post(
            f"{base_url}/v2.1/accounts/{account_id}/envelopes",
            headers=headers,
            json=envelope_definition
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}

# Function to process the agreement: summarize it, then send it for signature
def process_agreement(file, recipient_email, recipient_name, state):
    try:
        text = extract_text_from_pdf(file.name)
        if text.startswith("Error"):
            return text, {}, {}, state

        # Use Gemini for summarization
        summary = summarize_agreement_with_gemini(text)
        if summary.startswith("Error"):
            return summary, {}, {}, state

        # The DocuSign response (success JSON or {"error": ...}) is surfaced either way
        docusign_response = send_to_docusign(file.name, recipient_email, recipient_name)
        return summary, {}, docusign_response, state
    except Exception as e:
        return f"Error: {e}", {}, {}, state

# Gradio interface callback shared by both buttons
def main_interface(file, recipient_email, recipient_name, question, state):
    if file is not None:
        state["file"] = file
        state["text"] = extract_text_from_pdf(file.name)
        state["chat_history"] = []  # Initialize chat history

    summary_output = ""
    docusign_output = {}
    chatbot_output = ""

    if "file" in state:
        if recipient_email and recipient_name:
            summary_output, _, docusign_output, state = process_agreement(state["file"], recipient_email, recipient_name, state)

        if question:
            chatbot_output = generate_response_with_rag(question, state["file"].name, state)
            state["chat_history"].append((question, chatbot_output))  # Update chat history

    return summary_output, docusign_output, chatbot_output, state

# CSS for styling
css = """
.gradio-container {
    background-image: url('https://huggingface.co/spaces/Nadaazakaria/DocWise/resolve/main/DALL%C2%B7E%202025-01-26%2011.43.33%20-%20A%20futuristic%20and%20sleek%20magical%20animated%20GIF-style%20icon%20design%20for%20%27DocWise%27%2C%20representing%20knowledge%2C%20documents%2C%20and%20wisdom.%20The%20design%20includes%20a%20glow.jpg');
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
}

.gradio-container h1,
.gradio-container .tabs > .tab-nav > .tab-button {
    color: #FFF5E1 !important;
    text-shadow: 0 0 5px rgba(255, 245, 225, 0.5);
}

.tabs {
    background-color: #f0f0f0 !important;
    border-radius: 10px !important;
    padding: 20px !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}

.tabs > .tab-nav {
    background-color: #e0e0e0 !important;
    border-radius: 5px !important;
    margin-bottom: 15px !important;
}

.tabs > .tab-nav > .tab-button {
    color: black !important;
    font-weight: bold !important;
}

.tabs > .tab-nav > .tab-button.selected {
    background-color: #d0d0d0 !important;
    color: black !important;
}

#process-button, #chatbot-button {
    background-color: white !important;
    color: black !important;
    border: 1px solid #ccc !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
    transition: background-color 0.3s ease !important;
}

#process-button:hover, #chatbot-button:hover {
    background-color: #f0f0f0 !important;
}
"""

# Gradio app
with gr.Blocks(css=css) as app:
    gr.Markdown(
        """
        <div style="text-align: center;">
            <h1 id="main-title">
                DocWise (Agreement Analyzer with Chatbot and DocuSign Integration)
            </h1>
        </div>
        """,
    )

    state = gr.State({})
    file_input = gr.File(label="Upload Agreement (PDF)")

    with gr.Tab("Agreement Processing", elem_id="agreement-tab"):
        email_input = gr.Textbox(label="Recipient Email")
        name_input = gr.Textbox(label="Recipient Name")
        summary_output = gr.Textbox(label="Agreement Summary")
        docusign_output = gr.JSON(label="DocuSign Response")
        process_button = gr.Button("Process Agreement", elem_id="process-button")

    with gr.Tab("Chatbot", elem_id="chatbot-tab"):
        chatbot_question_input = gr.Textbox(label="Ask a Question")
        chatbot_answer_output = gr.Textbox(label="Answer")
        chatbot_button = gr.Button("Ask", elem_id="chatbot-button")

    process_button.click(
        main_interface,
        inputs=[file_input, email_input, name_input, chatbot_question_input, state],
        outputs=[summary_output, docusign_output, chatbot_answer_output, state]
    )
    chatbot_button.click(
        main_interface,
        inputs=[file_input, email_input, name_input, chatbot_question_input, state],
        outputs=[summary_output, docusign_output, chatbot_answer_output, state]
    )

app.launch(debug=True)
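
For readers who want to try the retrieval logic without launching the Gradio UI or configuring any API keys, the following is a minimal standalone sketch of the same extract/chunk/embed/retrieve pipeline that app.py uses. The file name "sample.pdf" and the query are illustrative placeholders, not files from this repo:

# Standalone sketch of the retrieval pipeline from app.py (no Gradio, no Gemini).
# "sample.pdf" and the query below are hypothetical placeholders.
import PyPDF2
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer, util

with open("sample.pdf", "rb") as f:
    text = "".join((page.extract_text() or "") for page in PyPDF2.PdfReader(f).pages)

# Same chunking parameters as chunk_text() in app.py
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50, length_function=len)
chunks = splitter.split_text(text)

# Same embedding model as embed_chunks() in app.py
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(chunks, convert_to_tensor=True)

query = "What are the termination terms?"
scores = util.cos_sim(embedder.encode(query, convert_to_tensor=True), embeddings)[0]
for i in np.argsort(scores.cpu().numpy())[-3:][::-1]:
    print(f"[{float(scores[i]):.3f}] {chunks[i][:120]}")

The chatbot path in app.py does the same thing, then passes the retrieved chunks to Gemini as context.
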
requirements.txt
ADDED
@@ -0,0 +1,13 @@
gradio
requests
PyPDF2
transformers
torch
google-generativeai>=0.7.2
langchain-google-genai
faiss-cpu
langchain
langchain-community
langchain-core
sentence_transformers
tavily-python
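
Note that app.py reads three secrets from the environment: GOOGLE_API_KEY, TAVILY_API_KEY, and DOCUSIGN_API_KEY. On a Space these are typically configured as repository secrets; for a local run, a minimal sketch (the key values are placeholders, not real credentials):

# Local-run sketch: set the secrets app.py reads via os.getenv(), then start it.
import os

os.environ["GOOGLE_API_KEY"] = "your-gemini-key"        # used by genai.configure()
os.environ["TAVILY_API_KEY"] = "your-tavily-key"        # used by the web-search fallback
os.environ["DOCUSIGN_API_KEY"] = "your-docusign-token"  # bearer token for envelope creation

import app  # importing app.py builds the UI and calls app.launch(debug=True)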