Codequestt committed on
Commit 94d3bd7 (verified)
1 Parent(s): a1e7d65

Upload 3 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+DALL·E[[:space:]]2025-01-26[[:space:]]11.43.33[[:space:]]-[[:space:]]A[[:space:]]futuristic[[:space:]]and[[:space:]]sleek[[:space:]]magical[[:space:]]animated[[:space:]]GIF-style[[:space:]]icon[[:space:]]design[[:space:]]for[[:space:]]'DocWise',[[:space:]]representing[[:space:]]knowledge,[[:space:]]documents,[[:space:]]and[[:space:]]wisdom.[[:space:]]The[[:space:]]design[[:space:]]includes[[:space:]]a[[:space:]]glow.jpg filter=lfs diff=lfs merge=lfs -text
DALL·E 2025-01-26 11.43.33 - A futuristic and sleek magical animated GIF-style icon design for 'DocWise', representing knowledge, documents, and wisdom. The design includes a glow.jpg ADDED

Git LFS Details

  • SHA256: cf1b17959d846b1f76eebe4927ecf762229f178402d454ddbee04d3f5ccb23a1
  • Pointer size: 131 Bytes
  • Size of remote file: 209 kB
app.py ADDED
@@ -0,0 +1,359 @@
import base64
import requests
import gradio as gr
import PyPDF2
import google.generativeai as genai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer, util
import numpy as np
import os
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.documents import Document

# Retrieve API keys from environment variables
google_api_key = os.getenv("GOOGLE_API_KEY")
tavily_api_key = os.getenv("TAVILY_API_KEY")
docusign_api_key = os.getenv("DOCUSIGN_API_KEY")

# Configure Google Generative AI
genai.configure(api_key=google_api_key)

# Create the Gemini model
generation_config = {
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 65536,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-thinking-exp-01-21",
    generation_config=generation_config,
)

chat_session = model.start_chat(history=[])
# Function to extract text from a PDF
def extract_text_from_pdf(file_path):
    try:
        with open(file_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            text = "".join(page.extract_text() for page in reader.pages)
        return text
    except Exception as e:
        return f"Error extracting text from PDF: {e}"
# Function to chunk the text
def chunk_text(text, chunk_size=500, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks
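# Note on the splitter settings: with length_function=len, chunk_size and
# chunk_overlap are measured in characters, so each chunk is capped at roughly
# 500 characters and consecutive chunks share up to a 50-character overlap.
# The overlap keeps a sentence that straddles a boundary retrievable from
# either side.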
# Function to embed the chunks
def embed_chunks(chunks, model_name="all-MiniLM-L6-v2"):
    model = SentenceTransformer(model_name)
    embeddings = model.encode(chunks, convert_to_tensor=True)
    return embeddings, model
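# Note: embed_chunks() loads the SentenceTransformer weights on each call.
# The app only pays this cost once per session, because the returned model is
# cached in the Gradio state dict by generate_response_with_rag() below.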
# Function to retrieve relevant chunks
def retrieve_relevant_chunks(query, chunks, embeddings, model, top_k=3):
    query_embedding = model.encode(query, convert_to_tensor=True)
    similarities = util.cos_sim(query_embedding, embeddings)[0]
    top_k = min(top_k, len(chunks))
    top_indices = np.argsort(similarities.cpu().numpy())[-top_k:][::-1]
    relevant_chunks = [chunks[i] for i in top_indices]
    return relevant_chunks
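# How the ranking above works: util.cos_sim(query_embedding, embeddings)
# returns a (1, num_chunks) tensor of cosine similarities; np.argsort sorts
# ascending, so the slice [-top_k:][::-1] selects the top_k indices in
# descending similarity order. For example, similarities [0.1, 0.9, 0.4]
# with top_k=2 yield indices [1, 2].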
# Function to summarize the agreement using Gemini
def summarize_agreement_with_gemini(text):
    try:
        # Create a prompt for summarization
        prompt = f"Summarize the following text in 3-5 sentences:\n\n{text}\n\nSummary:"

        # Send the prompt to the Gemini model
        response = chat_session.send_message(prompt)

        return response.text
    except Exception as e:
        return f"Error summarizing text with Gemini: {e}"
# Configure Tavily API
os.environ["TAVILY_API_KEY"] = tavily_api_key
web_search_tool = TavilySearchResults(k=3)
def generate_response_with_rag(query, pdf_path, state):
    if "chunks" not in state or "embeddings" not in state or "embedding_model" not in state:
        text = extract_text_from_pdf(pdf_path)
        chunks = chunk_text(text)
        embeddings, embedding_model = embed_chunks(chunks)
        state["chunks"] = chunks
        state["embeddings"] = embeddings
        state["embedding_model"] = embedding_model
    else:
        chunks = state["chunks"]
        embeddings = state["embeddings"]
        embedding_model = state["embedding_model"]

    # Retrieve relevant chunks based on the query
    relevant_chunks = retrieve_relevant_chunks(query, chunks, embeddings, embedding_model, top_k=5)  # Increase top_k

    # Debug: Print relevant chunks
    print(f"Relevant Chunks: {relevant_chunks}")

    # Combine the relevant chunks into a single context
    context = "\n\n".join(relevant_chunks)

    # Debug: Print the context
    print(f"Context from PDF: {context}")

    # Create a prompt that instructs the model to answer only from the context
    prompt = f"""
    You are a helpful assistant that answers questions based on the provided context.
    Use the context below to answer the question. If the context does not contain enough information to answer the question, respond with "I don't know."

    **Context:**
    {context}

    **Question:**
    {query}

    **Answer:**
    """

    # Debug: Print the prompt
    print(f"Prompt for Gemini: {prompt}")

    # Send the prompt to the Gemini model
    try:
        response = chat_session.send_message(prompt)
        initial_answer = response.text

        # Check if the initial answer is "I don't know"
        if "I don't know" in initial_answer or "i don't know" in initial_answer:
            print("Initial answer is 'I don't know'. Performing web search...")
            docs = web_search_tool.invoke({"query": query})
            web_results = "\n".join([d["content"] for d in docs])
            web_results = Document(page_content=web_results)

            # Debug: Print web search results
            print(f"Web Search Results: {web_results.page_content}")

            # Create a prompt that instructs the model to answer from the web search results
            web_prompt = f"""
            You are a helpful assistant that answers questions based on the provided context.
            The context below is from a web search. Use the context to answer the question. If the context does not contain enough information to answer the question, respond with "I don't know."

            **Context:**
            {web_results.page_content}

            **Question:**
            {query}

            **Answer:**
            """

            # Debug: Print the prompt
            print(f"Prompt for Gemini (Web Search): {web_prompt}")

            # Send the prompt to the Gemini model
            web_response = chat_session.send_message(web_prompt)
            # Add a note indicating the answer is based on a web search
            return f"{web_response.text}\n\n*Note: This answer is based on a web search.*"
        else:
            return initial_answer
    except Exception as e:
        return f"Error generating response: {e}"
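# Fallback behavior: the function answers strictly from the PDF context first;
# only when the model replies "I don't know" does it run a Tavily web search
# and answer again from those results, appending a note about the source.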
# Function to send document to DocuSign
def send_to_docusign(file_path, recipient_email, recipient_name):
    account_id = "184d0409-2626-4c48-98b5-d383b9854a47"
    base_url = "https://demo.docusign.net/restapi"

    with open(file_path, "rb") as file:
        document_base64 = base64.b64encode(file.read()).decode()

    envelope_definition = {
        "emailSubject": "Please sign this document",
        "documents": [
            {
                "documentId": "1",
                "name": "document.pdf",
                "fileExtension": "pdf",
                "documentBase64": document_base64
            }
        ],
        "recipients": {
            "signers": [
                {
                    "email": recipient_email,
                    "name": recipient_name,
                    "recipientId": "1",
                    "tabs": {
                        "signHereTabs": [
                            {
                                "documentId": "1",
                                "pageNumber": "1",
                                "xPosition": "100",
                                "yPosition": "100"
                            }
                        ]
                    }
                }
            ]
        },
        "status": "sent"
    }

    headers = {
        "Authorization": f"Bearer {docusign_api_key}",
        "Content-Type": "application/json"
    }
    try:
        response = requests.post(
            f"{base_url}/v2.1/accounts/{account_id}/envelopes",
            headers=headers,
            json=envelope_definition
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}
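# The envelope payload above follows the DocuSign eSignature REST v2.1
# "create envelope" shape: one base64-encoded PDF plus a single signer with
# one signHere tab placed at fixed coordinates on page 1. "status": "sent"
# emails the envelope immediately; "created" would save it as a draft instead.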
# Function to process the agreement
def process_agreement(file, recipient_email, recipient_name, state):
    try:
        text = extract_text_from_pdf(file.name)
        if text.startswith("Error"):
            return text, {}, {}, state

        # Use Gemini for summarization
        summary = summarize_agreement_with_gemini(text)
        if summary.startswith("Error"):
            return summary, {}, {}, state

        docusign_response = send_to_docusign(file.name, recipient_email, recipient_name)
        if "error" in docusign_response:
            return summary, {}, docusign_response, state

        return summary, {}, docusign_response, state
    except Exception as e:
        return f"Error: {e}", {}, {}, state
# Gradio interface
def main_interface(file, recipient_email, recipient_name, question, state):
    if file is not None:
        state["file"] = file
        state["text"] = extract_text_from_pdf(file.name)
        state["chat_history"] = []  # Initialize chat history

    summary_output = ""
    docusign_output = {}
    chatbot_output = ""

    if "file" in state:
        if recipient_email and recipient_name:
            summary_output, _, docusign_output, state = process_agreement(state["file"], recipient_email, recipient_name, state)

        if question:
            chatbot_output = generate_response_with_rag(question, state["file"].name, state)
            state["chat_history"].append((question, chatbot_output))  # Update chat history

    return summary_output, docusign_output, chatbot_output, state
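# Both tabs below route through this single handler: which outputs get
# populated depends on whether the recipient fields and/or a question are
# filled in when a button is clicked.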
# CSS for styling
css = """
.gradio-container {
    background-image: url('https://huggingface.co/spaces/Nadaazakaria/DocWise/resolve/main/DALL%C2%B7E%202025-01-26%2011.43.33%20-%20A%20futuristic%20and%20sleek%20magical%20animated%20GIF-style%20icon%20design%20for%20%27DocWise%27%2C%20representing%20knowledge%2C%20documents%2C%20and%20wisdom.%20The%20design%20includes%20a%20glow.jpg');
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
}

.gradio-container h1,
.gradio-container .tabs > .tab-nav > .tab-button {
    color: #FFF5E1 !important;
    text-shadow: 0 0 5px rgba(255, 245, 225, 0.5);
}

.tabs {
    background-color: #f0f0f0 !important;
    border-radius: 10px !important;
    padding: 20px !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}

.tabs > .tab-nav {
    background-color: #e0e0e0 !important;
    border-radius: 5px !important;
    margin-bottom: 15px !important;
}

.tabs > .tab-nav > .tab-button {
    color: black !important;
    font-weight: bold !important;
}

.tabs > .tab-nav > .tab-button.selected {
    background-color: #d0d0d0 !important;
    color: black !important;
}

#process-button, #chatbot-button {
    background-color: white !important;
    color: black !important;
    border: 1px solid #ccc !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
    transition: background-color 0.3s ease !important;
}

#process-button:hover, #chatbot-button:hover {
    background-color: #f0f0f0 !important;
}
"""
# Gradio app
with gr.Blocks(css=css) as app:
    gr.Markdown(
        """
        <div style="text-align: center;">
            <h1 id="main-title">
                DocWise (Agreement Analyzer with Chatbot and DocuSign Integration)
            </h1>
        </div>
        """,
    )

    state = gr.State({})
    file_input = gr.File(label="Upload Agreement (PDF)")

    with gr.Tab("Agreement Processing", elem_id="agreement-tab"):
        email_input = gr.Textbox(label="Recipient Email")
        name_input = gr.Textbox(label="Recipient Name")
        summary_output = gr.Textbox(label="Agreement Summary")
        docusign_output = gr.JSON(label="DocuSign Response")
        process_button = gr.Button("Process Agreement", elem_id="process-button")

    with gr.Tab("Chatbot", elem_id="chatbot-tab"):
        chatbot_question_input = gr.Textbox(label="Ask a Question")
        chatbot_answer_output = gr.Textbox(label="Answer")
        chatbot_button = gr.Button("Ask", elem_id="chatbot-button")

    process_button.click(
        main_interface,
        inputs=[file_input, email_input, name_input, chatbot_question_input, state],
        outputs=[summary_output, docusign_output, chatbot_answer_output, state]
    )
    chatbot_button.click(
        main_interface,
        inputs=[file_input, email_input, name_input, chatbot_question_input, state],
        outputs=[summary_output, docusign_output, chatbot_answer_output, state]
    )

app.launch(debug=True)
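# To run locally (a sketch, assuming the three keys are exported in the
# environment that launches the app):
#   GOOGLE_API_KEY=... TAVILY_API_KEY=... DOCUSIGN_API_KEY=... python app.py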
requirements.txt ADDED
@@ -0,0 +1,13 @@
gradio
requests
PyPDF2
transformers
torch
google-generativeai>=0.7.2
langchain-google-genai
faiss-cpu
langchain
langchain-community
langchain-core
sentence_transformers
tavily-python