Shreyas094
committed on
Commit
•
d52f389
1
Parent(s):
978efd2
Update app.py
Browse files
app.py
CHANGED
@@ -66,17 +66,30 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
|
|
66 |
def get_embeddings():
    """Return a HuggingFaceEmbeddings instance for the stsb-roberta-large model."""
    model_name = "sentence-transformers/stsb-roberta-large"
    return HuggingFaceEmbeddings(model_name=model_name)
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def update_vectors(files, parser):
|
70 |
global uploaded_documents
|
71 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
72 |
|
73 |
if not files:
|
74 |
logging.warning("No files provided for update_vectors")
|
75 |
-
return "Please upload at least one PDF file.",
|
76 |
-
choices=[doc["name"] for doc in uploaded_documents],
|
77 |
-
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
78 |
-
label="Select documents to query"
|
79 |
-
)
|
80 |
|
81 |
embed = get_embeddings()
|
82 |
total_chunks = 0
|
@@ -89,7 +102,6 @@ def update_vectors(files, parser):
|
|
89 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
90 |
all_data.extend(data)
|
91 |
total_chunks += len(data)
|
92 |
-
# Append new documents instead of replacing
|
93 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
94 |
uploaded_documents.append({"name": file.name, "selected": True})
|
95 |
logging.info(f"Added new document to uploaded_documents: {file.name}")
|
@@ -110,12 +122,11 @@ def update_vectors(files, parser):
|
|
110 |
|
111 |
database.save_local("faiss_database")
|
112 |
logging.info("FAISS database saved")
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
)
|
119 |
|
120 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
121 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
@@ -528,6 +539,12 @@ def display_documents():
|
|
528 |
label="Select documents to query"
|
529 |
)
|
530 |
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
def initial_conversation():
|
532 |
return [
|
533 |
(None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
|
@@ -539,7 +556,7 @@ def initial_conversation():
|
|
539 |
]
|
540 |
|
541 |
# Define the checkbox outside the demo block
|
542 |
-
document_selector =
|
543 |
|
544 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
545 |
|
@@ -603,6 +620,7 @@ with demo:
|
|
603 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|
604 |
parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
|
605 |
update_button = gr.Button("Upload Document")
|
|
|
606 |
|
607 |
update_output = gr.Textbox(label="Update Status")
|
608 |
|
@@ -610,6 +628,11 @@ with demo:
|
|
610 |
update_button.click(update_vectors,
|
611 |
inputs=[file_input, parser_dropdown],
|
612 |
outputs=[update_output, document_selector])
|
|
|
|
|
|
|
|
|
|
|
613 |
|
614 |
gr.Markdown(
|
615 |
"""
|
|
|
66 |
def get_embeddings():
    """Return a HuggingFaceEmbeddings instance for the stsb-roberta-large model."""
    model_name = "sentence-transformers/stsb-roberta-large"
    return HuggingFaceEmbeddings(model_name=model_name)
|
68 |
|
69 |
+
# Path of the JSON file that persists the uploaded-document list between runs.
DOCUMENTS_FILE = "uploaded_documents.json"


def load_documents():
    """Return the document list stored in ``DOCUMENTS_FILE``.

    Returns:
        The list decoded from the JSON file, or an empty list when the file
        is missing, cannot be decoded as JSON, or does not contain a list.
    """
    # EAFP: open directly instead of exists()-then-open, which can race with
    # another process removing the file in between.
    try:
        with open(DOCUMENTS_FILE, "r", encoding="utf-8") as f:
            documents = json.load(f)
    except FileNotFoundError:
        return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        # This function runs at import time; a truncated or corrupt file
        # must not prevent the application from starting.
        logging.warning("Ignoring unreadable document list in %s", DOCUMENTS_FILE)
        return []

    if not isinstance(documents, list):
        # Callers iterate over and append to the result, so anything other
        # than a list is treated the same as a corrupt file.
        logging.warning("Ignoring non-list document data in %s", DOCUMENTS_FILE)
        return []
    return documents


def save_documents(documents):
    """Write *documents* to ``DOCUMENTS_FILE`` as JSON.

    Args:
        documents: JSON-serializable list of document records.

    Raises:
        TypeError: If *documents* is not JSON-serializable. The previously
            saved file is left untouched in that case.
    """
    # Serialize before touching the disk so a serialization error cannot
    # leave a half-written file behind.
    payload = json.dumps(documents)

    # Write to a sibling file and rename it over the target: the rename is
    # atomic, so readers see either the old content or the new, never a
    # partial write.
    tmp_path = f"{DOCUMENTS_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(payload)
    os.replace(tmp_path, DOCUMENTS_FILE)
|
81 |
+
|
82 |
+
# Load the persisted document list at startup
|
83 |
+
uploaded_documents = load_documents()
|
84 |
+
|
85 |
+
# Process uploaded files and persist the updated document list
|
86 |
def update_vectors(files, parser):
|
87 |
global uploaded_documents
|
88 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
89 |
|
90 |
if not files:
|
91 |
logging.warning("No files provided for update_vectors")
|
92 |
+
return "Please upload at least one PDF file.", display_documents()
|
|
|
|
|
|
|
|
|
93 |
|
94 |
embed = get_embeddings()
|
95 |
total_chunks = 0
|
|
|
102 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
103 |
all_data.extend(data)
|
104 |
total_chunks += len(data)
|
|
|
105 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
106 |
uploaded_documents.append({"name": file.name, "selected": True})
|
107 |
logging.info(f"Added new document to uploaded_documents: {file.name}")
|
|
|
122 |
|
123 |
database.save_local("faiss_database")
|
124 |
logging.info("FAISS database saved")
|
125 |
+
|
126 |
+
# Save the updated list of documents
|
127 |
+
save_documents(uploaded_documents)
|
128 |
+
|
129 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
|
|
|
130 |
|
131 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
132 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
|
|
539 |
label="Select documents to query"
|
540 |
)
|
541 |
|
542 |
+
# Click handler for the "Refresh Document List" button
def refresh_documents():
    """Reload the saved document list and return the rebuilt selector.

    Rebinds the module-level ``uploaded_documents`` to the result of
    ``load_documents()`` and returns the value of ``display_documents()``.
    """
    global uploaded_documents
    reloaded = load_documents()
    uploaded_documents = reloaded
    return display_documents()
|
547 |
+
|
548 |
def initial_conversation():
|
549 |
return [
|
550 |
(None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
|
|
|
556 |
]
|
557 |
|
558 |
# Define the checkbox outside the demo block
|
559 |
+
document_selector = display_documents()
|
560 |
|
561 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
562 |
|
|
|
620 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|
621 |
parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
|
622 |
update_button = gr.Button("Upload Document")
|
623 |
+
refresh_button = gr.Button("Refresh Document List")
|
624 |
|
625 |
update_output = gr.Textbox(label="Update Status")
|
626 |
|
|
|
628 |
update_button.click(update_vectors,
|
629 |
inputs=[file_input, parser_dropdown],
|
630 |
outputs=[update_output, document_selector])
|
631 |
+
|
632 |
+
# Add the refresh button functionality
|
633 |
+
refresh_button.click(refresh_documents,
|
634 |
+
inputs=[],
|
635 |
+
outputs=[document_selector])
|
636 |
|
637 |
gr.Markdown(
|
638 |
"""
|