Spaces:
Paused
Paused
Shreyas094
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -22,8 +22,13 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
22 |
from langchain_community.llms import HuggingFaceHub
|
23 |
from langchain_core.documents import Document
|
24 |
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
|
|
25 |
|
26 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
27 |
|
28 |
# Load SentenceTransformer model
|
29 |
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
|
@@ -108,12 +113,28 @@ class EnhancedContextDrivenChatbot:
|
|
108 |
|
109 |
return contextualized_question, topics, self.entity_tracker
|
110 |
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
"""Loads and splits the document into pages."""
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
-
def update_vectors(files):
|
117 |
if not files:
|
118 |
return "Please upload at least one PDF file."
|
119 |
|
@@ -122,7 +143,7 @@ def update_vectors(files):
|
|
122 |
|
123 |
all_data = []
|
124 |
for file in files:
|
125 |
-
data = load_document(file)
|
126 |
all_data.extend(data)
|
127 |
total_chunks += len(data)
|
128 |
|
@@ -134,7 +155,7 @@ def update_vectors(files):
|
|
134 |
|
135 |
database.save_local("faiss_database")
|
136 |
|
137 |
-
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
|
138 |
|
139 |
def get_embeddings():
|
140 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
@@ -410,17 +431,17 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
410 |
|
411 |
return "An unexpected error occurred. Please try again later."
|
412 |
|
413 |
-
# Gradio interface
|
414 |
# Gradio interface
|
415 |
with gr.Blocks() as demo:
|
416 |
-
gr.Markdown("# Context-Driven Conversational Chatbot")
|
417 |
|
418 |
with gr.Row():
|
419 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|
|
|
420 |
update_button = gr.Button("Upload PDF")
|
421 |
|
422 |
update_output = gr.Textbox(label="Update Status")
|
423 |
-
update_button.click(update_vectors, inputs=[file_input], outputs=update_output)
|
424 |
|
425 |
with gr.Row():
|
426 |
with gr.Column(scale=2):
|
@@ -433,10 +454,10 @@ with gr.Blocks() as demo:
|
|
433 |
repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
|
434 |
web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
|
435 |
|
436 |
-
|
437 |
|
438 |
def chat(question, history, temperature, top_p, repetition_penalty, web_search):
|
439 |
-
answer = ask_question(question, temperature, top_p, repetition_penalty, web_search,
|
440 |
history.append((question, answer))
|
441 |
return "", history
|
442 |
|
|
|
22 |
from langchain_community.llms import HuggingFaceHub
|
23 |
from langchain_core.documents import Document
|
24 |
from sentence_transformers import SentenceTransformer
|
25 |
+
import nest_asyncio
|
26 |
+
from llama_parse import LlamaParse
|
27 |
+
|
28 |
+
nest_asyncio.apply()
|
29 |
|
30 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
31 |
+
llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
|
32 |
|
33 |
# Load SentenceTransformer model
|
34 |
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
|
|
|
113 |
|
114 |
return contextualized_question, topics, self.entity_tracker
|
115 |
|
116 |
+
# Initialize LlamaParse
|
117 |
+
llama_parser = LlamaParse(
|
118 |
+
api_key=llama_cloud_api_key,
|
119 |
+
result_type="markdown",
|
120 |
+
num_workers=4,
|
121 |
+
verbose=True,
|
122 |
+
language="en",
|
123 |
+
)
|
124 |
+
|
125 |
+
def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[Document]:
|
126 |
"""Loads and splits the document into pages."""
|
127 |
+
if parser == "pypdf":
|
128 |
+
loader = PyPDFLoader(file.name)
|
129 |
+
return loader.load_and_split()
|
130 |
+
elif parser == "llamaparse":
|
131 |
+
documents = llama_parser.load_data(file.name)
|
132 |
+
# Convert LlamaParse output to langchain Document format
|
133 |
+
return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
|
134 |
+
else:
|
135 |
+
raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
|
136 |
|
137 |
+
def update_vectors(files, parser):
|
138 |
if not files:
|
139 |
return "Please upload at least one PDF file."
|
140 |
|
|
|
143 |
|
144 |
all_data = []
|
145 |
for file in files:
|
146 |
+
data = load_document(file, parser)
|
147 |
all_data.extend(data)
|
148 |
total_chunks += len(data)
|
149 |
|
|
|
155 |
|
156 |
database.save_local("faiss_database")
|
157 |
|
158 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
|
159 |
|
160 |
def get_embeddings():
|
161 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
|
|
431 |
|
432 |
return "An unexpected error occurred. Please try again later."
|
433 |
|
|
|
434 |
# Gradio interface
|
435 |
with gr.Blocks() as demo:
|
436 |
+
gr.Markdown("# Enhanced Context-Driven Conversational Chatbot")
|
437 |
|
438 |
with gr.Row():
|
439 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|
440 |
+
parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="pypdf")
|
441 |
update_button = gr.Button("Upload PDF")
|
442 |
|
443 |
update_output = gr.Textbox(label="Update Status")
|
444 |
+
update_button.click(update_vectors, inputs=[file_input, parser_dropdown], outputs=update_output)
|
445 |
|
446 |
with gr.Row():
|
447 |
with gr.Column(scale=2):
|
|
|
454 |
repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
|
455 |
web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
|
456 |
|
457 |
+
enhanced_context_driven_chatbot = EnhancedContextDrivenChatbot()
|
458 |
|
459 |
def chat(question, history, temperature, top_p, repetition_penalty, web_search):
|
460 |
+
answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, enhanced_context_driven_chatbot)
|
461 |
history.append((question, answer))
|
462 |
return "", history
|
463 |
|