Shreyas094 committed on
Commit
c8302a1
·
verified ·
1 Parent(s): 9a7af34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -11
app.py CHANGED
@@ -22,8 +22,13 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
22
  from langchain_community.llms import HuggingFaceHub
23
  from langchain_core.documents import Document
24
  from sentence_transformers import SentenceTransformer
 
 
 
 
25
 
26
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
27
 
28
  # Load SentenceTransformer model
29
  sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
@@ -108,12 +113,28 @@ class EnhancedContextDrivenChatbot:
108
 
109
  return contextualized_question, topics, self.entity_tracker
110
 
111
- def load_document(file: NamedTemporaryFile) -> List[Document]:
 
 
 
 
 
 
 
 
 
112
  """Loads and splits the document into pages."""
113
- loader = PyPDFLoader(file.name)
114
- return loader.load_and_split()
 
 
 
 
 
 
 
115
 
116
- def update_vectors(files):
117
  if not files:
118
  return "Please upload at least one PDF file."
119
 
@@ -122,7 +143,7 @@ def update_vectors(files):
122
 
123
  all_data = []
124
  for file in files:
125
- data = load_document(file)
126
  all_data.extend(data)
127
  total_chunks += len(data)
128
 
@@ -134,7 +155,7 @@ def update_vectors(files):
134
 
135
  database.save_local("faiss_database")
136
 
137
- return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
138
 
139
  def get_embeddings():
140
  return HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
@@ -410,17 +431,17 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
410
 
411
  return "An unexpected error occurred. Please try again later."
412
 
413
- # Gradio interface
414
  # Gradio interface
415
  with gr.Blocks() as demo:
416
- gr.Markdown("# Context-Driven Conversational Chatbot")
417
 
418
  with gr.Row():
419
  file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
 
420
  update_button = gr.Button("Upload PDF")
421
 
422
  update_output = gr.Textbox(label="Update Status")
423
- update_button.click(update_vectors, inputs=[file_input], outputs=update_output)
424
 
425
  with gr.Row():
426
  with gr.Column(scale=2):
@@ -433,10 +454,10 @@ with gr.Blocks() as demo:
433
  repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
434
  web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
435
 
436
- context_driven_chatbot = EnhancedContextDrivenChatbot()
437
 
438
  def chat(question, history, temperature, top_p, repetition_penalty, web_search):
439
- answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, context_driven_chatbot)
440
  history.append((question, answer))
441
  return "", history
442
 
 
22
  from langchain_community.llms import HuggingFaceHub
23
  from langchain_core.documents import Document
24
  from sentence_transformers import SentenceTransformer
25
+ import nest_asyncio
26
+ from llama_parse import LlamaParse
27
+
28
+ nest_asyncio.apply()
29
 
30
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
31
+ llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
32
 
33
  # Load SentenceTransformer model
34
  sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 
113
 
114
  return contextualized_question, topics, self.entity_tracker
115
 
116
+ # Initialize LlamaParse
117
+ llama_parser = LlamaParse(
118
+ api_key=llama_cloud_api_key,
119
+ result_type="markdown",
120
+ num_workers=4,
121
+ verbose=True,
122
+ language="en",
123
+ )
124
+
125
+ def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[Document]:
126
  """Loads and splits the document into pages."""
127
+ if parser == "pypdf":
128
+ loader = PyPDFLoader(file.name)
129
+ return loader.load_and_split()
130
+ elif parser == "llamaparse":
131
+ documents = llama_parser.load_data(file.name)
132
+ # Convert LlamaParse output to langchain Document format
133
+ return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
134
+ else:
135
+ raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
136
 
137
+ def update_vectors(files, parser):
138
  if not files:
139
  return "Please upload at least one PDF file."
140
 
 
143
 
144
  all_data = []
145
  for file in files:
146
+ data = load_document(file, parser)
147
  all_data.extend(data)
148
  total_chunks += len(data)
149
 
 
155
 
156
  database.save_local("faiss_database")
157
 
158
+ return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
159
 
160
  def get_embeddings():
161
  return HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
 
431
 
432
  return "An unexpected error occurred. Please try again later."
433
 
 
434
  # Gradio interface
435
  with gr.Blocks() as demo:
436
+ gr.Markdown("# Enhanced Context-Driven Conversational Chatbot")
437
 
438
  with gr.Row():
439
  file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
440
+ parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="pypdf")
441
  update_button = gr.Button("Upload PDF")
442
 
443
  update_output = gr.Textbox(label="Update Status")
444
+ update_button.click(update_vectors, inputs=[file_input, parser_dropdown], outputs=update_output)
445
 
446
  with gr.Row():
447
  with gr.Column(scale=2):
 
454
  repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
455
  web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
456
 
457
+ enhanced_context_driven_chatbot = EnhancedContextDrivenChatbot()
458
 
459
  def chat(question, history, temperature, top_p, repetition_penalty, web_search):
460
+ answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, enhanced_context_driven_chatbot)
461
  history.append((question, answer))
462
  return "", history
463