pdf-chatbot

Sleeping

App Files Community

MatteoScript committed on Jan 30

Commit

4091a1a

•

1 Parent(s): 2587659

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -6

app.py CHANGED Viewed

@@ -47,7 +47,7 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
 # Create vector database
 def create_db(splits, collection_name):
-    embedding = HuggingFaceEmbeddings()
     new_client = chromadb.EphemeralClient()
     vectordb = Chroma.from_documents(
         documents=splits,
@@ -61,7 +61,7 @@ def create_db(splits, collection_name):
 # Load vector database
 def load_db():
-    embedding = HuggingFaceEmbeddings()
     vectordb = Chroma(
         # persist_directory=default_persist_directory,
         embedding_function=embedding)
@@ -132,8 +132,8 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Pr
     #file_path = file_obj.name
     list_file_path = [x.name for x in list_file_obj if x is not None]
     collection_name = Path(list_file_path[0]).stem
-    # print('list_file_path: ', list_file_path)
-    # print('Collection name: ', collection_name)
     progress(0.25, desc="Loading document...")
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
@@ -174,8 +174,7 @@ def conversation(qa_chain, message, history):
     # Langchain sources are zero-based
     response_source1_page = response_sources[0].metadata["page"] + 1
     response_source2_page = response_sources[1].metadata["page"] + 1
-    # print ('chat response: ', response_answer)
-    # print('DB source', response_sources)
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]

 # Create vector database
 def create_db(splits, collection_name):
+    embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
     new_client = chromadb.EphemeralClient()
     vectordb = Chroma.from_documents(
         documents=splits,
 # Load vector database
 def load_db():
+    embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
     vectordb = Chroma(
         # persist_directory=default_persist_directory,
         embedding_function=embedding)
     #file_path = file_obj.name
     list_file_path = [x.name for x in list_file_obj if x is not None]
     collection_name = Path(list_file_path[0]).stem
+    print('list_file_path: ', list_file_path)
+    print('Collection name: ', collection_name)
     progress(0.25, desc="Loading document...")
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
     # Langchain sources are zero-based
     response_source1_page = response_sources[0].metadata["page"] + 1
     response_source2_page = response_sources[1].metadata["page"] + 1
+    print ('Response: ', response)
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]