rag-tool

Sleeping

Chris4K committed on Jan 20

Commit

ea07eae

•

1 Parent(s): 42d7c62

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,13 +24,27 @@ print("-----------")
 text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
 vdocuments = text_splitter.split_documents(documents)
 # Extract the embedding arrays from the PDF documents
-embeddings = []
-for doc in vdocuments:
-    embeddings.extend(api_hf_embeddings.get_embeddings(doc))
 # Create Chroma vector store for API embeddings
-api_db = Chroma.from_documents(vdocuments, HfApiEmbeddingRetriever, collection_name="api-collection")
 # Define the PDF retrieval function
 def pdf_retrieval(query):

 text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
 vdocuments = text_splitter.split_documents(documents)
+model = "BAAI/bge-base-en-v1.5"
+encode_kwargs = {
+    "normalize_embeddings": True
+}  # set True to compute cosine similarity
+embeddings = HuggingFaceBgeEmbeddings(
+    model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
+)
+api_db = FAISS.from_texts(texts=vdocuments, embedding=embeddings)
+api_db.as_retriever.similarity("What is ICD?")
 # Extract the embedding arrays from the PDF documents
+#embeddings = []
+#for doc in vdocuments:
+#    embeddings.extend(api_hf_embeddings.get_embeddings(doc))
 # Create Chroma vector store for API embeddings
+#api_db = Chroma.from_documents(vdocuments, HfApiEmbeddingRetriever, collection_name="api-collection")
 # Define the PDF retrieval function
 def pdf_retrieval(query):