Chris4K committed on
Commit
ea07eae
1 Parent(s): 42d7c62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -24,13 +24,27 @@ print("-----------")
24
  text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
25
  vdocuments = text_splitter.split_documents(documents)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Extract the embedding arrays from the PDF documents
28
- embeddings = []
29
- for doc in vdocuments:
30
- embeddings.extend(api_hf_embeddings.get_embeddings(doc))
31
 
32
  # Create Chroma vector store for API embeddings
33
- api_db = Chroma.from_documents(vdocuments, HfApiEmbeddingRetriever, collection_name="api-collection")
34
 
35
  # Define the PDF retrieval function
36
  def pdf_retrieval(query):
 
24
  text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
25
  vdocuments = text_splitter.split_documents(documents)
26
 
27
+
28
+
29
+
30
+ model = "BAAI/bge-base-en-v1.5"
31
+ encode_kwargs = {
32
+ "normalize_embeddings": True
33
+ } # set True to compute cosine similarity
34
+ embeddings = HuggingFaceBgeEmbeddings(
35
+ model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
36
+ )
37
+ api_db = FAISS.from_texts(texts=vdocuments, embedding=embeddings)
38
+ api_db.as_retriever.similarity("What is ICD?")
39
+
40
+
41
  # Extract the embedding arrays from the PDF documents
42
+ #embeddings = []
43
+ #for doc in vdocuments:
44
+ # embeddings.extend(api_hf_embeddings.get_embeddings(doc))
45
 
46
  # Create Chroma vector store for API embeddings
47
+ #api_db = Chroma.from_documents(vdocuments, HfApiEmbeddingRetriever, collection_name="api-collection")
48
 
49
  # Define the PDF retrieval function
50
  def pdf_retrieval(query):