Adrian Cowham committed on
Commit
58660a4
1 Parent(s): 0c47d68

changed embeddings

Browse files
Files changed (1) hide show
  1. src/core/embedding.py +12 -23
src/core/embedding.py CHANGED
@@ -1,7 +1,7 @@
1
  from typing import List, Type
2
 
3
  from langchain.docstore.document import Document
4
- from langchain.embeddings import OpenAIEmbeddings
5
  from langchain.embeddings.base import Embeddings
6
  from langchain.vectorstores import VectorStore
7
  from langchain.vectorstores.faiss import FAISS
@@ -50,27 +50,16 @@ class FolderIndex:
50
  def embed_files(
51
  files: List[File], embedding: str, vector_store: str, **kwargs
52
  ) -> FolderIndex:
53
- """Embeds a collection of files and stores them in a FolderIndex."""
54
-
55
- supported_embeddings: dict[str, Type[Embeddings]] = {
56
- "openai": OpenAIEmbeddings,
57
- "debug": FakeEmbeddings,
58
- }
59
- supported_vector_stores: dict[str, Type[VectorStore]] = {
60
- "faiss": FAISS,
61
- "debug": FakeVectorStore,
62
- }
63
-
64
- if embedding in supported_embeddings:
65
- _embeddings = supported_embeddings[embedding](**kwargs)
66
- else:
67
- raise NotImplementedError(f"Embedding {embedding} not supported.")
68
-
69
- if vector_store in supported_vector_stores:
70
- _vector_store = supported_vector_stores[vector_store]
71
- else:
72
- raise NotImplementedError(f"Vector store {vector_store} not supported.")
73
-
74
  return FolderIndex.from_files(
75
- files=files, embeddings=_embeddings, vector_store=_vector_store
76
  )
 
1
  from typing import List, Type
2
 
3
  from langchain.docstore.document import Document
4
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
5
  from langchain.embeddings.base import Embeddings
6
  from langchain.vectorstores import VectorStore
7
  from langchain.vectorstores.faiss import FAISS
 
50
  def embed_files(
51
  files: List[File], embedding: str, vector_store: str, **kwargs
52
  ) -> FolderIndex:
53
+ model_name = "BAAI/bge-small-en"
54
+ model_kwargs = {'device': 'mps'}
55
+ encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
56
+ model_norm = HuggingFaceBgeEmbeddings(
57
+ model_name=model_name,
58
+ model_kwargs=model_kwargs,
59
+ encode_kwargs=encode_kwargs
60
+ )
61
+ # embeddings = OpenAIEmbeddings
62
+ embeddings = model_norm
 
 
 
 
 
 
 
 
 
 
 
63
  return FolderIndex.from_files(
64
+ files=files, embeddings=embeddings, vector_store=FAISS
65
  )