Spaces:
Sleeping
Sleeping
Adrian Cowham
committed on
Commit
•
58660a4
1
Parent(s):
0c47d68
changed embeddings
Browse files: src/core/embedding.py (+12 -23)
src/core/embedding.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from typing import List, Type
|
2 |
|
3 |
from langchain.docstore.document import Document
|
4 |
-
from langchain.embeddings import
|
5 |
from langchain.embeddings.base import Embeddings
|
6 |
from langchain.vectorstores import VectorStore
|
7 |
from langchain.vectorstores.faiss import FAISS
|
@@ -50,27 +50,16 @@ class FolderIndex:
|
|
50 |
def embed_files(
|
51 |
files: List[File], embedding: str, vector_store: str, **kwargs
|
52 |
) -> FolderIndex:
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
if embedding in supported_embeddings:
|
65 |
-
_embeddings = supported_embeddings[embedding](**kwargs)
|
66 |
-
else:
|
67 |
-
raise NotImplementedError(f"Embedding {embedding} not supported.")
|
68 |
-
|
69 |
-
if vector_store in supported_vector_stores:
|
70 |
-
_vector_store = supported_vector_stores[vector_store]
|
71 |
-
else:
|
72 |
-
raise NotImplementedError(f"Vector store {vector_store} not supported.")
|
73 |
-
|
74 |
return FolderIndex.from_files(
|
75 |
-
files=files, embeddings=
|
76 |
)
|
|
|
1 |
from typing import List, Type
|
2 |
|
3 |
from langchain.docstore.document import Document
|
4 |
+
from langchain.embeddings import HuggingFaceBgeEmbeddings
|
5 |
from langchain.embeddings.base import Embeddings
|
6 |
from langchain.vectorstores import VectorStore
|
7 |
from langchain.vectorstores.faiss import FAISS
|
|
|
50 |
def embed_files(
|
51 |
files: List[File], embedding: str, vector_store: str, **kwargs
|
52 |
) -> FolderIndex:
|
53 |
+
model_name = "BAAI/bge-small-en"
|
54 |
+
model_kwargs = {'device': 'mps'}
|
55 |
+
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
|
56 |
+
model_norm = HuggingFaceBgeEmbeddings(
|
57 |
+
model_name=model_name,
|
58 |
+
model_kwargs=model_kwargs,
|
59 |
+
encode_kwargs=encode_kwargs
|
60 |
+
)
|
61 |
+
# embeddings = OpenAIEmbeddings
|
62 |
+
embeddings = model_norm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
return FolderIndex.from_files(
|
64 |
+
files=files, embeddings=embeddings, vector_store=FAISS
|
65 |
)
|