Michael Feil
committed on
Commit
·
b43a465
1
Parent(s):
6840107
infinity: Update embedding_model.py (#1109)
Browse files

### What problem does this PR solve?
I implemented infinity, a fast vector embeddings engine.
### Type of change
- [x] Performance Improvement
- [X] Other (please describe):
- rag/llm/embedding_model.py +42 -1
rag/llm/embedding_model.py
CHANGED
@@ -26,6 +26,7 @@ import dashscope
|
|
26 |
from openai import OpenAI
|
27 |
from FlagEmbedding import FlagModel
|
28 |
import torch
|
|
|
29 |
import numpy as np
|
30 |
|
31 |
from api.utils.file_utils import get_home_cache_dir
|
@@ -304,4 +305,44 @@ class JinaEmbed(Base):
|
|
304 |
|
305 |
def encode_queries(self, text):
    """Encode one query string via the batch encode() path.

    Returns a tuple of (1-D embedding vector, token count).
    """
    batch, used_tokens = self.encode([text])
    return np.array(batch[0]), used_tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
from openai import OpenAI
|
27 |
from FlagEmbedding import FlagModel
|
28 |
import torch
|
29 |
+
import asyncio
|
30 |
import numpy as np
|
31 |
|
32 |
from api.utils.file_utils import get_home_cache_dir
|
|
|
305 |
|
306 |
def encode_queries(self, text):
    """Embed a single query; delegates to encode() with a one-element batch.

    Returns (vector for the query, token usage).
    """
    vectors, token_count = self.encode([text])
    first_vector = np.array(vectors[0])
    return first_vector, token_count
|
309 |
+
|
310 |
+
|
311 |
+
class InfinityEmbed(Base):
    """Embedding backend powered by the `infinity_emb` async inference engine.

    An AsyncEngineArray can host several models at once; calls that do not
    name a model fall back to the first entry of ``model_names``.
    """

    # Kept for interface parity with sibling embedding classes; the actual
    # engines live in ``self.engine_array``.
    _model = None

    def __init__(
        self,
        model_names: list[str] = ("BAAI/bge-small-en-v1.5",),
        engine_kwargs: dict = None,
        key=None,
    ):
        """Build one EngineArgs per model and wrap them in an engine array.

        ``key`` is accepted only for signature compatibility with the other
        embedding backends in this module and is unused here.
        """
        # Lazy imports: infinity_emb is only required when this backend is
        # actually selected.
        from infinity_emb import EngineArgs
        from infinity_emb.engine import AsyncEngineArray

        # Copy instead of sharing a mutable default dict across instances
        # (the original signature used the ``engine_kwargs: dict = {}``
        # mutable-default antipattern).
        engine_kwargs = dict(engine_kwargs or {})
        self._default_model = model_names[0]
        self.engine_array = AsyncEngineArray.from_args(
            [
                EngineArgs(model_name_or_path=model_name, **engine_kwargs)
                for model_name in model_names
            ]
        )

    async def _embed(self, sentences: list[str], model_name: str = ""):
        """Embed ``sentences`` with the named engine.

        Starts the engine only if it is not already running, and in that
        case stops it again afterwards. Returns ``(embeddings, usage)``
        exactly as reported by the engine.
        """
        if not model_name:
            model_name = self._default_model
        engine = self.engine_array[model_name]
        was_already_running = engine.is_running
        if not was_already_running:
            await engine.astart()
        try:
            embeddings, usage = await engine.embed(sentences=sentences)
        finally:
            # Stop a temporarily started engine even if embedding raised,
            # so the engine's resources are not leaked.
            if not was_already_running:
                await engine.astop()
        return embeddings, usage

    def encode(self, texts: list[str], model_name: str = "") -> tuple[np.ndarray, int]:
        """Synchronously embed a batch of texts.

        Returns (embedding matrix, total token usage reported by the engine).
        NOTE(review): asyncio.run() raises if invoked from inside a running
        event loop — confirm all callers are synchronous.
        """
        embeddings, usage = asyncio.run(self._embed(texts, model_name))
        return np.array(embeddings), usage

    def encode_queries(self, text: str) -> tuple[np.ndarray, int]:
        """Embed a single query string.

        Returns (1-D vector, usage) — the first row of the batch result,
        matching the ``encode_queries`` contract of the other embedding
        classes in this module (e.g. JinaEmbed returns ``embds[0]``).
        """
        embeddings, usage = self.encode([text])
        return np.array(embeddings[0]), usage
|