Spaces:
Sleeping
Sleeping
File size: 1,614 Bytes
e8051be |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
"""
Embedding Management Module for Advanced RAG
Handles text encoding and embedding operations.
"""
import asyncio
from typing import List
from sentence_transformers import SentenceTransformer
from config.config import EMBEDDING_MODEL
class EmbeddingManager:
    """Manage text embeddings for RAG operations.

    Wraps a ``SentenceTransformer`` model (selected by ``EMBEDDING_MODEL``)
    and exposes coroutine methods that run the blocking ``encode`` call in
    the event loop's default executor, so awaiting callers do not block the
    loop while the model computes.
    """

    def __init__(self):
        """Create the manager and load the embedding model immediately."""
        self.embedding_model = None
        self._init_embedding_model()

    def _init_embedding_model(self):
        """Load the model named by ``EMBEDDING_MODEL`` into ``self.embedding_model``."""
        print(f"🔄 Loading embedding model: {EMBEDDING_MODEL}")
        self.embedding_model = SentenceTransformer(EMBEDDING_MODEL)
        print("✅ Embedding model loaded successfully")

    def _encode_blocking(self, texts: List[str]) -> List[List[float]]:
        """Encode *texts* synchronously and return one list of floats per text.

        Shared by the async entry points. The model is asked for normalized
        embeddings (``normalize_embeddings=True``); each returned vector is
        cast to float32 before being converted to a plain Python list.
        """
        vectors = self.embedding_model.encode(texts, normalize_embeddings=True)
        return [vec.astype("float32").tolist() for vec in vectors]

    async def encode_query(self, query: str) -> List[float]:
        """Encode a single query string into one embedding vector.

        Args:
            query: The text to embed.

        Returns:
            The embedding of *query* as a list of floats.
        """
        # get_running_loop() is the documented call inside a coroutine; it
        # never creates a loop implicitly the way get_event_loop() can.
        loop = asyncio.get_running_loop()
        # The model takes a batch, so wrap the query and unwrap the single row.
        rows = await loop.run_in_executor(None, self._encode_blocking, [query])
        return rows[0]

    async def encode_texts(self, texts: List[str]) -> List[List[float]]:
        """Encode multiple texts into embeddings.

        Args:
            texts: The texts to embed.

        Returns:
            One embedding (list of floats) per input text, in input order.
            An empty input yields an empty list without invoking the model.
        """
        # Nothing to encode: skip the executor round-trip and the model call.
        if not texts:
            return []
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._encode_blocking, texts)
|