Spaces:
Running
Running
# src/embedding_generator/embedder.py
from langchain_ollama import OllamaEmbeddings # cite: embed_pipeline.py, query_pipeline.py | |
from config.settings import OLLAMA_URL, EMBED_MODEL | |
import logging | |
from typing import List | |
# Module-level logger named after this module; EmbeddingGenerator uses it
# for its info/debug/error/critical messages.
logger = logging.getLogger(__name__)
class EmbeddingGenerator:
    """Manage the embedding model and generate embeddings with it.

    Holds a single ``OllamaEmbeddings`` client, configured from the
    ``OLLAMA_URL`` and ``EMBED_MODEL`` settings, and exposes explicit
    methods for embedding a batch of texts or a single query.
    """

    def __init__(self):
        """Create the ``OllamaEmbeddings`` client.

        Raises:
            Exception: Any error raised while constructing the client is
                logged at CRITICAL level and re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # Consider adding error handling for an unreachable Ollama server.
        # For production, evaluate if Ollama is suitable or if a more
        # robust/managed embedding service is required based on load and
        # reliability needs.
        # ------------------------------------
        try:
            # cite: embed_pipeline.py, query_pipeline.py
            self.embedder = OllamaEmbeddings(
                base_url=OLLAMA_URL, model=EMBED_MODEL
            )
        except Exception as e:
            logger.critical("Failed to initialize embedding model: %s", e)
            # Bare ``raise`` re-raises the active exception as-is.
            raise
        else:
            logger.info(
                "Initialized embedding model: %s at %s", EMBED_MODEL, OLLAMA_URL
            )

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of text inputs.

        Args:
            texts: A list of strings to embed.

        Returns:
            A list of embeddings (each a list of floats), as returned by
            the client's ``embed_documents``. An empty ``texts`` yields an
            empty list without calling the embedding service.

        Raises:
            Exception: Any error raised by the embedding client is logged
                at ERROR level and re-raised unchanged.
        """
        # Nothing to embed: skip the call to the embedding service.
        if not texts:
            return []

        # --- Financial Ministry Adaptation ---
        # Implement retry logic for API calls to the embedding service.
        # Consider potential rate limits.
        # ------------------------------------
        try:
            # Used internally by add_documents, but good to have as an
            # explicit method.
            embeddings = self.embedder.embed_documents(texts)
        except Exception as e:
            logger.error("Failed to generate embeddings: %s", e)
            raise
        else:
            logger.debug("Generated %d embeddings.", len(embeddings))
            return embeddings

    def generate_query_embedding(self, text: str) -> List[float]:
        """Generate an embedding for a single query text.

        Args:
            text: The query string.

        Returns:
            An embedding (list of floats), as returned by the client's
            ``embed_query``.

        Raises:
            Exception: Any error raised by the embedding client is logged
                at ERROR level and re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # Implement retry logic for API calls.
        # ------------------------------------
        try:
            # cite: query_pipeline.py (implicitly used by retriever)
            embedding = self.embedder.embed_query(text)
        except Exception as e:
            logger.error("Failed to generate query embedding: %s", e)
            raise
        else:
            logger.debug("Generated query embedding.")
            return embedding