# Chandima Prabhath
# Refactor code structure for improved readability and maintainability
# 10b392a
# src/embedding_generator/embedder.py
from langchain_ollama import OllamaEmbeddings # cite: embed_pipeline.py, query_pipeline.py
from config.settings import OLLAMA_URL, EMBED_MODEL
import logging
from typing import List
logger = logging.getLogger(__name__)
class EmbeddingGenerator:
    """Manage the embedding model and generate embeddings.

    A single ``OllamaEmbeddings`` client is built in the constructor from
    the ``OLLAMA_URL`` and ``EMBED_MODEL`` settings and is reused by every
    method on the instance.
    """

    def __init__(self) -> None:
        """Create the ``OllamaEmbeddings`` client.

        Raises:
            Exception: Any error raised while constructing the client. It is
                logged at CRITICAL level (with traceback) and re-raised
                unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # TODO: Consider adding error handling for an unreachable Ollama server.
        # For production, evaluate whether Ollama is suitable or whether a more
        # robust/managed embedding service is required, based on load and
        # reliability needs.
        # ------------------------------------
        try:
            self.embedder = OllamaEmbeddings(base_url=OLLAMA_URL, model=EMBED_MODEL)  # cite: embed_pipeline.py, query_pipeline.py
        except Exception as e:
            logger.critical("Failed to initialize embedding model: %s", e, exc_info=True)
            # Bare ``raise`` re-raises the active exception as-is so the
            # caller decides whether to abort.
            raise
        else:
            logger.info("Initialized embedding model: %s at %s", EMBED_MODEL, OLLAMA_URL)

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of text inputs.

        Args:
            texts: A list of strings to embed.

        Returns:
            A list of embeddings (each a list of floats). An empty list or
            tuple yields ``[]`` without calling the embedding service.

        Raises:
            Exception: Any error raised by ``embed_documents``. It is logged
                with its traceback and re-raised unchanged.
        """
        # Nothing to embed: skip the round trip to the embedding service.
        # Only concrete empty sequences are short-circuited; every other
        # input is forwarded exactly as before.
        if isinstance(texts, (list, tuple)) and not texts:
            return []

        # --- Financial Ministry Adaptation ---
        # TODO: Implement retry logic for API calls to the embedding service.
        # Consider potential rate limits.
        # ------------------------------------
        try:
            # Used internally by add_documents, but good to have as an
            # explicit method.
            embeddings = self.embedder.embed_documents(texts)
        except Exception as e:
            logger.exception("Failed to generate embeddings: %s", e)
            raise
        else:
            logger.debug("Generated %d embeddings.", len(embeddings))
            return embeddings

    def generate_query_embedding(self, text: str) -> List[float]:
        """Generate an embedding for a single query text.

        Args:
            text: The query string.

        Returns:
            An embedding (list of floats).

        Raises:
            Exception: Any error raised by ``embed_query``. It is logged with
                its traceback and re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # TODO: Implement retry logic for API calls.
        # ------------------------------------
        try:
            embedding = self.embedder.embed_query(text)  # cite: query_pipeline.py (implicitly used by retriever)
        except Exception as e:
            logger.exception("Failed to generate query embedding: %s", e)
            raise
        else:
            logger.debug("Generated query embedding.")
            return embedding