File size: 2,890 Bytes
10b392a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# src/embedding_generator/embedder.py
from langchain_ollama import OllamaEmbeddings # cite: embed_pipeline.py, query_pipeline.py
from config.settings import OLLAMA_URL, EMBED_MODEL
import logging
from typing import List

logger = logging.getLogger(__name__)

class EmbeddingGenerator:
    """
    Manages the embedding model and generates embeddings.

    Holds an ``OllamaEmbeddings`` client built from ``OLLAMA_URL`` and
    ``EMBED_MODEL`` and exposes one method for batches of documents and one
    for a single query string. Every failure is logged and then re-raised
    unchanged, so callers decide how to recover.
    """

    def __init__(self):
        """
        Creates the underlying ``OllamaEmbeddings`` client.

        Raises:
            Exception: Whatever the client constructor raises. The error is
                logged at CRITICAL level and propagated unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # TODO: Consider adding error handling for an unreachable Ollama server.
        # TODO: For production, evaluate whether Ollama is suitable or whether a
        # more robust/managed embedding service is required, based on load and
        # reliability needs.
        # ------------------------------------
        try:
            self.embedder = OllamaEmbeddings(base_url=OLLAMA_URL, model=EMBED_MODEL)
        except Exception as e:
            logger.critical("Failed to initialize embedding model: %s", e)
            # Bare raise keeps the original exception and traceback intact.
            raise
        else:
            logger.info("Initialized embedding model: %s at %s", EMBED_MODEL, OLLAMA_URL)

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Generates embeddings for a list of text inputs.

        Args:
            texts: A list of strings to embed. If it is empty, an empty list
                is returned without calling the embedding service.

        Returns:
            A list of embeddings (one list of floats per input text), as
            returned by the embedder's ``embed_documents``.

        Raises:
            Exception: Any error raised by the embedding call. It is logged
                at ERROR level and re-raised unchanged.
        """
        # Nothing to embed: skip the service call entirely.
        if not texts:
            logger.debug("No texts supplied; skipping embedding call.")
            return []

        # --- Financial Ministry Adaptation ---
        # TODO: Implement retry logic for API calls to the embedding service.
        # TODO: Consider potential rate limits.
        # ------------------------------------
        try:
            embeddings = self.embedder.embed_documents(texts)
        except Exception as e:
            logger.error("Failed to generate embeddings: %s", e)
            raise
        else:
            logger.debug("Generated %d embeddings.", len(embeddings))
            return embeddings

    def generate_query_embedding(self, text: str) -> List[float]:
        """
        Generates an embedding for a single query text.

        Args:
            text: The query string.

        Returns:
            An embedding (list of floats), as returned by the embedder's
            ``embed_query``.

        Raises:
            Exception: Any error raised by the embedding call. It is logged
                at ERROR level and re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # TODO: Implement retry logic for API calls.
        # ------------------------------------
        try:
            embedding = self.embedder.embed_query(text)
        except Exception as e:
            logger.error("Failed to generate query embedding: %s", e)
            raise
        else:
            logger.debug("Generated query embedding.")
            return embedding