| | import os |
| | from typing import Any |
| |
|
| | from dotenv import find_dotenv, load_dotenv |
| | from huggingface_hub import login |
| | from llama_index.core import VectorStoreIndex |
| | from llama_index.embeddings.huggingface import HuggingFaceEmbedding |
| | from llama_index.vector_stores.milvus import MilvusVectorStore |
| |
|
| | from src.agent_hackathon.consts import PROJECT_ROOT_DIR |
| | from src.agent_hackathon.logger import get_logger |
| |
|
# Module-level logger for this file; log output is directed to the "logs"
# directory under the project root.
logger = get_logger(
    log_name="query_vector_db",
    log_dir=PROJECT_ROOT_DIR / "logs",
)
| |
|
| |
|
class RetrieverEngineBuilder:
    """
    Build a LlamaIndex retriever on top of an existing vector store.

    The constructor loads a HuggingFace embedding model. ``build_retriever_engine``
    then wraps the configured vector store in a ``VectorStoreIndex`` and returns
    a retriever created from that index.
    """

    def __init__(
        self,
        hf_token_env: str = "HF_TOKEN",
        embedding_model: str = "Qwen/Qwen3-Embedding-0.6B",
        vector_store: "MilvusVectorStore | None" = None,
        device: str = "cpu",
    ) -> None:
        """
        Initialize the RetrieverEngineBuilder.

        Args:
            hf_token_env: Name of the environment variable holding the
                HuggingFace token (read only by ``_login_huggingface``).
            embedding_model: Name of the HuggingFace embedding model to load.
            vector_store: The MilvusVectorStore to retrieve from. May be left
                as None at construction time, but must be set before calling
                ``build_retriever_engine``.
            device: Device the embedding model is loaded on.
        """
        self.hf_token_env = hf_token_env
        self.embedding_model = embedding_model
        self.vector_store = vector_store
        self.device = device

        logger.info("Initializing RetrieverEngineBuilder.")

        # NOTE: _load_env() and _login_huggingface() are NOT invoked here;
        # code that needs a .env file loaded or an authenticated HuggingFace
        # session has to call them explicitly.
        self.embed_model = HuggingFaceEmbedding(
            model_name=self.embedding_model, device=self.device
        )
        logger.info("RetrieverEngineBuilder initialized.")

    def _login_huggingface(self) -> None:
        """Log in to HuggingFace with the token read from ``self.hf_token_env``."""
        logger.info("Logging in to HuggingFace.")
        token = os.getenv(self.hf_token_env)
        if not token:
            # The token is still forwarded unchanged so login() keeps its own
            # handling of a missing value; the warning just makes the cause
            # visible in the logs.
            logger.warning(
                f"Environment variable '{self.hf_token_env}' is not set or empty."
            )
        login(token=token)
        logger.info("Logged in to HuggingFace.")

    def _load_env(self) -> None:
        """Load environment variables from a .env file, if one can be located."""
        logger.info("Loading environment variables.")
        # find_dotenv is told not to raise when no .env file is found.
        load_dotenv(dotenv_path=find_dotenv(raise_error_if_not_found=False))
        logger.info("Environment variables loaded.")

    def build_retriever_engine(
        self,
        similarity_top_k: int = 5,
        vector_store_query_mode: str = "hybrid",
    ) -> Any:
        """
        Build and return the retriever engine.

        Args:
            similarity_top_k: Value forwarded to ``as_retriever`` as
                ``similarity_top_k``. Defaults to 5.
            vector_store_query_mode: Value forwarded to ``as_retriever`` as
                ``vector_store_query_mode``. Defaults to "hybrid".

        Returns:
            Retriever object produced by ``VectorStoreIndex.as_retriever``.

        Raises:
            ValueError: If no vector store was supplied to the builder.
        """
        # Fail fast with a clear message instead of letting a None vector
        # store surface as an opaque error inside LlamaIndex.
        if self.vector_store is None:
            raise ValueError(
                "RetrieverEngineBuilder has no vector_store; "
                "pass one to the constructor before building a retriever."
            )

        logger.info("Building retriever engine.")
        index = VectorStoreIndex.from_vector_store(
            vector_store=self.vector_store, embed_model=self.embed_model
        )
        retriever = index.as_retriever(
            vector_store_query_mode=vector_store_query_mode,
            similarity_top_k=similarity_top_k,
        )
        logger.info("Retriever engine built.")
        return retriever
| |
|