# edullm_rag_pipeline.py
# =========================
# 📦 IMPORTS
# =========================
import os
from typing import Union

from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel

from core.pipeline.utils import limitar_contexto, limpiar_contexto_bruto, validar_input
from core.vectorstore.embeddings import EmbeddingManager
from core.vectorstore.distance_strategy import DistanceStrategyManager
from core.vectorstore.vectorstore_manager import VectorStoreManager
from core.llm.llm_manager import LLMManager

# =========================
# ⚙️ INITIAL CONFIGURATION
# =========================
load_dotenv(dotenv_path="config/.env")

VECTORSTORE_PATH = os.getenv("VECTORSTORE_PATH", "docs/")
VECTORSTORE_NAME = os.getenv("VECTORSTORE_NAME", "edullm_store")

# =========================
# 🚀 COMPONENT INITIALIZATION
# =========================
embeddings = EmbeddingManager.get_embeddings()
strategy_mgr = DistanceStrategyManager()
vector_mgr = VectorStoreManager(path=VECTORSTORE_PATH, name=VECTORSTORE_NAME)
llm_manager = LLMManager()


# =========================
# 📄 MODELS
# =========================
class Documento(BaseModel):
    """A retrieved document: its text content, source, and relevance score."""

    contenido: str
    fuente: str
    puntaje: float


# =========================
# 🛠️ UTILITY FUNCTIONS
# =========================
def init_vectorstore(force_rebuild: bool = False) -> None:
    """Initialize the vectorstore, (re)building it when forced or missing.

    Args:
        force_rebuild: When True, rebuild even if a store already exists.
    """
    if force_rebuild or not vector_mgr.exist_vectorstore():
        vector_mgr.create_vectorstore()


# =========================
# 🎯 MAIN PIPELINE
# =========================
def edullm_rag_pipeline(
    input_data: Union[str, bytes],
    top_k: int = 4,
    search_type: str = "similarity",
) -> str:
    """RAG pipeline for multimodal processing and educational answer generation.

    Args:
        input_data: Either a text query (str) or raw image bytes.
        top_k: Number of documents to retrieve from the vectorstore.
        search_type: Retrieval strategy forwarded to the retriever
            (e.g. "similarity").

    Returns:
        The LLM-generated answer, or an error message when the input
        fails validation.
    """
    if not validar_input(input_data):
        logger.error("❌ Entrada inválida. Debes proporcionar texto o imagen válida.")
        return "Error: Entrada no válida."

    # Image input bypasses retrieval and goes straight to the multimodal LLM.
    if isinstance(input_data, bytes):
        return llm_manager.generate_response(
            user_query="Procesa la imagen adjunta y responde según el contexto educativo.",
            image=input_data,
        )

    # Text input: retrieve supporting context from the vectorstore.
    retriever = vector_mgr.as_retriever(search_type=search_type, k=top_k)
    docs = retriever.invoke(input_data)

    if not docs:
        contexto_final = "No se encontró contexto relevante."
        logger.warning("⚠️ Sin resultados en FAISS para la consulta.")
    else:
        # Join the retrieved chunks, then clean and truncate to fit the prompt.
        contexto_bruto = "\n\n".join(d.page_content for d in docs)
        contexto_limpio = limpiar_contexto_bruto(contexto_bruto)
        contexto_final = limitar_contexto(contexto_limpio)

    return llm_manager.generate_response(user_query=input_data, context=contexto_final)