Spaces:
Running
Running
# edullm_rag_pipeline.py | |
# ========================= | |
# 📦 IMPORTACIONES | |
# ========================= | |
import os | |
from typing import Union | |
from dotenv import load_dotenv | |
from loguru import logger | |
from pydantic import BaseModel | |
from core.pipeline.utils import limitar_contexto, limpiar_contexto_bruto, validar_input | |
from core.vectorstore.embeddings import EmbeddingManager | |
from core.vectorstore.distance_strategy import DistanceStrategyManager | |
from core.vectorstore.vectorstore_manager import VectorStoreManager | |
from core.llm.llm_manager import LLMManager | |
# ========================= | |
# ⚙️ CONFIGURACIÓN INICIAL | |
# ========================= | |
# Load variables from the project's env file before any setting is read.
load_dotenv(dotenv_path="config/.env")

# Vector store settings; each falls back to a default when the variable is unset.
VECTORSTORE_PATH = os.environ.get("VECTORSTORE_PATH", "docs/")
VECTORSTORE_NAME = os.environ.get("VECTORSTORE_NAME", "edullm_store")
# ========================= | |
# 🚀 INICIALIZACIÓN DE COMPONENTES | |
# ========================= | |
# Module-level singletons, created once at import time.
# NOTE(review): `embeddings` and `strategy_mgr` are not referenced in the
# visible part of this module — presumably consumed by importers; confirm.
embeddings = EmbeddingManager.get_embeddings()
strategy_mgr = DistanceStrategyManager()
# Vector store manager bound to the path and name read from the environment.
vector_mgr = VectorStoreManager(path=VECTORSTORE_PATH, name=VECTORSTORE_NAME)
# LLM manager used by the pipeline below to generate responses.
llm_manager = LLMManager()
# ========================= | |
# 📄 MODELOS | |
# ========================= | |
# Pydantic model with three required fields (two strings and a float).
# NOTE(review): not referenced in the visible part of this module; the field
# meanings noted below are inferred from their names — confirm against callers.
class Documento(BaseModel):
    contenido: str  # presumably the document text — TODO confirm
    fuente: str  # presumably the source identifier — TODO confirm
    puntaje: float  # presumably a relevance/similarity score — TODO confirm
# ========================= | |
# 🛠️ FUNCIONES UTILITARIAS | |
# ========================= | |
def init_vectorstore(force_rebuild: bool = False):
    """Create the vector store when it is missing or a rebuild is forced.

    Args:
        force_rebuild: When true, ``vector_mgr.create_vectorstore()`` is
            called without first checking whether a store already exists.
    """
    if not force_rebuild and vector_mgr.exist_vectorstore():
        # exist_vectorstore() reported a store and no rebuild was requested.
        return
    vector_mgr.create_vectorstore()
# ========================= | |
# 🎯 PIPELINE PRINCIPAL | |
# ========================= | |
def edullm_rag_pipeline(
    input_data: Union[str, bytes], top_k: int = 4, search_type: str = "similarity"
) -> str:
    """Run the RAG pipeline on a text query or an image and return the answer.

    Args:
        input_data: The user's query as ``str``, or an image as ``bytes``.
        top_k: Passed to the retriever as ``k``.
        search_type: Passed to the retriever as ``search_type``.

    Returns:
        The value returned by ``llm_manager.generate_response`` (annotated as
        ``str``), or the literal ``"Error: Entrada no válida."`` when
        ``validar_input`` rejects the input.
    """
    # Reject anything the shared validator does not accept.
    if not validar_input(input_data):
        logger.error("❌ Entrada inválida. Debes proporcionar texto o imagen válida.")
        return "Error: Entrada no válida."

    # Image input skips retrieval: it goes to the LLM with a fixed prompt.
    if isinstance(input_data, bytes):
        image_prompt = "Procesa la imagen adjunta y responde según el contexto educativo."
        return llm_manager.generate_response(user_query=image_prompt, image=input_data)

    # Text input: fetch documents from the vector store for this query.
    matches = vector_mgr.as_retriever(search_type=search_type, k=top_k).invoke(input_data)

    if matches:
        # Join the page contents, then clean and limit the text via the helpers.
        merged_text = "\n\n".join([match.page_content for match in matches])
        context = limitar_contexto(limpiar_contexto_bruto(merged_text))
    else:
        # Nothing retrieved: still call the LLM, with a placeholder context.
        logger.warning("⚠️ Sin resultados en FAISS para la consulta.")
        context = "No se encontró contexto relevante."

    return llm_manager.generate_response(user_query=input_data, context=context)