# edullm_rag_pipeline.py
# =========================
# 📦 IMPORTS
# =========================
import os
from typing import Union
from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel
from core.pipeline.utils import limitar_contexto, limpiar_contexto_bruto, validar_input
from core.vectorstore.embeddings import EmbeddingManager
from core.vectorstore.distance_strategy import DistanceStrategyManager
from core.vectorstore.vectorstore_manager import VectorStoreManager
from core.llm.llm_manager import LLMManager

# =========================
# ⚙️ INITIAL CONFIGURATION
# =========================
load_dotenv(dotenv_path="config/.env")
VECTORSTORE_PATH = os.getenv("VECTORSTORE_PATH", "docs/")
VECTORSTORE_NAME = os.getenv("VECTORSTORE_NAME", "edullm_store")
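# Illustrative only: a config/.env providing the two keys read above might look like
# the following (the values mirror the defaults used as fallbacks; they are not
# shipped project settings).
#
#   VECTORSTORE_PATH=docs/
#   VECTORSTORE_NAME=edullm_store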

# =========================
# 🚀 COMPONENT INITIALIZATION
# =========================
embeddings = EmbeddingManager.get_embeddings()
strategy_mgr = DistanceStrategyManager()
vector_mgr = VectorStoreManager(path=VECTORSTORE_PATH, name=VECTORSTORE_NAME)
llm_manager = LLMManager()

# =========================
# 📄 MODELS
# =========================
class Documento(BaseModel):
    """Retrieved document: content, source, and relevance score."""

    contenido: str
    fuente: str
    puntaje: float
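
# Illustrative sketch (not used by the pipeline below): how retriever hits could be
# mapped onto the Documento model. Here `d` is assumed to be a LangChain Document
# exposing page_content and a "source" key in metadata; the score is a placeholder,
# since retriever.invoke() does not return scores.
def _a_documentos(docs) -> list[Documento]:
    return [
        Documento(
            contenido=d.page_content,
            fuente=d.metadata.get("source", "desconocida"),
            puntaje=0.0,  # placeholder: fill from a scored search if one is available
        )
        for d in docs
    ]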

# =========================
# 🛠️ UTILITY FUNCTIONS
# =========================
def init_vectorstore(force_rebuild: bool = False):
    """Initializes or rebuilds the vectorstore if necessary."""
    if force_rebuild or not vector_mgr.exist_vectorstore():
        vector_mgr.create_vectorstore()
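
# Typical usage (a sketch; this module does not call it automatically): run once at
# startup, or with force_rebuild=True after the source documents change.
#   init_vectorstore()
#   init_vectorstore(force_rebuild=True)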

# =========================
# 🎯 MAIN PIPELINE
# =========================
def edullm_rag_pipeline(
    input_data: Union[str, bytes], top_k: int = 4, search_type: str = "similarity"
) -> str:
    """RAG pipeline for multimodal processing and educational response generation."""
    if not validar_input(input_data):
        logger.error("❌ Entrada inválida. Debes proporcionar texto o imagen válida.")
        return "Error: Entrada no válida."

    # Image input (bytes): skip retrieval and let the LLM handle it directly.
    if isinstance(input_data, bytes):
        return llm_manager.generate_response(
            user_query="Procesa la imagen adjunta y responde según el contexto educativo.",
            image=input_data,
        )

    # Text input: retrieve the top_k most relevant chunks from the vectorstore.
    retriever = vector_mgr.as_retriever(search_type=search_type, k=top_k)
    docs = retriever.invoke(input_data)

    if not docs:
        contexto_final = "No se encontró contexto relevante."
        logger.warning("⚠️ Sin resultados en FAISS para la consulta.")
    else:
        # Join, clean, and truncate the retrieved chunks before prompting the LLM.
        contexto_bruto = "\n\n".join(d.page_content for d in docs)
        contexto_limpio = limpiar_contexto_bruto(contexto_bruto)
        contexto_final = limitar_contexto(contexto_limpio)

    return llm_manager.generate_response(user_query=input_data, context=contexto_final)
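

# Minimal usage sketch (assumptions: config/.env and the source documents are in
# place; the query below is only an example, not part of the project).
if __name__ == "__main__":
    init_vectorstore()
    respuesta = edullm_rag_pipeline("¿Qué es el aprendizaje supervisado?", top_k=4)
    print(respuesta)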