Spaces:
Running
Running
# core/integrations/telegram_bot.py | |
import os | |
import re | |
import tempfile | |
import time | |
import fitz # PyMuPDF | |
from docx import Document | |
from dotenv import load_dotenv | |
from telegram import InlineKeyboardButton, InlineKeyboardMarkup, InputFile, Update | |
from telegram.ext import ( | |
ApplicationBuilder, | |
CallbackQueryHandler, | |
CommandHandler, | |
ContextTypes, | |
MessageHandler, | |
filters, | |
) | |
from core.integrations.doc_converter import gestionar_descarga, procesar_markdown | |
from core.logging.usage_logger import registrar_uso | |
from core.pipeline.edullm_rag_pipeline import edullm_rag_pipeline | |
# ==== GENERAL CONFIGURATION ====
# Populate the process environment from the dotenv file before any lookup.
load_dotenv(dotenv_path="config/.env")

# Name given to the DOCX file when it is sent back to the user.
DOCX_FILENAME = "material_educativo.docx"
# Image sent along with the "unsupported format" warning.
FORMAT_WARNING_IMAGE = "assets/formatos_soportados.png"
# Bot credential read from the environment.
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")

# Fail while the module is loading when the token is missing or empty.
if TELEGRAM_TOKEN is None or TELEGRAM_TOKEN == "":
    raise ValueError("❌ TELEGRAM_TOKEN no está definido en las variables de entorno.")
# ==== FUNCIONES AUXILIARES ==== | |
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF as one stripped string.

    Args:
        file_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        The ``page.get_text()`` output of each page, concatenated in page
        order, with leading and trailing whitespace removed.
    """
    with fitz.open(file_path) as pdf:
        # Join once instead of growing a string with ``+=`` on every page.
        return "".join(page.get_text() for page in pdf).strip()
def extract_text_from_docx(file_path):
    """Return the non-blank paragraphs of a DOCX file joined by newlines.

    Args:
        file_path: Path of the DOCX file; passed unchanged to ``Document``.

    Returns:
        The text of each paragraph whose text is not whitespace-only,
        in document order, separated by ``"\\n"``.
    """
    lineas = []
    for parrafo in Document(file_path).paragraphs:
        contenido = parrafo.text
        # Paragraphs that are empty or whitespace-only are skipped.
        if contenido.strip():
            lineas.append(contenido)
    return "\n".join(lineas)
def extract_text_from_txt(file_path) -> str:
    """Return the stripped contents of a plain-text file.

    The file is decoded as UTF-8, and a leading byte-order mark is dropped
    so it does not end up in the returned text. Files that are not valid
    UTF-8 are re-read as Latin-1, which maps every byte to a character,
    instead of raising ``UnicodeDecodeError``.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded text with leading and trailing whitespace removed.
    """
    try:
        # "utf-8-sig" behaves like UTF-8 but also consumes an optional BOM.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            return f.read().strip()
    except UnicodeDecodeError:
        # Fallback for legacy single-byte files; Latin-1 decoding cannot fail.
        with open(file_path, "r", encoding="latin-1") as f:
            return f.read().strip()
# Characters that receive a preceding backslash. The backslash itself is
# included so that a literal "\" in the text is not read as an escape.
_MARKDOWN_V2_SPECIALS = re.compile(r"([\\_*\[\]()~`>#+\-=|{}.!])")


def escape_markdown(text: str) -> str:
    """Escape the special characters of Telegram's MarkdownV2 syntax.

    Args:
        text: Raw text to embed in a MarkdownV2 message.

    Returns:
        ``text`` with a backslash inserted before every occurrence of
        ``\\ _ * [ ] ( ) ~ ` > # + - = | { } . !``; all other characters
        are left untouched.
    """
    return _MARKDOWN_V2_SPECIALS.sub(r"\\\1", text)
def detectar_tipo_entrada(user_input) -> str:
    """Classify the user input by its Python type.

    Returns:
        ``"Texto"`` for ``str`` instances, ``"Imagen"`` for ``bytes``
        instances, and ``"Otro"`` for anything else.
    """
    etiquetas = ((str, "Texto"), (bytes, "Imagen"))
    for tipo, etiqueta in etiquetas:
        if isinstance(user_input, tipo):
            return etiqueta
    return "Otro"
# ==== COMANDO /start ==== | |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Reply to the /start command with the bot's welcome and usage guide.

    Args:
        update: Incoming update that carried the command.
        context: Handler context (not used here).
    """
    bienvenida = (
        "👋 *¡Bienvenido a EduLLM Bot!*\n\n"
        "📌 *Formatos aceptados:* Texto, Imagen, PDF, DOCX o TXT.\n"
        "📄 *Formato que genero:* Material educativo listo para descargar en DOCX.\n\n"
        "✅ *¿Qué puedo generar?*\n"
        "Materiales educativos alineados al *CNEB, MBDD y MINEDU – Perú*, como:\n\n"
        "1️⃣ *Ficha*\n"
        "- Incluye: Metadatos, Resumen, Desarrollo, Preguntas DECO, Conclusión, Recomendación, Instrumento (opcional, debes indicar si quieres instrumentos de evaluación).\n\n"
        "2️⃣ *Resumen temático*\n"
        "- Incluye: Metadatos, Ideas clave (mínimo 3), Desarrollo, Conclusión.\n\n"
        "3️⃣ *Banco de preguntas*\n"
        "- Incluye: Metadatos, 10+ Preguntas DECO, Claves o respuestas (opcional, debes indicar que quieres respuestas).\n\n"
        "4️⃣ *Rúbrica o Lista de cotejo*\n"
        "- Incluye: Metadatos, Criterios, Niveles, Descriptores.\n\n"
        "🎯 *¿Qué necesito de ti?*\n"
        "Indícame: *área curricular*, *grado*, *bimestre*, *competencia*, *capacidad* y *desempeño esperado*.\n\n"
        "📌 *Ejemplo:*\n"
        "`Quiero 10 preguntas sobre los animales vertebrados para 4.º primaria (Ciencia y Tecnología, bim 1) con sus respectivas respuestas.`"
    )
    # The command handler also fires for edited messages, where
    # ``update.message`` is None; ``effective_message`` covers both cases.
    await update.effective_message.reply_text(bienvenida, parse_mode="Markdown")
# ==== MANEJO DE MENSAJES ==== | |
def _leer_bytes(file_path):
    """Return the raw bytes stored at ``file_path``."""
    with open(file_path, "rb") as fh:
        return fh.read()


async def _descargar_y_procesar(tg_file, suffix, procesar):
    """Download ``tg_file`` to a temp path, return ``procesar(path)``, then delete the file."""
    fd, tmp_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)  # only the path is needed; the download writes to it by name
    try:
        await tg_file.download_to_drive(tmp_path)
        return procesar(tmp_path)
    finally:
        # Always remove the temp file, even if the download or parsing fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


async def _obtener_entrada(update: Update):
    """Extract the usable content of ``update.message``.

    Returns:
        The message text (``str``), the photo bytes (``bytes``), or the
        caption plus extracted document text (``str``). Returns ``None``
        when the message kind is not supported; in that case the user has
        already been sent the matching explanation.
    """
    message = update.message

    if message.text:
        return message.text

    if message.photo:
        # The last entry of ``photo`` is the one the original code picked.
        tg_file = await message.photo[-1].get_file()
        return await _descargar_y_procesar(tg_file, ".jpg", _leer_bytes)

    # Checked before ``document``: Telegram also fills ``document`` for GIF
    # messages, so testing ``document`` first made this reply unreachable.
    if message.animation:
        await message.reply_text(
            "🎞️ Los GIFs no son compatibles. Por favor envía texto, imagen o documentos."
        )
        return None

    if message.document:
        extractores = {
            "pdf": extract_text_from_pdf,
            "docx": extract_text_from_docx,
            "txt": extract_text_from_txt,
        }
        # ``file_name`` may be absent; treat that as "no known extension".
        nombre = message.document.file_name or ""
        ext = nombre.rpartition(".")[2].lower()
        extractor = extractores.get(ext)
        if extractor is None:
            # Reject before downloading anything.
            await enviar_mensaje_formato_no_soportado(update)
            return None
        tg_file = await message.document.get_file()
        extracted_text = await _descargar_y_procesar(tg_file, f".{ext}", extractor)
        mensaje_texto = message.caption or ""
        return f"{mensaje_texto}\n\n{extracted_text}".strip()

    if message.audio or message.voice or message.video:
        await message.reply_text(
            "🎙️🎥 *Audios y videos no son compatibles.* Solo acepto texto, imágenes o documentos (PDF, DOCX, TXT).",
            parse_mode="Markdown",
        )
    elif message.sticker:
        await message.reply_text(
            "🟢 Gracias por el sticker, pero necesito texto, imagen o documento educativo."
        )
    elif message.location:
        await message.reply_text(
            "📍 He recibido tu ubicación, pero solo trabajo con contenido educativo."
        )
    elif message.contact:
        await message.reply_text(
            "📞 Recibí un contacto, pero por favor envíame contenido académico (texto, imagen o documento)."
        )
    else:
        await enviar_mensaje_formato_no_soportado(update)
    return None


async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Turn an incoming message into educational material and reply with a preview.

    The content is taken from the message (text, photo bytes, or text
    extracted from a PDF/DOCX/TXT document), passed to
    ``edullm_rag_pipeline``, and the call is recorded with
    ``registrar_uso``. On success the generated Markdown is stored in
    ``context.user_data["ultimo_markdown"]``, a preview of at most 1000
    characters is sent, and a download button is offered. On failure only
    the error text is sent.

    Args:
        update: Incoming Telegram update.
        context: Handler context; ``user_data`` keeps the last result.
    """
    message = update.message
    if message is None:
        # Edited messages and channel posts carry no ``message``; ignore them.
        return

    user_input = await _obtener_entrada(update)
    if user_input is None:
        return  # unsupported input; the user has already been told
    if not user_input:
        await message.reply_text("⚠️ No se pudo obtener contenido válido.")
        return

    await message.reply_text("⏳ Generando tu material educativo...")

    inicio = time.perf_counter()  # monotonic clock: safe for measuring elapsed time
    try:
        # NOTE(review): this is a synchronous call inside an async handler, so
        # other updates wait while it runs — confirm whether that is acceptable.
        resultado_md = edullm_rag_pipeline(user_input)
        exito = True
    except Exception as e:
        resultado_md = f"❌ Error: {str(e)}"
        exito = False
    duracion = time.perf_counter() - inicio

    registrar_uso(
        user_id=update.effective_user.id,
        username=update.effective_user.username,
        tipo_entrada=detectar_tipo_entrada(user_input),
        duracion_segundos=duracion,
        exito=exito,
    )

    if not exito:
        # Report the failure as a failure: no success banner, nothing stored
        # for download. Truncated to the same length as the preview.
        await message.reply_text(resultado_md[:1000])
        return

    context.user_data["ultimo_markdown"] = resultado_md

    preview = resultado_md[:1000] + ("\n..." if len(resultado_md) > 1000 else "")
    preview_safe = escape_markdown(preview)
    await message.reply_text(
        f"✅ *Material generado*:\n\n```\n{preview_safe}\n```", parse_mode="MarkdownV2"
    )

    botones = [[InlineKeyboardButton("📄 Descargar DOCX", callback_data="descargar_docx")]]
    await message.reply_text(
        "¿Deseas descargar el material?", reply_markup=InlineKeyboardMarkup(botones)
    )
# ==== MENSAJE DE FORMATO NO SOPORTADO ==== | |
async def enviar_mensaje_formato_no_soportado(update: Update):
    """Reply with the supported-formats image and a warning caption.

    Args:
        update: Update whose ``message`` receives the reply.

    Raises:
        OSError: If the image at ``FORMAT_WARNING_IMAGE`` cannot be opened.
    """
    # Open the image and upload its bytes. Wrapping the bare path string in
    # ``InputFile`` would upload the path text itself as the file content.
    with open(FORMAT_WARNING_IMAGE, "rb") as imagen:
        await update.message.reply_photo(
            photo=imagen,
            caption="⚠️ *Formato no soportado.*\n\nAcepto:\n- Texto\n- Imagen\n- PDF (.pdf)\n- Word (.docx)\n- Texto plano (.txt)",
            parse_mode=None,
        )
# ==== CALLBACK BOTONES ==== | |
async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle inline-button presses; ``descargar_docx`` sends the last material as DOCX.

    Args:
        update: Update carrying the callback query.
        context: Handler context; ``user_data["ultimo_markdown"]`` holds the
            Markdown to convert.
    """
    query = update.callback_query
    await query.answer()

    # Only the download button is handled; any other payload is ignored.
    if query.data != "descargar_docx":
        return

    markdown_content = context.user_data.get("ultimo_markdown")
    if not markdown_content:
        await query.edit_message_text("⚠️ No hay material disponible para convertir.")
        return

    conversion = procesar_markdown(markdown_content)
    if "error" in conversion:
        await query.edit_message_text("❌ Error al generar el archivo DOCX.")
        return

    descarga = gestionar_descarga(conversion["file_id"])
    # A dict result is treated as an error payload, anything else as the file.
    if isinstance(descarga, dict):
        await query.edit_message_text(f"⚠️ {descarga.get('error')}")
        return

    await query.edit_message_text("📥 Aquí tienes tu archivo DOCX:")
    await context.bot.send_document(
        chat_id=query.message.chat_id,
        document=descarga.path,
        filename=DOCX_FILENAME,
    )
# ==== INICIAR BOT ==== | |
async def start_bot():
    """Build the Telegram application, register its handlers and start polling."""
    application = ApplicationBuilder().token(TELEGRAM_TOKEN).build()

    # Registration order is kept: command, generic messages, button callbacks.
    manejadores = (
        CommandHandler("start", start),
        MessageHandler(filters.ALL, handle_message),
        CallbackQueryHandler(button_handler),
    )
    for manejador in manejadores:
        application.add_handler(manejador)

    print("🤖 EduLLM Bot en ejecución...")

    # Explicit start-up sequence; per the original author's note it keeps
    # the event loop from being closed.
    await application.initialize()
    await application.start()
    await application.updater.start_polling()