# nuse_modules/keyword_extractor.py

import os
import requests
import json

from models_initialization.mistral_registry import mistral_generate


def extract_last_keywords(raw: str, max_keywords: int = 8) -> list[str]:
    """Return the keyword list found on the last usable line of *raw*.

    Lines are scanned from the bottom up. A line is skipped when it is
    empty, shorter than 10 characters, or starts with "extract"
    (case-insensitive). The first remaining line that contains at least
    two commas is split on commas; each item is stripped of surrounding
    whitespace and double quotes, and items that end up empty are dropped.
    The line is accepted when it yields between 1 and ``max_keywords``
    items, each at most three words long; otherwise the scan continues
    with the line above it.

    Args:
        raw: Text to scan, one candidate keyword list per line.
        max_keywords: Largest number of keywords an accepted line may hold.

    Returns:
        The cleaned keywords in their original order, or an empty list when
        no line qualifies (including when ``raw`` is empty or ``None``).
    """
    # Guard against a missing result so callers get "no keywords" instead
    # of an AttributeError on ``None.strip()``.
    if not raw:
        return []

    for line in reversed(raw.strip().split("\n")):
        line = line.strip()
        # NOTE(review): the "extract" prefix check presumably skips an echoed
        # copy of the instruction prompt -- confirm against mistral_generate.
        if not line or len(line) < 10 or line.lower().startswith("extract"):
            continue
        if line.count(",") < 2:
            continue

        # Clean every item first and filter afterwards, so that items that
        # were only quotes (e.g. '""') do not survive as empty keywords and
        # padding inside quotes (e.g. '" foo "') is removed.
        cleaned = (piece.strip().strip('"').strip() for piece in line.split(","))
        parts = [kw for kw in cleaned if kw]

        if 1 <= len(parts) <= max_keywords and all(
            len(p.split()) <= 3 for p in parts
        ):
            return parts
    return []


def keywords_extractor(question: str) -> list[str]:
    """Extract keywords from *question* using ``mistral_generate``.

    Builds an instruction prompt asking for 3-6 comma-separated keywords,
    sends it to ``mistral_generate`` with ``max_new_tokens=32``, and parses
    the returned text with ``extract_last_keywords``. The raw model output
    and the parsed keywords are printed to stdout for diagnostics.

    Args:
        question: The question to extract keywords from.

    Returns:
        The parsed keywords, or an empty list when the model output contains
        no line that ``extract_last_keywords`` accepts.
    """
    prompt = (
        "Extract the 3–6 most important keywords from the following question. "
        "Return only the keywords, comma-separated (no explanations):\n\n"
        f"{question}"
    )
    raw_output = mistral_generate(prompt, max_new_tokens=32)
    keywords = extract_last_keywords(raw_output)
    print("Raw extracted keywords:", raw_output)
    print("Parsed keywords:", keywords)
    return keywords