# nuse_modules/keyword_extractor.py
import json
import os

import requests

from models_initialization.mistral_registry import mistral_generate
def extract_last_keywords(raw: str, max_keywords: int = 8) -> list[str]:
    """Return the keywords found on the last keyword-looking line of *raw*.

    Lines are scanned from the bottom up. A line is accepted when it
    contains at least two commas, every comma-separated item is at most
    three words long, and the number of items is between 1 and
    *max_keywords*. Surrounding whitespace and double quotes are removed
    from each item, and items that end up empty are dropped.

    Args:
        raw: Text to scan (e.g. raw model output).
        max_keywords: Largest number of keywords a line may hold and still
            be accepted.

    Returns:
        The keywords of the first accepted line (searching from the end),
        or an empty list when no line qualifies or *raw* is empty.
    """
    if not raw:
        return []

    for line in reversed(raw.strip().split("\n")):
        line = line.strip()
        # Skip blank/very short lines and lines beginning with "extract"
        # (case-insensitive); neither is treated as a keyword list.
        if len(line) < 10 or line.lower().startswith("extract"):
            continue
        # Fewer than two commas means fewer than three items: not a list.
        if line.count(",") < 2:
            continue

        cleaned = (chunk.strip().strip('"').strip() for chunk in line.split(","))
        # Filter AFTER removing quotes so a token such as `""` cannot
        # produce an empty-string keyword.
        parts = [kw for kw in cleaned if kw]

        if 1 <= len(parts) <= max_keywords and all(
            len(kw.split()) <= 3 for kw in parts
        ):
            return parts

    return []
def keywords_extractor(question: str) -> list[str]:
    """Ask the Mistral model for the key terms of *question* and parse them.

    Builds a keyword-extraction prompt around *question*, sends it to
    ``mistral_generate`` with a 32-token generation budget, and parses the
    result with ``extract_last_keywords``. Both the raw model output and
    the parsed keywords are printed to stdout.

    Args:
        question: The question to extract keywords from.

    Returns:
        The parsed keywords, or an empty list when no line of the model
        output looks like a keyword list.
    """
    prompt = (
        "Extract the 3–6 most important keywords from the following question. "
        "Return only the keywords, comma-separated (no explanations):\n\n"
        f"{question}"
    )
    # NOTE(review): assumes mistral_generate returns the generated text as a
    # str (the parser calls str methods on it) — confirm against its definition.
    raw_output = mistral_generate(prompt, max_new_tokens=32)
    keywords = extract_last_keywords(raw_output)
    print("Raw extracted keywords:", raw_output)
    print("Parsed keywords:", keywords)
    return keywords