File size: 1,152 Bytes
aefa1e1 f00f379 aefa1e1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# nuse_modules/keyword_extractor.py
import os
import requests
import json
from models_initialization.mistral_registry import mistral_generate
def extract_last_keywords(raw: str, max_keywords: int = 8) -> list[str]:
segments = raw.strip().split("\n")
for line in reversed(segments):
line = line.strip()
if line.lower().startswith("extract") or not line or len(line) < 10:
continue
if line.count(",") >= 2:
parts = [kw.strip().strip('"') for kw in line.split(",") if kw.strip()]
if all(len(p.split()) <= 3 for p in parts) and 1 <= len(parts) <= max_keywords:
return parts
return []
def keywords_extractor(question: str) -> list[str]:
prompt = (
f"Extract the 3–6 most important keywords from the following question. "
f"Return only the keywords, comma-separated (no explanations):\n\n"
f"{question}"
)
raw_output = mistral_generate(prompt, max_new_tokens=32)
keywords = extract_last_keywords(raw_output)
print("Raw extracted keywords:", raw_output)
print("Parsed keywords:", keywords)
return keywords
|