raghavNCI
committed on
Commit
·
2c39b8a
1
Parent(s):
43cf665
changes v18
Browse files- question.py +15 -4
question.py
CHANGED
@@ -25,14 +25,25 @@ HEADERS = {
|
|
25 |
"Content-Type": "application/json"
|
26 |
}
|
27 |
|
28 |
-
def extract_last_keywords(raw: str, max_keywords=
|
29 |
segments = raw.strip().split("\n")
|
|
|
|
|
30 |
for line in reversed(segments):
|
31 |
-
|
32 |
-
if
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
return []
|
35 |
|
|
|
36 |
def is_relevant(article, keywords):
    """Return True if any keyword occurs in the article's title or content.

    Matching is a case-insensitive substring test against the title and
    content joined by a single space.

    Args:
        article: Mapping read via ``.get('title')`` and ``.get('content')``.
            Missing or null values are treated as empty text.
        keywords: Iterable of keyword strings. Empty entries are skipped.

    Returns:
        bool: True when at least one non-empty keyword is found.
    """
    # `.get(key, '')` only covers a missing key; a key present with a None
    # value would be rendered as the literal text "None" by the f-string and
    # could produce false matches, so fall back to '' explicitly.
    title = article.get('title') or ''
    content = article.get('content') or ''
    text = f"{title} {content}".lower()
    # An empty keyword is a substring of every string; skip it so it cannot
    # mark every article as relevant.
    return any(kw.lower() in text for kw in keywords if kw)
|
|
|
25 |
"Content-Type": "application/json"
|
26 |
}
|
27 |
|
28 |
+
def extract_last_keywords(raw: str, max_keywords: int = 8):
    """Return the keyword list found on the last keyword-like line of *raw*.

    Lines are scanned from last to first. A line is skipped when it is
    blank, shorter than 10 characters, or starts with "extract"
    (case-insensitive). The first remaining line that contains at least two
    commas, splits into between 1 and ``max_keywords`` items, and has no
    item longer than three words is returned as a list of keywords.

    Args:
        raw: Multi-line text to scan.
        max_keywords: Maximum number of items a line may contain to be
            accepted as a keyword list.

    Returns:
        A list of keyword strings with surrounding whitespace and double
        quotes removed, or an empty list when no line qualifies.
    """
    for line in reversed(raw.strip().split("\n")):
        line = line.strip()
        # Skip blank lines, very short lines, and lines starting with "extract".
        if not line or len(line) < 10 or line.lower().startswith("extract"):
            continue

        # Look for lines with multiple comma-separated items.
        if line.count(",") < 2:
            continue

        # Strip quotes first, then trim again and drop anything left empty:
        # an item such as `""` would otherwise survive as an empty keyword,
        # and an empty string is a substring of every text.
        cleaned = (item.strip().strip('"').strip() for item in line.split(","))
        parts = [item for item in cleaned if item]

        # Reject long phrases or sentence fragments, and oversized lists.
        if 1 <= len(parts) <= max_keywords and all(
            len(part.split()) <= 3 for part in parts
        ):
            return parts

    return []
|
45 |
|
46 |
+
|
47 |
def is_relevant(article, keywords):
    """Return True if any keyword occurs in the article's title or content.

    Matching is a case-insensitive substring test against the title and
    content joined by a single space.

    Args:
        article: Mapping read via ``.get('title')`` and ``.get('content')``.
            Missing or null values are treated as empty text.
        keywords: Iterable of keyword strings. Empty entries are skipped.

    Returns:
        bool: True when at least one non-empty keyword is found.
    """
    # `.get(key, '')` only covers a missing key; a key present with a None
    # value would be rendered as the literal text "None" by the f-string and
    # could produce false matches, so fall back to '' explicitly.
    title = article.get('title') or ''
    content = article.get('content') or ''
    text = f"{title} {content}".lower()
    # An empty keyword is a substring of every string; skip it so it cannot
    # mark every article as relevant.
    return any(kw.lower() in text for kw in keywords if kw)
|