Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,86 @@ from constraint_parser_llm import parse_constraints
|
|
14 |
from constraint_parser import parse_constraints
|
15 |
from semantic_ranker import score_courses # optional; keep if you want semantic boost
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Robust day parsing for phrases like "no class on monday", "avoid mon", "not on mondays", etc.
|
18 |
DAY_ALIASES = {
|
19 |
"sunday": "Sun", "sundays": "Sun", "sun": "Sun",
|
|
|
14 |
from constraint_parser import parse_constraints
|
15 |
from semantic_ranker import score_courses # optional; keep if you want semantic boost
|
16 |
|
17 |
+
# -------- Robust free-text constraint parsing (deterministic fallback) --------
|
18 |
+
import re
|
19 |
+
|
20 |
+
# Lowercase spellings of each day (full name, plural, and common
# abbreviations) mapped to the canonical three-letter abbreviation.
# NOTE(review): another `DAY_ALIASES = {` assignment appears further down in
# this file; if it is still present it rebinds the name at import time and
# would override this table -- confirm which definition should remain.
DAY_ALIASES = {
    "sunday": "Sun", "sundays": "Sun", "sun": "Sun",
    "monday": "Mon", "mondays": "Mon", "mon": "Mon",
    "tuesday": "Tue", "tuesdays": "Tue", "tue": "Tue", "tues": "Tue",
    "wednesday": "Wed", "wednesdays": "Wed", "wed": "Wed", "weds": "Wed",
    "thursday": "Thu", "thursdays": "Thu", "thu": "Thu", "thur": "Thu", "thurs": "Thu",
    "friday": "Fri", "fridays": "Fri", "fri": "Fri",
    "saturday": "Sat", "saturdays": "Sat", "sat": "Sat",
}
# Canonical abbreviations grouped for the "no weekends" / "weekdays only"
# style shortcuts.
WEEKEND = {"Sat", "Sun"}
WEEKDAYS = {"Mon", "Tue", "Wed", "Thu", "Fri"}
|
31 |
+
|
32 |
+
def _norm_ampm(h: str, m: str|None, ap: str|None) -> str:
|
33 |
+
h_i = int(h)
|
34 |
+
m_s = (m or "00")
|
35 |
+
ap = (ap or "").upper()
|
36 |
+
if ap not in ("AM", "PM"):
|
37 |
+
# heuristic: <= 8 -> AM, >= 4 -> PM; otherwise assume AM
|
38 |
+
ap = "AM" if h_i <= 11 else "PM"
|
39 |
+
return f"{h_i}:{m_s} {ap}"
|
40 |
+
|
41 |
+
def parse_det_constraints(text: str) -> dict:
    """Extract scheduling constraints from free text with regex rules only.

    Args:
        text: User-written constraint text; may be empty or ``None``.

    Returns:
        A dict with three keys:

        - ``"banned_days"``: set of day abbreviations taken from the values
          of ``DAY_ALIASES`` / ``WEEKEND`` / ``WEEKDAYS``.
        - ``"no_before"``: earliest allowed start as formatted by
          ``_norm_ampm``, or ``None``.
        - ``"no_after"``: latest allowed end as formatted by ``_norm_ampm``,
          or ``None``.

    Notes:
        - "after N" and negated "before" phrases ("not before N",
          "no classes before N", "no earlier than N") set ``no_before``.
        - "before N" and negated "after" phrases ("not after N",
          "no classes after N", "no later than N") set ``no_after``.
        - A plain phrase takes precedence over a negated one for the same
          bound.
        - A negated day-part ("no mornings", "avoid evenings") overwrites the
          bound it affects; a plain mention ("mornings") only fills a bound
          that is still unset.
    """
    res = {"banned_days": set(), "no_before": None, "no_after": None}
    if not text:
        return res
    # Collapse whitespace runs so the single-space literals in the patterns
    # below still match input such as "not  before\n10".
    t = " ".join(text.lower().split())

    # Weekends / weekdays
    if re.search(r"\bno weekends?\b|\bweekdays only\b|\bonly on weekdays\b", t):
        res["banned_days"] |= WEEKEND
    if re.search(r"\bweekends only\b|\bonly on weekends\b|\bno weekdays\b", t):
        res["banned_days"] |= WEEKDAYS

    # Ban specific days with broad triggers,
    # e.g. "no class on monday", "avoid mon", "not on tuesdays".
    triggers = r"(?:no|avoid|except|skip|without|not on|exclude|ban|block|never on)"
    for alias, abbr in DAY_ALIASES.items():
        if re.search(rf"\b{triggers}\s+(?:classes?|lectures?)?\s*(?:on\s*)?{re.escape(alias)}\b", t):
            res["banned_days"].add(abbr)

    # Clock time following a keyword: "10", "10:30", "10 am", "10:30pm".
    clock = r"\s+(?P<h>\d{1,2})(?::(?P<m>\d{2}))?\s*(?P<ap>am|pm)?\b"

    def _stamp(match):
        return _norm_ampm(match["h"], match["m"], match["ap"])

    # Negated bounds flip the meaning of the keyword: "not before 10" is an
    # earliest start, "no classes after 5 pm" is a latest end.
    negated_bound = re.compile(
        r"\b(?:"
        r"(?:not|never|nothing|no(?:\s+(?:classes?|lectures?))?)\s+"
        r"(?:(?:start|end)\s+)?(?P<kw>before|after)"
        r"|no\s+(?P<cmp>earlier|later)\s+than"
        r")" + clock
    )
    neg_earliest = neg_latest = None
    for match in negated_bound.finditer(t):
        if match["kw"] == "before" or match["cmp"] == "earlier":
            neg_earliest = neg_earliest or _stamp(match)
        else:
            neg_latest = neg_latest or _stamp(match)

    # Remove the negated phrases before looking for plain "after N" /
    # "before N"; otherwise "not before 10" would also be read as
    # "before 10" and set both bounds to the same time.
    plain = negated_bound.sub(" ", t)
    m_after = re.search(r"\b(?:only\s*)?(?:start\s*)?after" + clock, plain)
    m_before = re.search(r"\bbefore" + clock, plain)

    if m_after:
        res["no_before"] = _stamp(m_after)
    elif neg_earliest:
        res["no_before"] = neg_earliest
    if m_before:
        res["no_after"] = _stamp(m_before)
    elif neg_latest:
        res["no_after"] = neg_latest

    # "mornings / afternoons / evenings" shortcuts.
    # Matches e.g. "no mornings", "avoid evenings", "not a morning person",
    # "no classes in the afternoon".
    negation = (
        r"\b(?:no|not|avoid|skip|without|exclude|never)\s+(?:a\s+)?"
        r"(?:(?:early|late)\s+)?(?:(?:classes?|lectures?)\s+)?"
        r"(?:(?:in|during|on)\s+)?(?:the\s+)?"
    )
    day_parts = (
        # word, bound set when negated, bound set when mentioned, boundary
        ("morning", "no_before", "no_after", "12:00 PM"),
        ("afternoon", "no_after", "no_before", "12:00 PM"),
        ("evening", "no_after", "no_before", "4:00 PM"),
    )
    for word, negated_key, mentioned_key, boundary in day_parts:
        if re.search(negation + word + r"s?\b", t):
            res[negated_key] = boundary
        elif re.search(rf"\b{word}s?\b", t):
            # Only reached when the day-part is not negated, so
            # "no evenings" can no longer also set no_before to 4 PM.
            res[mentioned_key] = res[mentioned_key] or boundary

    return res
|
96 |
+
|
97 |
# Robust day parsing for phrases like "no class on monday", "avoid mon", "not on mondays", etc.
|
98 |
DAY_ALIASES = {
|
99 |
"sunday": "Sun", "sundays": "Sun", "sun": "Sun",
|