Class_Schedule_Generator_AI

Sleeping

App Files Files Community

cgreszes commited on 24 days ago

Commit

30bee5d

verified ·

1 Parent(s): f630108

Create constraint_parser.py

Browse files

Files changed (1) hide show

constraint_parser.py +179 -0

constraint_parser.py ADDED Viewed

	@@ -0,0 +1,179 @@

+# constraint_parser.py
+from __future__ import annotations
+import json
+import re
+from dataclasses import dataclass, asdict
+from typing import Dict, List, Optional
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+# Small, cheap, works on CPU:
+MODEL_NAME = "google/flan-t5-small"
+_tokenizer = None
+_pipe = None
+def _lazy_pipe():
+    global _tokenizer, _pipe
+    if _pipe is None:
+        _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+        _pipe = pipeline("text2text-generation", model=model, tokenizer=_tokenizer)
+    return _pipe
+DAYS_ALIASES = {
+    "sun": "Sun", "sunday": "Sun", "sundays": "Sun",
+    "mon": "Mon", "monday": "Mon", "mondays": "Mon",
+    "tue": "Tue", "tuesday": "Tue", "tuesdays": "Tue",
+    "wed": "Wed", "wednesday": "Wed", "wednesdays": "Wed",
+    "thu": "Thu", "thursday": "Thu", "thursdays": "Thu",
+    "fri": "Fri", "friday": "Fri", "fridays": "Fri",
+    "sat": "Sat", "saturday": "Sat", "saturdays": "Sat",
+}
+@dataclass
+class ParsedConstraints:
+    subject_counts: Dict[str, int]          # e.g., {"Computer Science": 4}
+    banned_days: List[str]                  # ["Sun","Fri"]
+    no_before: Optional[str] = None         # "10:00 AM"
+    no_after: Optional[str] = None          # "6:00 PM"
+    keywords: List[str] = None              # ["software engineering","algorithms"]
+    banned_professors: List[str] = None     # optional future use
+    def to_json(self) -> str:
+        return json.dumps(asdict(self))
+SCHEMA_INSTRUCTIONS = """You are a parser. Convert the user request into STRICT JSON with this schema:
+{
+  "subject_counts": {"<SubjectName>": <int>, ...},     // optional
+  "banned_days": ["Sun","Mon","Tue","Wed","Thu","Fri","Sat"], // optional
+  "no_before": "H:MM AM/PM",  // optional
+  "no_after": "H:MM AM/PM",   // optional
+  "keywords": ["..."],        // optional: thematic words/phrases to prefer
+  "banned_professors": ["..."] // optional
+}
+Only output valid JSON. No commentary.
+"""
+def _normalize_days(days):
+    out = []
+    for d in days:
+        k = d.strip().lower()
+        if k in DAYS_ALIASES:
+            std = DAYS_ALIASES[k]
+            if std not in out:
+                out.append(std)
+    return out
+def try_regex_first(text: str) -> ParsedConstraints:
+    """Cheap guardrail: catch obvious patterns before LLM."""
+    t = text.lower()
+    # days
+    days = set([m.group(1) for m in re.finditer(
+        r"(sundays?|mondays?|tuesdays?|wednesdays?|thursdays?|fridays?|saturdays?|sun|mon|tue|wed|thu|fri|sat)", t)])
+    banned_days = _normalize_days(days) if ("no classes on" in t or "don't give me classes on" in t or "dont give me classes on" in t) else []
+    # times
+    before = re.search(r"no classes before ([0-9]{1,2})(?::([0-9]{2}))?\s*(am|pm)", t)
+    after  = re.search(r"no classes after ([0-9]{1,2})(?::([0-9]{2}))?\s*(am|pm)", t)
+    def fmt(h,m,ap):
+        mm = m if m else "00"
+        return f"{int(h)}:{mm} {ap.upper()}"
+    no_before = fmt(*before.groups()) if before else None
+    no_after  = fmt(*after.groups()) if after else None
+    # subject count like "4 cs classes" / "three computer science"
+    subject_counts = {}
+    counts_map = {"one":1,"two":2,"three":3,"four":4,"five":5,"six":6}
+    # cheap mapping aliases
+    subj_alias = {
+        "cs":"Computer Science",
+        "computer science":"Computer Science",
+        "math":"Mathematics",
+        "econ":"Economics",
+        "psych":"Psychology",
+        "bio":"Biology",
+        "chem":"Chemistry",
+        "phys":"Physics",
+        "art history":"Art History",
+        "philosophy":"Philosophy",
+        "finance":"Finance",
+    }
+    m = re.findall(r"(\b\d+\b|\bone\b|\btwo\b|\bthree\b|\bfour\b|\bfive\b|\bsix\b)\s+([a-z ]+?)\s+classes", t)
+    for num, subj_phrase in m:
+        cnt = int(num) if num.isdigit() else counts_map.get(num, None)
+        if not cnt: continue
+        sp = subj_phrase.strip()
+        # resolve subject
+        resolved = None
+        for k,v in subj_alias.items():
+            if k in sp:
+                resolved = v; break
+        if resolved:
+            subject_counts[resolved] = subject_counts.get(resolved, 0) + cnt
+    return ParsedConstraints(
+        subject_counts=subject_counts,
+        banned_days=banned_days,
+        no_before=no_before,
+        no_after=no_after,
+        keywords=[],
+        banned_professors=[]
+    )
+def parse_constraints(text: str) -> ParsedConstraints:
+    """LLM parse with regex fallback & JSON repair."""
+    if not text or not text.strip():
+        return ParsedConstraints(subject_counts={}, banned_days=[], keywords=[], banned_professors=[])
+    # Start with regex quick pass
+    base = try_regex_first(text)
+    prompt = SCHEMA_INSTRUCTIONS + "\nUser request: " + text.strip()
+    out = _lazy_pipe()(prompt, max_new_tokens=256, temperature=0.0)[0]["generated_text"]
+    # try straight parse
+    obj = None
+    try:
+        obj = json.loads(out)
+    except Exception:
+        # attempt to extract JSON substring
+        start = out.find("{")
+        end = out.rfind("}")
+        if start != -1 and end != -1 and end > start:
+            try:
+                obj = json.loads(out[start:end+1])
+            except Exception:
+                obj = None
+    if not isinstance(obj, dict):
+        # fall back to regex-only result
+        return base
+    # merge with base
+    def get(k, default):
+        v = obj.get(k, default)
+        return v if v is not None else default
+    subject_counts = base.subject_counts.copy()
+    for k,v in get("subject_counts", {}).items():
+        if isinstance(v, int) and v > 0:
+            subject_counts[k] = subject_counts.get(k, 0) + v
+    banned_days = list({*base.banned_days, *(_normalize_days(get("banned_days", [])))})
+    no_before = get("no_before", base.no_before)
+    no_after  = get("no_after", base.no_after)
+    keywords = get("keywords", []) or base.keywords
+    banned_professors = get("banned_professors", []) or base.banned_professors
+    return ParsedConstraints(
+        subject_counts=subject_counts,
+        banned_days=banned_days,
+        no_before=no_before,
+        no_after=no_after,
+        keywords=keywords or [],
+        banned_professors=banned_professors or []
+    )