j-js committed on
Commit
ffa674c
·
verified ·
1 Parent(s): 6ccd1fc

Update context_parser.py

Browse files
Files changed (1) hide show
  1. context_parser.py +104 -33
context_parser.py CHANGED
@@ -1,31 +1,115 @@
1
  from __future__ import annotations
2
 
3
  import re
4
- from typing import Any, Dict, List, Optional, Tuple
5
 
6
 
7
- def split_unity_message(text: str) -> Tuple[str, str]:
8
- """
9
- Splits a Unity-style message into:
10
- - hidden/system/game context prefix
11
- - actual user-facing message
 
12
 
13
- If no obvious split is found, returns ("", original_text).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  """
15
  raw = (text or "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  if not raw:
17
- return "", ""
18
 
19
- # Pattern like:
20
- # CONTEXT: ...
21
- # USER: ...
22
- m = re.search(r"(?is)^(.*?)(?:\buser\b|\bprompt\b|\bmessage\b)\s*:\s*(.+)$", raw)
23
- if m:
24
- hidden = (m.group(1) or "").strip()
25
- user = (m.group(2) or "").strip()
26
- return hidden, user
27
 
28
- return "", raw
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  def _extract_options(text: str) -> List[str]:
@@ -42,7 +126,6 @@ def _extract_options(text: str) -> List[str]:
42
  if options:
43
  return options
44
 
45
- # fallback: inline A) ... B) ...
46
  matches = re.findall(r"(?:^|\s)([A-E])[\)\.\:]\s*(.*?)(?=(?:\s+[A-E][\)\.\:])|$)", text, flags=re.I | re.S)
47
  if matches:
48
  return [m[1].strip() for m in matches if m[1].strip()]
@@ -51,10 +134,6 @@ def _extract_options(text: str) -> List[str]:
51
 
52
 
53
  def extract_game_context_fields(text: str) -> Dict[str, Any]:
54
- """
55
- Extracts lightweight structured fields from hidden Unity/game context.
56
- Always returns stable keys expected by app.py.
57
- """
58
  raw = (text or "").strip()
59
 
60
  result: Dict[str, Any] = {
@@ -70,33 +149,27 @@ def extract_game_context_fields(text: str) -> Dict[str, Any]:
70
  if not raw:
71
  return result
72
 
73
- # question
74
  q_match = re.search(r"\bquestion\s*[:=]\s*(.+?)(?=\n[A-Za-z_ ]+\s*[:=]|\Z)", raw, flags=re.I | re.S)
75
  if q_match:
76
  result["question"] = q_match.group(1).strip()
77
 
78
- # options block
79
  opt_match = re.search(r"\b(?:options|choices|answers)\s*[:=]\s*(.+?)(?=\n[A-Za-z_ ]+\s*[:=]|\Z)", raw, flags=re.I | re.S)
80
  if opt_match:
81
  result["options"] = _extract_options(opt_match.group(1))
82
 
83
- # if no explicit options block, scan whole context
84
  if not result["options"]:
85
  result["options"] = _extract_options(raw)
86
 
87
  result["has_choices"] = len(result["options"]) > 0
88
 
89
- # difficulty
90
  difficulty_match = re.search(r"\bdifficulty\s*[:=]\s*([A-Za-z0-9_\- ]+)", raw, flags=re.I)
91
  if difficulty_match:
92
  result["difficulty"] = difficulty_match.group(1).strip()
93
 
94
- # category/topic
95
  category_match = re.search(r"\b(?:category|topic)\s*[:=]\s*([A-Za-z0-9_\- /]+)", raw, flags=re.I)
96
  if category_match:
97
  result["category"] = category_match.group(1).strip()
98
 
99
- # money/balance
100
  money_match = re.search(r"\b(?:money|balance|bank)\s*[:=]\s*([\-]?\d+(?:\.\d+)?)", raw, flags=re.I)
101
  if money_match:
102
  try:
@@ -127,10 +200,6 @@ def extract_game_context_fields(text: str) -> Dict[str, Any]:
127
 
128
 
129
  def detect_intent(text: str, incoming_help_mode: Optional[str] = None) -> str:
130
- """
131
- Returns one of:
132
- answer, hint, instruction, walkthrough, explain, method, definition, concept
133
- """
134
  forced = (incoming_help_mode or "").strip().lower()
135
  if forced in {
136
  "answer",
@@ -213,8 +282,10 @@ def detect_intent(text: str, incoming_help_mode: Optional[str] = None) -> str:
213
 
214
 
215
  def intent_to_help_mode(intent: str) -> str:
216
- if intent in {"walkthrough", "step_by_step", "explain", "method", "concept"}:
217
  return "walkthrough"
 
 
218
  if intent == "hint":
219
  return "hint"
220
  if intent in {"definition", "instruction"}:
 
1
  from __future__ import annotations
2
 
3
  import re
4
+ from typing import Any, Dict, List, Optional
5
 
6
 
7
+ def _to_bool(value: str) -> bool:
8
+ return str(value or "").strip().lower() in {"true", "1", "yes", "y"}
9
+
10
+
11
+ def _looks_like_int(value: str) -> bool:
12
+ return bool(re.fullmatch(r"-?\d+", str(value or "").strip()))
13
 
14
+
15
def split_unity_message(text: str) -> Dict[str, Any]:
    """Parse a Unity-style message into a dict with a fixed set of keys.

    Three layouts are recognised, tried in this order:

    1. Tagged: optional hidden context followed by ``USER:``, ``PROMPT:`` or
       ``MESSAGE:`` (case-insensitive) and the user-facing text.
    2. Positional: at least nine non-blank lines whose third line is an
       integer and whose fifth line is ``True``/``False``, for example::

           hint
           x/5 = 12
           0
           solve
           False
           answer
           answer
           algebra
           Quantitative

       The lines map, in order, to ``user_text``, ``question_text``,
       ``hint_stage``, ``user_last_input_type``, ``built_on_previous_turn``,
       ``help_mode``, ``intent``, ``topic`` and ``category``.
    3. Field-based: ``name: value`` or ``name = value`` lines, one per line.

    Anything else is returned as plain user text.

    Args:
        text: Raw incoming message; ``None`` or blank input is treated as
            empty.

    Returns:
        A dict that always contains ``hidden_context``, ``user_text``,
        ``question_text``, ``hint_stage`` (int), ``user_last_input_type``,
        ``built_on_previous_turn`` (bool), ``help_mode``, ``intent``,
        ``topic`` and ``category``. Parts that are not found keep their
        defaults (empty string, ``0`` or ``False``).
    """
    raw = (text or "").strip()

    result: Dict[str, Any] = {
        "hidden_context": "",
        "user_text": raw,
        "question_text": "",
        "hint_stage": 0,
        "user_last_input_type": "",
        "built_on_previous_turn": False,
        "help_mode": "",
        "intent": "",
        "topic": "",
        "category": "",
    }

    if not raw:
        return result

    # Case 1: hidden/system context followed by USER:/PROMPT:/MESSAGE:
    # NOTE(review): this runs first, so a field-based payload that contains a
    # "user:" line is split here and its other fields stay in hidden_context.
    tagged_match = re.search(
        r"(?is)^(.*?)(?:\buser\b|\bprompt\b|\bmessage\b)\s*:\s*(.+)$", raw
    )
    if tagged_match:
        result["hidden_context"] = (tagged_match.group(1) or "").strip()
        result["user_text"] = (tagged_match.group(2) or "").strip()
        return result

    # Case 2: exact structured Unity payload block (positional lines).
    lines = [line.strip() for line in raw.splitlines() if line.strip()]
    if (
        len(lines) >= 9
        and _looks_like_int(lines[2])
        and lines[4].lower() in {"true", "false"}
    ):
        result["user_text"] = lines[0]
        result["question_text"] = lines[1]
        result["hint_stage"] = int(lines[2])
        result["user_last_input_type"] = lines[3]
        result["built_on_previous_turn"] = _to_bool(lines[4])
        result["help_mode"] = lines[5]
        result["intent"] = lines[6]
        result["topic"] = lines[7]
        result["category"] = lines[8]
        return result

    # Case 3: field-based payload.
    def _extract_field(name: str) -> str:
        # Only horizontal whitespace may surround the separator. A plain \s
        # also matches newlines, which made an empty field ("topic:") swallow
        # the entire following line as its value.
        match = re.search(
            rf"(?im)^[^\S\r\n]*{re.escape(name)}[^\S\r\n]*[:=][^\S\r\n]*(.+?)[^\S\r\n]*$",
            raw,
        )
        return match.group(1).strip() if match else ""

    question_text = _extract_field("question") or _extract_field("question_text")
    user_text = (
        _extract_field("user")
        or _extract_field("message")
        or _extract_field("prompt")
    )
    hint_stage_text = _extract_field("hint_stage")
    user_last_input_type = _extract_field("user_last_input_type")
    built_on_previous_turn = _extract_field("built_on_previous_turn")
    help_mode = _extract_field("help_mode")
    intent = _extract_field("intent")
    topic = _extract_field("topic")
    category = _extract_field("category")

    found_any_field = any(
        [
            question_text,
            user_text,
            hint_stage_text,
            user_last_input_type,
            built_on_previous_turn,
            help_mode,
            intent,
            topic,
            category,
        ]
    )
    if found_any_field:
        result["question_text"] = question_text
        result["user_text"] = user_text or raw
        # hint_stage and built_on_previous_turn already default to 0 / False,
        # so they are only overridden when the field is actually present.
        if hint_stage_text and _looks_like_int(hint_stage_text):
            result["hint_stage"] = int(hint_stage_text)
        result["user_last_input_type"] = user_last_input_type
        if built_on_previous_turn:
            result["built_on_previous_turn"] = _to_bool(built_on_previous_turn)
        result["help_mode"] = help_mode
        result["intent"] = intent
        result["topic"] = topic
        result["category"] = category
        return result

    # Fallback: plain message.
    return result
113
 
114
 
115
  def _extract_options(text: str) -> List[str]:
 
126
  if options:
127
  return options
128
 
 
129
  matches = re.findall(r"(?:^|\s)([A-E])[\)\.\:]\s*(.*?)(?=(?:\s+[A-E][\)\.\:])|$)", text, flags=re.I | re.S)
130
  if matches:
131
  return [m[1].strip() for m in matches if m[1].strip()]
 
134
 
135
 
136
  def extract_game_context_fields(text: str) -> Dict[str, Any]:
 
 
 
 
137
  raw = (text or "").strip()
138
 
139
  result: Dict[str, Any] = {
 
149
  if not raw:
150
  return result
151
 
 
152
  q_match = re.search(r"\bquestion\s*[:=]\s*(.+?)(?=\n[A-Za-z_ ]+\s*[:=]|\Z)", raw, flags=re.I | re.S)
153
  if q_match:
154
  result["question"] = q_match.group(1).strip()
155
 
 
156
  opt_match = re.search(r"\b(?:options|choices|answers)\s*[:=]\s*(.+?)(?=\n[A-Za-z_ ]+\s*[:=]|\Z)", raw, flags=re.I | re.S)
157
  if opt_match:
158
  result["options"] = _extract_options(opt_match.group(1))
159
 
 
160
  if not result["options"]:
161
  result["options"] = _extract_options(raw)
162
 
163
  result["has_choices"] = len(result["options"]) > 0
164
 
 
165
  difficulty_match = re.search(r"\bdifficulty\s*[:=]\s*([A-Za-z0-9_\- ]+)", raw, flags=re.I)
166
  if difficulty_match:
167
  result["difficulty"] = difficulty_match.group(1).strip()
168
 
 
169
  category_match = re.search(r"\b(?:category|topic)\s*[:=]\s*([A-Za-z0-9_\- /]+)", raw, flags=re.I)
170
  if category_match:
171
  result["category"] = category_match.group(1).strip()
172
 
 
173
  money_match = re.search(r"\b(?:money|balance|bank)\s*[:=]\s*([\-]?\d+(?:\.\d+)?)", raw, flags=re.I)
174
  if money_match:
175
  try:
 
200
 
201
 
202
  def detect_intent(text: str, incoming_help_mode: Optional[str] = None) -> str:
 
 
 
 
203
  forced = (incoming_help_mode or "").strip().lower()
204
  if forced in {
205
  "answer",
 
282
 
283
 
284
  def intent_to_help_mode(intent: str) -> str:
285
+ if intent in {"walkthrough", "step_by_step"}:
286
  return "walkthrough"
287
+ if intent in {"explain", "method", "concept"}:
288
+ return "explain"
289
  if intent == "hint":
290
  return "hint"
291
  if intent in {"definition", "instruction"}: