j-js committed on
Commit
75970da
·
verified ·
1 Parent(s): c3731ea

Update conversation_logic.py

Browse files
Files changed (1) hide show
  1. conversation_logic.py +89 -12
conversation_logic.py CHANGED
@@ -8,8 +8,8 @@ from formatting import format_reply
8
  from generator_engine import GeneratorEngine
9
  from models import RetrievedChunk, SolverResult
10
  from quant_solver import is_quant_question, solve_quant
 
11
  from retrieval_engine import RetrievalEngine
12
- from utils import short_lines
13
 
14
 
15
  RETRIEVAL_ALLOWED_INTENTS = {
@@ -56,6 +56,30 @@ STRUCTURE_KEYWORDS = {
56
  "number_properties": [
57
  "integer", "odd", "even", "prime", "divisible", "factor", "multiple",
58
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  }
60
 
61
  INTENT_KEYWORDS = {
@@ -80,6 +104,7 @@ MISMATCH_TERMS = {
80
  "probability": ["absolute value", "circle area", "quadratic"],
81
  "geometry": ["absolute value", "prime", "median salary"],
82
  "number_properties": ["circle", "triangle", "absolute value"],
 
83
  }
84
 
85
 
@@ -94,27 +119,29 @@ def _teaching_lines(chunks: List[RetrievedChunk]) -> List[str]:
94
  return lines
95
 
96
 
97
- def _compose_quant_reply(
98
  result: SolverResult,
99
  intent: str,
100
  reveal_answer: bool,
101
  verbosity: float,
 
 
102
  ) -> str:
103
  steps = result.steps or []
104
  internal = result.internal_answer or result.answer_value or ""
105
 
106
  if intent == "hint":
107
- return steps[0] if steps else "Start by translating the wording into an equation."
108
 
109
  if intent == "instruction":
110
  if steps:
111
  return f"First step: {steps[0]}"
112
- return "First, turn the wording into a mathematical relationship."
113
 
114
  if intent == "definition":
115
  if steps:
116
  return f"Here is the idea in context:\n- {steps[0]}"
117
- return "This is asking for the meaning of the term or operation in the problem."
118
 
119
  if intent in {"walkthrough", "step_by_step", "explain", "method", "concept"}:
120
  if not steps:
@@ -139,6 +166,12 @@ def _compose_quant_reply(
139
  if steps:
140
  return steps[0]
141
 
 
 
 
 
 
 
142
  return "I can help with this, but I cannot confidently solve it from the current parse alone yet."
143
 
144
 
@@ -156,12 +189,15 @@ def _extract_keywords(text: str) -> Set[str]:
156
  return {w for w in raw if len(w) > 2 and w not in stop}
157
 
158
 
159
- def _infer_structure_terms(question_text: str, topic: Optional[str]) -> List[str]:
160
  terms: List[str] = []
161
 
162
  if topic and topic in STRUCTURE_KEYWORDS:
163
  terms.extend(STRUCTURE_KEYWORDS[topic])
164
 
 
 
 
165
  q = (question_text or "").lower()
166
 
167
  if "=" in q:
@@ -174,6 +210,14 @@ def _infer_structure_terms(question_text: str, topic: Optional[str]) -> List[str
174
  terms.extend(["multiply", "undo operations"])
175
  if "%" in q or "percent" in q:
176
  terms.extend(["percent", "percentage"])
 
 
 
 
 
 
 
 
177
 
178
  return list(dict.fromkeys(terms))
179
 
@@ -204,9 +248,11 @@ def _is_direct_solve_request(text: str, intent: str) -> bool:
204
  return False
205
 
206
 
207
- def should_retrieve(intent: str, solved: bool, raw_user_text: str) -> bool:
208
  if intent in RETRIEVAL_ALLOWED_INTENTS:
209
  return True
 
 
210
  if not solved:
211
  return True
212
  if _is_direct_solve_request(raw_user_text, intent):
@@ -219,6 +265,7 @@ def _score_chunk(
219
  intent: str,
220
  topic: Optional[str],
221
  question_text: str,
 
222
  ) -> float:
223
  text = f"{chunk.topic} {chunk.text}".lower()
224
  score = 0.0
@@ -230,7 +277,7 @@ def _score_chunk(
230
  elif topic.lower() in text:
231
  score += 2.0
232
 
233
- for term in _infer_structure_terms(question_text, topic):
234
  if term.lower() in text:
235
  score += 1.5
236
 
@@ -253,13 +300,14 @@ def _filter_retrieved_chunks(
253
  intent: str,
254
  topic: Optional[str],
255
  question_text: str,
 
256
  min_score: float = 2.5,
257
  max_chunks: int = 3,
258
  ) -> List[RetrievedChunk]:
259
  scored: List[tuple[float, RetrievedChunk]] = []
260
 
261
  for chunk in chunks:
262
- s = _score_chunk(chunk, intent, topic, question_text)
263
  if s >= min_score:
264
  scored.append((s, chunk))
265
 
@@ -273,6 +321,8 @@ def _build_retrieval_query(
273
  intent: str,
274
  topic: Optional[str],
275
  solved: bool,
 
 
276
  ) -> str:
277
  parts: List[str] = []
278
 
@@ -280,9 +330,15 @@ def _build_retrieval_query(
280
  if base:
281
  parts.append(base)
282
 
 
 
 
283
  if topic:
284
  parts.append(topic)
285
 
 
 
 
286
  if intent in {"definition", "concept"}:
287
  parts.append("definition concept explanation")
288
  elif intent in {"walkthrough", "step_by_step", "method", "instruction"}:
@@ -324,6 +380,15 @@ class ConversationEngine:
324
  solver_input = (question_text or raw_user_text or "").strip()
325
  user_text = (raw_user_text or "").strip()
326
 
 
 
 
 
 
 
 
 
 
327
  resolved_intent = intent or detect_intent(user_text, help_mode)
328
  resolved_help_mode = help_mode or intent_to_help_mode(resolved_intent)
329
  reveal_answer = resolved_help_mode == "answer" or transparency >= 0.8
@@ -334,7 +399,7 @@ class ConversationEngine:
334
  help_mode=resolved_help_mode,
335
  answer_letter=None,
336
  answer_value=None,
337
- topic=None,
338
  used_retrieval=False,
339
  used_generator=False,
340
  internal_answer=None,
@@ -345,23 +410,28 @@ class ConversationEngine:
345
 
346
  selected_chunks: List[RetrievedChunk] = []
347
 
348
- if is_quant_question(solver_input):
349
  solved_result = solve_quant(solver_input)
350
  if solved_result is not None:
351
  result = solved_result
352
  result.help_mode = resolved_help_mode
 
 
353
 
354
- reply = _compose_quant_reply(
355
  result=result,
356
  intent=resolved_intent,
357
  reveal_answer=reveal_answer,
358
  verbosity=verbosity,
 
 
359
  )
360
 
361
  allow_retrieval = should_retrieve(
362
  intent=resolved_intent,
363
  solved=bool(result.solved),
364
  raw_user_text=user_text or solver_input,
 
365
  )
366
 
367
  if allow_retrieval and retrieval_context:
@@ -370,6 +440,7 @@ class ConversationEngine:
370
  intent=resolved_intent,
371
  topic=result.topic,
372
  question_text=solver_input,
 
373
  )
374
  if filtered:
375
  selected_chunks = filtered
@@ -384,6 +455,8 @@ class ConversationEngine:
384
  intent=resolved_intent,
385
  topic=result.topic,
386
  solved=bool(result.solved),
 
 
387
  ),
388
  top_k=6,
389
  )
@@ -392,6 +465,7 @@ class ConversationEngine:
392
  intent=resolved_intent,
393
  topic=result.topic,
394
  question_text=solver_input,
 
395
  )
396
  if filtered:
397
  selected_chunks = filtered
@@ -423,6 +497,9 @@ class ConversationEngine:
423
  "intent": resolved_intent,
424
  "question_text": question_text or "",
425
  "options_count": len(options_text or []),
 
 
 
426
  }
427
 
428
  return result
 
8
  from generator_engine import GeneratorEngine
9
  from models import RetrievedChunk, SolverResult
10
  from quant_solver import is_quant_question, solve_quant
11
+ from question_classifier import classify_question
12
  from retrieval_engine import RetrievalEngine
 
13
 
14
 
15
  RETRIEVAL_ALLOWED_INTENTS = {
 
56
  "number_properties": [
57
  "integer", "odd", "even", "prime", "divisible", "factor", "multiple",
58
  ],
59
+ "number_theory": [
60
+ "integer", "odd", "even", "prime", "divisible", "factor", "multiple", "remainder",
61
+ ],
62
+ "sequence": [
63
+ "sequence", "geometric", "arithmetic", "term", "series",
64
+ ],
65
+ "quant": [
66
+ "equation", "solve", "value", "integer", "ratio", "percent",
67
+ ],
68
+ "data": [
69
+ "data", "mean", "median", "trend", "chart", "table", "correlation",
70
+ ],
71
+ "verbal": [
72
+ "grammar", "meaning", "author", "argument", "sentence", "word",
73
+ ],
74
+ "reasoning": [
75
+ "argument", "assume", "conclusion", "evidence", "author",
76
+ ],
77
+ "vocabulary": [
78
+ "meaning", "definition", "word", "closest in meaning",
79
+ ],
80
+ "grammar": [
81
+ "grammar", "sentence", "verb", "agreement", "idiom", "modifier",
82
+ ],
83
  }
84
 
85
  INTENT_KEYWORDS = {
 
104
  "probability": ["absolute value", "circle area", "quadratic"],
105
  "geometry": ["absolute value", "prime", "median salary"],
106
  "number_properties": ["circle", "triangle", "absolute value"],
107
+ "number_theory": ["circle", "triangle", "median salary"],
108
  }
109
 
110
 
 
119
  return lines
120
 
121
 
122
+ def _compose_reply(
123
  result: SolverResult,
124
  intent: str,
125
  reveal_answer: bool,
126
  verbosity: float,
127
+ category: Optional[str] = None,
128
+ question_type: Optional[str] = None,
129
  ) -> str:
130
  steps = result.steps or []
131
  internal = result.internal_answer or result.answer_value or ""
132
 
133
  if intent == "hint":
134
+ return steps[0] if steps else "Start by identifying what the question is really asking."
135
 
136
  if intent == "instruction":
137
  if steps:
138
  return f"First step: {steps[0]}"
139
+ return "First, identify the key relationship or comparison in the question."
140
 
141
  if intent == "definition":
142
  if steps:
143
  return f"Here is the idea in context:\n- {steps[0]}"
144
+ return "This is asking for the meaning of the term or idea in the question."
145
 
146
  if intent in {"walkthrough", "step_by_step", "explain", "method", "concept"}:
147
  if not steps:
 
166
  if steps:
167
  return steps[0]
168
 
169
+ if category == "Verbal":
170
+ return "I can help analyse the wording or logic, but I do not have a full verbal solver yet."
171
+
172
+ if category == "DataInsight":
173
+ return "I can help reason through the data, but I cannot confidently solve this from the current parse alone yet."
174
+
175
  return "I can help with this, but I cannot confidently solve it from the current parse alone yet."
176
 
177
 
 
189
  return {w for w in raw if len(w) > 2 and w not in stop}
190
 
191
 
192
+ def _infer_structure_terms(question_text: str, topic: Optional[str], question_type: Optional[str]) -> List[str]:
193
  terms: List[str] = []
194
 
195
  if topic and topic in STRUCTURE_KEYWORDS:
196
  terms.extend(STRUCTURE_KEYWORDS[topic])
197
 
198
+ if question_type:
199
+ terms.extend(question_type.replace("_", " ").split())
200
+
201
  q = (question_text or "").lower()
202
 
203
  if "=" in q:
 
210
  terms.extend(["multiply", "undo operations"])
211
  if "%" in q or "percent" in q:
212
  terms.extend(["percent", "percentage"])
213
+ if "ratio" in q:
214
+ terms.extend(["ratio", "proportion"])
215
+ if "mean" in q or "average" in q:
216
+ terms.extend(["mean", "average"])
217
+ if "median" in q:
218
+ terms.extend(["median"])
219
+ if "probability" in q:
220
+ terms.extend(["probability"])
221
 
222
  return list(dict.fromkeys(terms))
223
 
 
248
  return False
249
 
250
 
251
+ def should_retrieve(intent: str, solved: bool, raw_user_text: str, category: Optional[str] = None) -> bool:
252
  if intent in RETRIEVAL_ALLOWED_INTENTS:
253
  return True
254
+ if not solved and category in {"Verbal", "DataInsight"}:
255
+ return True
256
  if not solved:
257
  return True
258
  if _is_direct_solve_request(raw_user_text, intent):
 
265
  intent: str,
266
  topic: Optional[str],
267
  question_text: str,
268
+ question_type: Optional[str] = None,
269
  ) -> float:
270
  text = f"{chunk.topic} {chunk.text}".lower()
271
  score = 0.0
 
277
  elif topic.lower() in text:
278
  score += 2.0
279
 
280
+ for term in _infer_structure_terms(question_text, topic, question_type):
281
  if term.lower() in text:
282
  score += 1.5
283
 
 
300
  intent: str,
301
  topic: Optional[str],
302
  question_text: str,
303
+ question_type: Optional[str] = None,
304
  min_score: float = 2.5,
305
  max_chunks: int = 3,
306
  ) -> List[RetrievedChunk]:
307
  scored: List[tuple[float, RetrievedChunk]] = []
308
 
309
  for chunk in chunks:
310
+ s = _score_chunk(chunk, intent, topic, question_text, question_type)
311
  if s >= min_score:
312
  scored.append((s, chunk))
313
 
 
321
  intent: str,
322
  topic: Optional[str],
323
  solved: bool,
324
+ question_type: Optional[str] = None,
325
+ category: Optional[str] = None,
326
  ) -> str:
327
  parts: List[str] = []
328
 
 
330
  if base:
331
  parts.append(base)
332
 
333
+ if category:
334
+ parts.append(category)
335
+
336
  if topic:
337
  parts.append(topic)
338
 
339
+ if question_type:
340
+ parts.append(question_type.replace("_", " "))
341
+
342
  if intent in {"definition", "concept"}:
343
  parts.append("definition concept explanation")
344
  elif intent in {"walkthrough", "step_by_step", "method", "instruction"}:
 
380
  solver_input = (question_text or raw_user_text or "").strip()
381
  user_text = (raw_user_text or "").strip()
382
 
383
+ category = kwargs.get("category")
384
+ classification = classify_question(
385
+ question_text=solver_input,
386
+ category=category,
387
+ )
388
+ question_topic = classification.get("topic")
389
+ question_type = classification.get("type")
390
+ inferred_category = classification.get("category") or category
391
+
392
  resolved_intent = intent or detect_intent(user_text, help_mode)
393
  resolved_help_mode = help_mode or intent_to_help_mode(resolved_intent)
394
  reveal_answer = resolved_help_mode == "answer" or transparency >= 0.8
 
399
  help_mode=resolved_help_mode,
400
  answer_letter=None,
401
  answer_value=None,
402
+ topic=question_topic,
403
  used_retrieval=False,
404
  used_generator=False,
405
  internal_answer=None,
 
410
 
411
  selected_chunks: List[RetrievedChunk] = []
412
 
413
+ if inferred_category == "Quantitative" or is_quant_question(solver_input):
414
  solved_result = solve_quant(solver_input)
415
  if solved_result is not None:
416
  result = solved_result
417
  result.help_mode = resolved_help_mode
418
+ if not result.topic:
419
+ result.topic = question_topic
420
 
421
+ reply = _compose_reply(
422
  result=result,
423
  intent=resolved_intent,
424
  reveal_answer=reveal_answer,
425
  verbosity=verbosity,
426
+ category=inferred_category,
427
+ question_type=question_type,
428
  )
429
 
430
  allow_retrieval = should_retrieve(
431
  intent=resolved_intent,
432
  solved=bool(result.solved),
433
  raw_user_text=user_text or solver_input,
434
+ category=inferred_category,
435
  )
436
 
437
  if allow_retrieval and retrieval_context:
 
440
  intent=resolved_intent,
441
  topic=result.topic,
442
  question_text=solver_input,
443
+ question_type=question_type,
444
  )
445
  if filtered:
446
  selected_chunks = filtered
 
455
  intent=resolved_intent,
456
  topic=result.topic,
457
  solved=bool(result.solved),
458
+ question_type=question_type,
459
+ category=inferred_category,
460
  ),
461
  top_k=6,
462
  )
 
465
  intent=resolved_intent,
466
  topic=result.topic,
467
  question_text=solver_input,
468
+ question_type=question_type,
469
  )
470
  if filtered:
471
  selected_chunks = filtered
 
497
  "intent": resolved_intent,
498
  "question_text": question_text or "",
499
  "options_count": len(options_text or []),
500
+ "category": inferred_category,
501
+ "question_type": question_type,
502
+ "classified_topic": question_topic,
503
  }
504
 
505
  return result