j-js committed on
Commit
280a1a8
·
verified ·
1 Parent(s): 051ac24

Update question_support_loader.py

Browse files
Files changed (1) hide show
  1. question_support_loader.py +54 -41
question_support_loader.py CHANGED
@@ -2,57 +2,70 @@ from __future__ import annotations
2
 
3
  import json
4
  from pathlib import Path
5
- from typing import Any, Dict, Optional
6
-
7
-
8
- BASE_DIR = Path(__file__).resolve().parent
9
- DATA_DIR = BASE_DIR / "data"
10
- QUESTION_SUPPORT_PATH = DATA_DIR / "question_support_bank.jsonl"
11
 
12
 
13
  class QuestionSupportBank:
14
- def __init__(self, path: Path | None = None):
15
- self.path = path or QUESTION_SUPPORT_PATH
16
- self._by_id: Dict[str, Dict[str, Any]] = {}
17
  self._loaded = False
 
 
 
 
 
18
 
19
  def load(self) -> None:
20
  self._by_id = {}
21
-
22
- if not self.path.exists():
23
- self._loaded = True
24
- return
25
-
26
- with self.path.open("r", encoding="utf-8") as f:
27
- for line_number, line in enumerate(f, start=1):
28
- line = line.strip()
29
- if not line:
30
- continue
31
-
32
- try:
33
- row = json.loads(line)
34
- except json.JSONDecodeError as e:
35
- print(f"[QuestionSupportBank] Bad JSON on line {line_number}: {e}")
36
- continue
37
-
38
- question_id = str(row.get("question_id", "")).strip()
39
- if not question_id:
40
- print(f"[QuestionSupportBank] Missing question_id on line {line_number}")
41
- continue
42
-
43
- self._by_id[question_id] = row
44
 
45
  self._loaded = True
46
- print(f"[QuestionSupportBank] Loaded {len(self._by_id)} support entries.")
47
 
48
- def get(self, question_id: Optional[str]) -> Optional[Dict[str, Any]]:
49
  if not self._loaded:
50
  self.load()
51
 
52
- if not question_id:
53
- return None
54
-
55
- return self._by_id.get(str(question_id).strip())
56
-
57
-
58
- question_support_bank = QuestionSupportBank()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import json
4
  from pathlib import Path
5
+ from typing import Any, Dict, List, Optional
 
 
 
 
 
6
 
7
 
8
  class QuestionSupportBank:
9
+ def __init__(self, data_path: Optional[str] = None) -> None:
10
+ base_dir = Path(__file__).resolve().parent
11
+ self.data_path = Path(data_path) if data_path else base_dir / "data" / "question_support_bank.jsonl"
12
  self._loaded = False
13
+ self._by_id: Dict[str, Dict[str, Any]] = {}
14
+ self._by_text: Dict[str, Dict[str, Any]] = {}
15
+
16
+ def _normalize(self, text: Optional[str]) -> str:
17
+ return " ".join((text or "").strip().lower().split())
18
 
19
  def load(self) -> None:
20
  self._by_id = {}
21
+ self._by_text = {}
22
+
23
+ if self.data_path.exists():
24
+ with self.data_path.open("r", encoding="utf-8") as handle:
25
+ for raw_line in handle:
26
+ line = raw_line.strip()
27
+ if not line:
28
+ continue
29
+ try:
30
+ item = json.loads(line)
31
+ except json.JSONDecodeError:
32
+ continue
33
+ self._store_item(item)
 
 
 
 
 
 
 
 
 
 
34
 
35
  self._loaded = True
 
36
 
37
+ def _ensure_loaded(self) -> None:
38
  if not self._loaded:
39
  self.load()
40
 
41
+ def _store_item(self, item: Dict[str, Any]) -> None:
42
+ if not isinstance(item, dict):
43
+ return
44
+ qid = str(item.get("question_id") or "").strip()
45
+ qtext = self._normalize(item.get("question_text") or item.get("stem") or "")
46
+ if qid:
47
+ self._by_id[qid] = item
48
+ if qtext:
49
+ self._by_text[qtext] = item
50
+
51
+ def get(self, question_id: Optional[str] = None, question_text: Optional[str] = None) -> Optional[Dict[str, Any]]:
52
+ self._ensure_loaded()
53
+ qid = str(question_id or "").strip()
54
+ if qid and qid in self._by_id:
55
+ return dict(self._by_id[qid])
56
+
57
+ qtext = self._normalize(question_text)
58
+ if qtext and qtext in self._by_text:
59
+ return dict(self._by_text[qtext])
60
+ return None
61
+
62
+ def upsert(self, item: Dict[str, Any]) -> None:
63
+ self._ensure_loaded()
64
+ self._store_item(item)
65
+
66
+ def all_items(self) -> List[Dict[str, Any]]:
67
+ self._ensure_loaded()
68
+ return [dict(v) for v in self._by_id.values()]
69
+
70
+
71
+ question_support_bank = QuestionSupportBank()