NavyDevilDoc committed on
Commit
ff72774
·
verified ·
1 Parent(s): 7a9482e

Update src/core/QuizEngine.py

Browse files
Files changed (1) hide show
  1. src/core/QuizEngine.py +47 -49
src/core/QuizEngine.py CHANGED
@@ -24,9 +24,7 @@ class QuizEngine:
24
  "question": f"What does **{acronym}** stand for?"
25
  }
26
 
27
- # --- MODE 2: DOCUMENTS (NEW) ---
28
- # ... inside QuizEngine class ...
29
-
30
  def get_document_context(self, username):
31
  user_dir = os.path.join(self.source_dir, username)
32
  if not os.path.exists(user_dir): return None
@@ -42,28 +40,32 @@ class QuizEngine:
42
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
43
  text = f.read()
44
 
45
- # DEFENSE 1: Revised Filtering
46
- # We split by double newline to get paragraphs
47
- paragraphs = text.split('\n\n')
 
 
 
 
48
  candidates = []
49
 
50
- for p in paragraphs:
51
- p = p.strip()
52
- # A. Absolute junk filter (too short to be a question)
53
- if len(p) < 150: continue
54
-
55
- # B. Administrative noise filter
56
- if "intentionally left blank" in p.lower(): continue
57
- if "table of contents" in p.lower(): continue
58
-
59
- # C. The "Number" Filter (RELAXED)
60
- # Old rule: if p[0].isdigit(): continue
61
- # New rule: If it starts with a number but is long, KEEP IT.
62
- # We only ban "short" numbered lines (likely headers/TOC)
63
- if p[0].isdigit() and len(p) < 250:
64
- continue
65
-
66
- candidates.append(p)
67
 
68
  if not candidates: continue
69
 
@@ -83,33 +85,30 @@ class QuizEngine:
83
  return None
84
 
85
  def construct_question_generation_prompt(self, context_text):
86
- """
87
- REVISED DEFENSE 2:
88
- Broadens 'valid' topics to include Budgeting/Acquisition while keeping
89
- the filter for true noise (formatting/headers).
90
- """
91
- return (
92
- f"Act as a US Navy Engineering Duty Officer Board Examiner.\n"
93
- f"Review the following source text for suitability:\n"
94
- f"'''{context_text}'''\n\n"
95
- f"EVALUATION CRITERIA:\n"
96
- f"You are looking for **Examinable Professional Knowledge** in ANY of these domains:\n"
97
- f"- Technical Engineering (Hull, Mechanical, Electrical, Combat Systems)\n"
98
- f"- Program Management & Acquisition (Milestones, Contracts)\n"
99
- f"- Financial Management (PPBE, Funding Types, Colors of Money)\n"
100
- f"- Maintenance & Modernization (Availabilities, Shipyards)\n"
101
- f"- Statutory & Regulatory Responsibilities\n\n"
102
- f"SKIP LOGIC:\n"
103
- f"- Output 'SKIP' ONLY if the text is structural noise (e.g., Table of Contents, Page Headers, List of Effective Pages, blank pages, or purely formatting instructions).\n"
104
- f"- Do NOT skip procedural descriptions (e.g., 'Step 1 of the Budget Process')—those are valid questions.\n\n"
105
- f"TASK:\n"
106
- f"If the text contains ANY examinable material, generate a difficult, scenario-based question.\n"
107
- f"Do not ask 'What does the text say?'. Ask 'How would you apply this regulation...' or 'What are the implications of...'\n"
108
- f"OUTPUT: Just the question text."
109
- )
110
 
111
  def construct_grading_prompt(self, question, answer, context_text):
112
- """Grades the deep dive answer."""
113
  return (
114
  f"You are a Board Examiner.\n"
115
  f"Reference Material: '{context_text}'\n\n"
@@ -124,7 +123,6 @@ class QuizEngine:
124
  )
125
 
126
  def construct_acronym_grading_prompt(self, term, correct_definition, user_answer):
127
- """Grades the acronym answer."""
128
  return (
129
  f"Term: {term}\n"
130
  f"Official Definition: {correct_definition}\n"
 
24
  "question": f"What does **{acronym}** stand for?"
25
  }
26
 
27
+ # --- MODE 2: DOCUMENTS (The Fix) ---
 
 
28
  def get_document_context(self, username):
29
  user_dir = os.path.join(self.source_dir, username)
30
  if not os.path.exists(user_dir): return None
 
40
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
41
  text = f.read()
42
 
43
+ # --- NEW CHUNKING STRATEGY: SLIDING WINDOW ---
44
+ # Instead of splitting by paragraphs (which kills lists),
45
+ # we grab chunks of roughly 1000-1500 characters.
46
+ # This ensures we get the Header + The Bullets together.
47
+
48
+ step_size = 1000
49
+ window_size = 1500
50
  candidates = []
51
 
52
+ # If text is short, just take the whole thing
53
+ if len(text) < window_size:
54
+ candidates.append(text)
55
+ else:
56
+ # Slide a window across the text
57
+ for i in range(0, len(text) - window_size, step_size):
58
+ chunk = text[i : i + window_size]
59
+
60
+ # BASIC SANITY CHECKS (Very Relaxed)
61
+ # 1. Must have content
62
+ if len(chunk.strip()) < 100: continue
63
+
64
+ # 2. Must not be pure administrative junk
65
+ # We let the LLM decide mostly, but we filter obvious "Blank Pages"
66
+ if "intentionally left blank" in chunk.lower(): continue
67
+
68
+ candidates.append(chunk)
69
 
70
  if not candidates: continue
71
 
 
85
  return None
86
 
87
  def construct_question_generation_prompt(self, context_text):
88
+ """
89
+ REVISED PROMPT: Focuses on Professional Knowledge.
90
+ """
91
+ return (
92
+ f"Act as a US Navy Engineering Duty Officer Board Examiner.\n"
93
+ f"Review the following source text for suitability:\n"
94
+ f"'''{context_text}'''\n\n"
95
+ f"EVALUATION CRITERIA:\n"
96
+ f"You are looking for **Examinable Professional Knowledge** in ANY of these domains:\n"
97
+ f"- Technical Engineering (HM&E, Combat Systems)\n"
98
+ f"- Program Management & Acquisition (Milestones, Contracts)\n"
99
+ f"- Financial Management (PPBE, Funding Types)\n"
100
+ f"- Maintenance & Modernization (Availabilities, Shipyards)\n"
101
+ f"- Statutory & Regulatory Responsibilities (Goldwater-Nichols, Title 10)\n\n"
102
+ f"SKIP LOGIC:\n"
103
+ f"- Output 'SKIP' ONLY if the text is structural noise (e.g., Table of Contents, Page Headers, purely formatting instructions).\n"
104
+ f"- Do NOT skip lists, definitions, or procedural descriptions.\n\n"
105
+ f"TASK:\n"
106
+ f"If the text contains ANY examinable material, generate a difficult, scenario-based question.\n"
107
+ f"Do not ask 'What does the text say?'. Ask 'How would you apply this regulation...' or 'What are the implications of...'\n"
108
+ f"OUTPUT: Just the question text."
109
+ )
 
 
110
 
111
  def construct_grading_prompt(self, question, answer, context_text):
 
112
  return (
113
  f"You are a Board Examiner.\n"
114
  f"Reference Material: '{context_text}'\n\n"
 
123
  )
124
 
125
  def construct_acronym_grading_prompt(self, term, correct_definition, user_answer):
 
126
  return (
127
  f"Term: {term}\n"
128
  f"Official Definition: {correct_definition}\n"