Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Community

NavyDevilDoc committed on Dec 22, 2025

Commit

ff72774

verified ·

1 Parent(s): 7a9482e

Update src/core/QuizEngine.py

Browse files

Files changed (1) hide show

src/core/QuizEngine.py +47 -49

src/core/QuizEngine.py CHANGED Viewed

@@ -24,9 +24,7 @@ class QuizEngine:
             "question": f"What does **{acronym}** stand for?"
         }
-    # --- MODE 2: DOCUMENTS (NEW) ---
-# ... inside QuizEngine class ...
     def get_document_context(self, username):
         user_dir = os.path.join(self.source_dir, username)
         if not os.path.exists(user_dir): return None
@@ -42,28 +40,32 @@ class QuizEngine:
                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                     text = f.read()
-                # DEFENSE 1: Revised Filtering
-                # We split by double newline to get paragraphs
-                paragraphs = text.split('\n\n')
                 candidates = []
-                for p in paragraphs:
-                    p = p.strip()
-                    # A. Absolute junk filter (too short to be a question)
-                    if len(p) < 150: continue
-                    # B. Administrative noise filter
-                    if "intentionally left blank" in p.lower(): continue
-                    if "table of contents" in p.lower(): continue
-                    # C. The "Number" Filter (RELAXED)
-                    # Old rule: if p[0].isdigit(): continue
-                    # New rule: If it starts with a number but is long, KEEP IT.
-                    # We only ban "short" numbered lines (likely headers/TOC)
-                    if p[0].isdigit() and len(p) < 250:
-                        continue
-                    candidates.append(p)
                 if not candidates: continue
@@ -83,33 +85,30 @@ class QuizEngine:
         return None
     def construct_question_generation_prompt(self, context_text):
-            """
-            REVISED DEFENSE 2:
-            Broadens 'valid' topics to include Budgeting/Acquisition while keeping
-            the filter for true noise (formatting/headers).
-            """
-            return (
-                f"Act as a US Navy Engineering Duty Officer Board Examiner.\n"
-                f"Review the following source text for suitability:\n"
-                f"'''{context_text}'''\n\n"
-                f"EVALUATION CRITERIA:\n"
-                f"You are looking for **Examinable Professional Knowledge** in ANY of these domains:\n"
-                f"- Technical Engineering (Hull, Mechanical, Electrical, Combat Systems)\n"
-                f"- Program Management & Acquisition (Milestones, Contracts)\n"
-                f"- Financial Management (PPBE, Funding Types, Colors of Money)\n"
-                f"- Maintenance & Modernization (Availabilities, Shipyards)\n"
-                f"- Statutory & Regulatory Responsibilities\n\n"
-                f"SKIP LOGIC:\n"
-                f"- Output 'SKIP' ONLY if the text is structural noise (e.g., Table of Contents, Page Headers, List of Effective Pages, blank pages, or purely formatting instructions).\n"
-                f"- Do NOT skip procedural descriptions (e.g., 'Step 1 of the Budget Process')—those are valid questions.\n\n"
-                f"TASK:\n"
-                f"If the text contains ANY examinable material, generate a difficult, scenario-based question.\n"
-                f"Do not ask 'What does the text say?'. Ask 'How would you apply this regulation...' or 'What are the implications of...'\n"
-                f"OUTPUT: Just the question text."
-            )
     def construct_grading_prompt(self, question, answer, context_text):
-        """Grades the deep dive answer."""
         return (
             f"You are a Board Examiner.\n"
             f"Reference Material: '{context_text}'\n\n"
@@ -124,7 +123,6 @@ class QuizEngine:
         )
     def construct_acronym_grading_prompt(self, term, correct_definition, user_answer):
-        """Grades the acronym answer."""
         return (
             f"Term: {term}\n"
             f"Official Definition: {correct_definition}\n"

             "question": f"What does **{acronym}** stand for?"
         }
+    # --- MODE 2: DOCUMENTS (The Fix) ---
     def get_document_context(self, username):
         user_dir = os.path.join(self.source_dir, username)
         if not os.path.exists(user_dir): return None
                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                     text = f.read()
+                # --- NEW CHUNKING STRATEGY: SLIDING WINDOW ---
+                # Instead of splitting by paragraphs (which separates list items
+                # from their headers), we take overlapping 1500-character windows,
+                # advancing 1000 characters at a time (500 characters of overlap).
+                # This way a header and its bullets usually land in the same chunk.
+                step_size = 1000
+                window_size = 1500
                 candidates = []
+                # If text is short, just take the whole thing
+                if len(text) < window_size:
+                    candidates.append(text)
+                else:
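+                    # Slide a window across the text.
+                    # NOTE(review): the range stops before len(text) - window_size, so text
+                    # beyond the last window is never sampled, and a text of exactly
+                    # window_size characters yields no chunks at all. Confirm this is intended.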
+                    for i in range(0, len(text) - window_size, step_size):
+                        chunk = text[i : i + window_size]
+                        # BASIC SANITY CHECKS (Very Relaxed)
+                        # 1. Must have content
+                        if len(chunk.strip()) < 100: continue
+                        # 2. Must not be pure administrative junk
+                        # We let the LLM decide mostly, but we filter obvious "Blank Pages"
+                        if "intentionally left blank" in chunk.lower(): continue
+                        candidates.append(chunk)
                 if not candidates: continue
         return None
     def construct_question_generation_prompt(self, context_text):
+        """
+        REVISED PROMPT: Focuses on Professional Knowledge.
+        """
+        return (
+            f"Act as a US Navy Engineering Duty Officer Board Examiner.\n"
+            f"Review the following source text for suitability:\n"
+            f"'''{context_text}'''\n\n"
+            f"EVALUATION CRITERIA:\n"
+            f"You are looking for **Examinable Professional Knowledge** in ANY of these domains:\n"
+            f"- Technical Engineering (HM&E, Combat Systems)\n"
+            f"- Program Management & Acquisition (Milestones, Contracts)\n"
+            f"- Financial Management (PPBE, Funding Types)\n"
+            f"- Maintenance & Modernization (Availabilities, Shipyards)\n"
+            f"- Statutory & Regulatory Responsibilities (Goldwater-Nichols, Title 10)\n\n"
+            f"SKIP LOGIC:\n"
+            f"- Output 'SKIP' ONLY if the text is structural noise (e.g., Table of Contents, Page Headers, purely formatting instructions).\n"
+            f"- Do NOT skip lists, definitions, or procedural descriptions.\n\n"
+            f"TASK:\n"
+            f"If the text contains ANY examinable material, generate a difficult, scenario-based question.\n"
+            f"Do not ask 'What does the text say?'. Ask 'How would you apply this regulation...' or 'What are the implications of...'\n"
+            f"OUTPUT: Just the question text."
+        )
     def construct_grading_prompt(self, question, answer, context_text):
         return (
             f"You are a Board Examiner.\n"
             f"Reference Material: '{context_text}'\n\n"
         )
     def construct_acronym_grading_prompt(self, term, correct_definition, user_answer):
         return (
             f"Term: {term}\n"
             f"Official Definition: {correct_definition}\n"