Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Community

NavyDevilDoc committed on Dec 21, 2025

Commit

63221b9

verified ·

1 Parent(s): f076cab

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +129 -116

src/app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from openai import OpenAI
 from datetime import datetime
 from test_integration import run_tests
 from core.QuizEngine import QuizEngine
 # --- CONFIGURATION ---
 st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
@@ -26,12 +27,16 @@ if "roles" not in st.session_state:
 if "quiz_state" not in st.session_state:
     st.session_state.quiz_state = {
         "active": False,        # Is a question currently displayed?
-        "question_data": None,  # The current acronym object
         "user_answer": "",      # What the user typed
         "feedback": None,       # The LLM's grading response
-        "streak": 0             # Fun gamification metric
     }
 # --- FLATTENER LOGIC (Integrated) ---
 class OutlineProcessor:
     """Parses text outlines for the Flattener tool."""
@@ -183,6 +188,35 @@ with st.sidebar:
             )
     st.divider()
     # Model Selector
     st.header("🧠 Intelligence")
@@ -230,18 +264,12 @@ with st.sidebar:
     if st.button("Run Integration Test"):
         with st.spinner("Running diagnostics..."):
-            # Create a buffer to capture the text that would normally be printed
             f = io.StringIO()
-            # Redirect 'print' statements to our buffer instead of the console
             try:
                 with contextlib.redirect_stdout(f):
                     run_tests()
-                # Display the result in a code block for easy reading
                 st.success("Tests Completed")
                 st.code(f.getvalue(), language="text")
             except Exception as e:
                 st.error(f"Test Execution Failed: {e}")
@@ -269,31 +297,32 @@ with tab1:
         # RAG Search
         context_txt = ""
-        # 1. Default System Prompt (No RAG)
         sys_p = "You are a helpful AI assistant."
         if use_rag:
-            with st.spinner("Searching Knowledge Base..."):
-                docs = rag_engine.search_knowledge_base(prompt, st.session_state.username)
-                if docs:
-                    # 2. Strict System Prompt (With RAG)
-                    # We relax the strictness slightly to allow for inference,
-                    # while still demanding evidence.
-                    sys_p = (
-                        "You are a Navy Document Analyst. "
-                        "You must answer the user's question based PRIMARILY on the provided Context. "
-                        "If the Context contains the answer, output it clearly. "
-                        "If the Context does NOT contain the answer, simply state: "
-                        "'I cannot find that specific information in the documents provided.'"
                     )
-                    # 3. XML-Formatted Context Construction
-                    # This helps the model "see" the start and end of each chunk clearly.
-                    for i, d in enumerate(docs):
-                        src = d.metadata.get('source', 'Unknown')
-                        context_txt += f"<document index='{i+1}' source='{src}'>\n{d.page_content}\n</document>\n"
-        # 4. Construct Final User Payload
         if context_txt:
             final_prompt = (
                 f"User Question: {prompt}\n\n"
@@ -306,7 +335,6 @@ with tab1:
         # Generation
         with st.chat_message("assistant"):
             with st.spinner("Thinking..."):
-                # Memory Window
                 hist = [{"role":"system", "content":sys_p}] + st.session_state.messages[-6:-1] + [{"role":"user", "content":final_prompt}]
                 resp, usage = query_model_universal(hist, 2000, model_choice, st.session_state.get("user_openai_key"))
@@ -337,59 +365,57 @@ with tab2:
     if uploaded_file:
         # Save temp
-        temp_path = rag_engine.save_uploaded_file(uploaded_file)
         # ACTION BAR
         col_a, col_b, col_c = st.columns(3)
-        # 1. ADD TO DB (With Strategy Selection)
         with col_a:
             chunk_strategy = st.selectbox(
                 "Chunking Strategy",
-                ["paragraph", "token"], # Removed 'page' as it is not implemented in new engine yet
                 help="Paragraph: Standard. Token: Dense text.",
                 key="chunk_selector"
             )
             if st.button("📥 Add to Knowledge Base", type="primary"):
-                with st.spinner("Ingesting..."):
-                    # Note: New engine uses internal Tesseract OCR, not GPT-4o Vision
-                    # so we don't pass vision flags or keys here anymore.
-                    ok, msg = rag_engine.ingest_file(
-                        file_path=temp_path,
-                        username=st.session_state.username,
-                        strategy=chunk_strategy
-                    )
-                    if ok:
-                        tracker.upload_user_db(st.session_state.username) # Auto-Sync
-                        st.success(msg)
-                    else:
-                        st.error(msg)
         # 2. SUMMARIZE
         with col_b:
-            # Spacer to align buttons visually since col_a has a selectbox
             st.write("")
             st.write("")
             if st.button("📝 Summarize Document"):
                 with st.spinner("Reading & Summarizing..."):
                     key = st.session_state.get("user_openai_key") or OPENAI_KEY
-                    # Extract raw text first
                     class FileObj:
                         """Path-backed object exposing ``name`` and ``read()``; handed to doc_loader.extract_text_from_file."""
                         def __init__(self, p, n):
                             # Keep the on-disk location and the display name side by side.
                             self.path = p
                             self.name = n
                         def read(self):
                             """Return the entire content of ``self.path`` as bytes."""
                             with open(self.path, "rb") as handle:
                                 payload = handle.read()
                             return payload
-                    # Extraction
                     raw = doc_loader.extract_text_from_file(
                         FileObj(temp_path, uploaded_file.name),
                         use_vision=use_vision, api_key=key
                     )
-                    # Call LLM
-                    prompt = f"Summarize this document into a key executive brief:\n\n{raw[:20000]}" # Truncate for safety
                     msgs = [{"role":"user", "content": prompt}]
                     summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
@@ -402,11 +428,9 @@ with tab2:
         # 3. FLATTEN
         with col_c:
-            # Spacer to align buttons
             st.write("")
             st.write("")
-            # We use a session state variable to store the result so it persists for the "Index" step
             if "flattened_result" not in st.session_state:
                 st.session_state.flattened_result = None
@@ -414,7 +438,6 @@ with tab2:
                 with st.spinner("Flattening..."):
                     key = st.session_state.get("user_openai_key") or OPENAI_KEY
-                    # A. Extract
                     with open(temp_path, "rb") as f:
                         class Wrapper:
                             def __init__(self, data, n): self.data=data; self.name=n
@@ -423,11 +446,9 @@ with tab2:
                             Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key
                         )
-                    # B. Parse
                     proc = OutlineProcessor(raw)
                     items = proc.parse()
-                    # C. Flatten
                     out_txt = []
                     bar = st.progress(0)
                     for i, item in enumerate(items):
@@ -437,35 +458,57 @@ with tab2:
                         out_txt.append(res)
                         bar.progress((i+1)/len(items))
-                    # D. Store Result in Session State
                     final_flattened_text = "\n".join(out_txt)
                     st.session_state.flattened_result = {
                         "text": final_flattened_text,
                         "source": f"{uploaded_file.name}_flat"
                     }
-                    st.rerun() # Refresh to show the new result/buttons
-            # Display Result & Index Option
             if st.session_state.flattened_result:
                 res = st.session_state.flattened_result
                 st.success("Flattening Complete!")
                 st.text_area("Result", res["text"], height=200)
-                # The New Button
                 if st.button("📥 Index This Flattened Version"):
-                    with st.spinner("Indexing Flattened Text..."):
-                        ok, msg = rag_engine.process_and_add_text(
-                            res["text"],
-                            res["source"],
-                            st.session_state.username
-                        )
-                        if ok:
-                            tracker.upload_user_db(st.session_state.username) # Sync!
-                            st.success(msg)
-                        else:
-                            st.error(msg)
     st.divider()
 # === TAB 3: QUIZ MODE ===
 with tab3:
@@ -491,7 +534,7 @@ with tab3:
     st.divider()
-    # 2. START BUTTON (Logic branches based on mode)
     if not qs["active"]:
         if st.button("🚀 Generate New Question", type="primary"):
@@ -510,7 +553,6 @@ with tab3:
                 # MODE B: DOCUMENTS
                 else:
-                    # Retry logic for the LLM's "SKIP" response
                     valid_question_found = False
                     attempts = 0
@@ -525,7 +567,6 @@ with tab3:
                                 300, model_choice, st.session_state.get("user_openai_key")
                             )
-                            # If LLM liked the chunk, it gave us a question. If not, it said "SKIP".
                             if "SKIP" not in question_text and len(question_text) > 10:
                                 valid_question_found = True
                                 qs["active"] = True
@@ -541,7 +582,6 @@ with tab3:
     if qs["active"]:
         st.markdown(f"### {qs['generated_question_text']}")
-        # Hints for Doc Mode
         if "document" in qs.get("question_data", {}).get("type", ""):
             st.caption(f"Source: *{qs['question_data']['source_file']}*")
@@ -553,7 +593,6 @@ with tab3:
             with st.spinner("Grading..."):
                 data = qs["question_data"]
-                # BRANCH GRADING LOGIC
                 if data["type"] == "acronym":
                     prompt = quiz.construct_acronym_grading_prompt(
                         data["term"], data["correct_definition"], user_ans
@@ -563,7 +602,6 @@ with tab3:
                         qs["generated_question_text"], user_ans, data["context_text"]
                     )
-                # Get Grade
                 msgs = [{"role": "user", "content": prompt}]
                 grade, _ = query_model_universal(
                     msgs, 500, model_choice, st.session_state.get("user_openai_key")
@@ -571,7 +609,6 @@ with tab3:
                 qs["feedback"] = grade
-                # Streak Logic
                 if "GRADE:** PASS" in grade or "GRADE:** Pass" in grade:
                     qs["streak"] += 1
                 elif "GRADE:** FAIL" in grade:
@@ -579,53 +616,29 @@ with tab3:
                 st.rerun()
-    # 4. FEEDBACK AREA
     if qs["feedback"]:
         if "PASS" in qs["feedback"]:
             st.success("✅ CORRECT")
         else:
-            st.warning("⚠️ NEEDS IMPROVEMENT")
         st.markdown(qs["feedback"])
-        # For documents, show the source text so you can learn
-        if qs["question_data"]["type"] == "document":
             with st.expander("Show Source Text (Answer Key)"):
-                st.info(qs["question_data"]["context_text"])
         if st.button("Next Question ➡️"):
             qs["active"] = False
             qs["question_data"] = None
             qs["feedback"] = None
-            st.rerun()
-    # 4. FEEDBACK DISPLAY
-    if qs["feedback"]:
-        st.divider()
-        if "PASS" in qs["feedback"]:
-            st.success("✅ CORRECT")
-        else:
-            st.error("❌ INCORRECT")
-        st.markdown(qs["feedback"])
-        st.info(f"**Official Definition:** {qs['question_data']['correct_definition']}")
-        if st.button("Next Question ➡️"):
-            qs["active"] = False
-            qs["question_data"] = None
-            qs["feedback"] = None
-            st.rerun()
-    # DB MANAGER
-    st.subheader("Database Management")
-    docs = rag_engine.list_documents(st.session_state.username)
-    if docs:
-        for d in docs:
-            c1, c2 = st.columns([4,1])
-            c1.text(f"📄 {d['filename']} ({d['chunks']} chunks)")
-            if c2.button("🗑️", key=d['source']):
-                rag_engine.delete_document(st.session_state.username, d['source'])
-                tracker.upload_user_db(st.session_state.username)
-                st.rerun()
-    else:
-        st.info("Database Empty.")

 from datetime import datetime
 from test_integration import run_tests
 from core.QuizEngine import QuizEngine
+from core.PineconeManager import PineconeManager # FIXED: Added missing import
 # --- CONFIGURATION ---
 st.set_page_config(page_title="Navy AI Toolkit", page_icon="⚓", layout="wide")
 if "quiz_state" not in st.session_state:
     st.session_state.quiz_state = {
         "active": False,        # Is a question currently displayed?
+        "question_data": None,  # The current acronym/doc object
         "user_answer": "",      # What the user typed
         "feedback": None,       # The LLM's grading response
+        "streak": 0,            # Fun gamification metric
+        "generated_question_text": ""
     }
+if "active_index" not in st.session_state:
+    st.session_state.active_index = None
 # --- FLATTENER LOGIC (Integrated) ---
 class OutlineProcessor:
     """Parses text outlines for the Flattener tool."""
             )
     st.divider()
+    st.header("🌲 Pinecone Settings")
+    # Initialize Manager
+    pc_key = os.getenv("PINECONE_API_KEY")
+    if pc_key:
+        pm = PineconeManager(pc_key)
+        indexes = pm.list_indexes()
+        # 1. INDEX SELECTOR
+        selected_index = st.selectbox("Active Index", indexes)
+        st.session_state.active_index = selected_index
+        # 2. SAFETY CHECK VISUAL
+        if selected_index:
+            is_compatible = pm.check_dimension_compatibility(selected_index, 384)
+            if is_compatible:
+                st.caption("✅ Dimensions Match (384)")
+            else:
+                st.error("❌ Dimension Mismatch! Do not use.")
+        # 3. CREATE NEW INDEX
+        with st.expander("Create New Index"):
+            new_idx_name = st.text_input("Index Name")
+            if st.button("Create"):
+                ok, msg = pm.create_index(new_idx_name)
+                if ok: st.success(msg); st.rerun()
+                else: st.error(msg)
+    else:
+        st.warning("No Pinecone Key Found")
     # Model Selector
     st.header("🧠 Intelligence")
     if st.button("Run Integration Test"):
         with st.spinner("Running diagnostics..."):
             f = io.StringIO()
             try:
                 with contextlib.redirect_stdout(f):
                     run_tests()
                 st.success("Tests Completed")
                 st.code(f.getvalue(), language="text")
             except Exception as e:
                 st.error(f"Test Execution Failed: {e}")
         # RAG Search
         context_txt = ""
         sys_p = "You are a helpful AI assistant."
         if use_rag:
+            if not st.session_state.active_index:
+                st.error("⚠️ Please select an Active Index in the sidebar first.")
+            else:
+                with st.spinner("Searching Knowledge Base..."):
+                    # FIXED: Added index_name parameter
+                    docs = rag_engine.search_knowledge_base(
+                        query=prompt,
+                        username=st.session_state.username,
+                        index_name=st.session_state.active_index
                     )
+                    if docs:
+                        sys_p = (
+                            "You are a Navy Document Analyst. "
+                            "You must answer the user's question based PRIMARILY on the provided Context. "
+                            "If the Context contains the answer, output it clearly. "
+                            "If the Context does NOT contain the answer, simply state: "
+                            "'I cannot find that specific information in the documents provided.'"
+                        )
+                        for i, d in enumerate(docs):
+                            src = d.metadata.get('source', 'Unknown')
+                            context_txt += f"<document index='{i+1}' source='{src}'>\n{d.page_content}\n</document>\n"
+        # Construct Payload
         if context_txt:
             final_prompt = (
                 f"User Question: {prompt}\n\n"
         # Generation
         with st.chat_message("assistant"):
             with st.spinner("Thinking..."):
                 hist = [{"role":"system", "content":sys_p}] + st.session_state.messages[-6:-1] + [{"role":"user", "content":final_prompt}]
                 resp, usage = query_model_universal(hist, 2000, model_choice, st.session_state.get("user_openai_key"))
     if uploaded_file:
         # Save temp
+        temp_path = rag_engine.save_uploaded_file(uploaded_file, st.session_state.username)
         # ACTION BAR
         col_a, col_b, col_c = st.columns(3)
+        # 1. ADD TO DB
         with col_a:
             chunk_strategy = st.selectbox(
                 "Chunking Strategy",
+                ["paragraph", "token"],
                 help="Paragraph: Standard. Token: Dense text.",
                 key="chunk_selector"
             )
             if st.button("📥 Add to Knowledge Base", type="primary"):
+                if not st.session_state.active_index:
+                    st.error("Please select an Active Index in the sidebar.")
+                else:
+                    with st.spinner("Ingesting..."):
+                        # FIXED: Added index_name parameter
+                        ok, msg = rag_engine.ingest_file(
+                            file_path=temp_path,
+                            username=st.session_state.username,
+                            index_name=st.session_state.active_index,
+                            strategy=chunk_strategy
+                        )
+                        if ok:
+                            tracker.upload_user_db(st.session_state.username) # Auto-Sync
+                            st.success(msg)
+                        else:
+                            st.error(msg)
         # 2. SUMMARIZE
         with col_b:
             st.write("")
             st.write("")
             if st.button("📝 Summarize Document"):
                 with st.spinner("Reading & Summarizing..."):
                     key = st.session_state.get("user_openai_key") or OPENAI_KEY
                     class FileObj:
                         """Path-backed object exposing ``name`` and ``read()``; handed to doc_loader.extract_text_from_file."""
                         def __init__(self, p, n):
                             # Keep the on-disk location and the display name side by side.
                             self.path = p
                             self.name = n
                         def read(self):
                             """Return the entire content of ``self.path`` as bytes."""
                             with open(self.path, "rb") as handle:
                                 payload = handle.read()
                             return payload
                     raw = doc_loader.extract_text_from_file(
                         FileObj(temp_path, uploaded_file.name),
                         use_vision=use_vision, api_key=key
                     )
+                    prompt = f"Summarize this document into a key executive brief:\n\n{raw[:20000]}"
                     msgs = [{"role":"user", "content": prompt}]
                     summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
         # 3. FLATTEN
         with col_c:
             st.write("")
             st.write("")
             if "flattened_result" not in st.session_state:
                 st.session_state.flattened_result = None
                 with st.spinner("Flattening..."):
                     key = st.session_state.get("user_openai_key") or OPENAI_KEY
                     with open(temp_path, "rb") as f:
                         class Wrapper:
                             def __init__(self, data, n): self.data=data; self.name=n
                             Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key
                         )
                     proc = OutlineProcessor(raw)
                     items = proc.parse()
                     out_txt = []
                     bar = st.progress(0)
                     for i, item in enumerate(items):
                         out_txt.append(res)
                         bar.progress((i+1)/len(items))
                     final_flattened_text = "\n".join(out_txt)
                     st.session_state.flattened_result = {
                         "text": final_flattened_text,
                         "source": f"{uploaded_file.name}_flat"
                     }
+                    st.rerun()
             if st.session_state.flattened_result:
                 res = st.session_state.flattened_result
                 st.success("Flattening Complete!")
                 st.text_area("Result", res["text"], height=200)
                 if st.button("📥 Index This Flattened Version"):
+                    if not st.session_state.active_index:
+                        st.error("Please select an Active Index in the sidebar.")
+                    else:
+                        with st.spinner("Indexing Flattened Text..."):
+                            # FIXED: Added index_name parameter
+                            ok, msg = rag_engine.process_and_add_text(
+                                text=res["text"],
+                                source_name=res["source"],
+                                username=st.session_state.username,
+                                index_name=st.session_state.active_index
+                            )
+                            if ok:
+                                tracker.upload_user_db(st.session_state.username)
+                                st.success(msg)
+                            else:
+                                st.error(msg)
     st.divider()
+    # DB MANAGER
+    st.subheader("Database Management")
+    # This reads from local cache so no index needed
+    docs = rag_engine.list_documents(st.session_state.username)
+    if docs:
+        for d in docs:
+            c1, c2 = st.columns([4,1])
+            c1.text(f"📄 {d['filename']} (Cached)")
+            if c2.button("🗑️", key=d['source']):
+                if not st.session_state.active_index:
+                    st.error("Select Index first.")
+                else:
+                    # FIXED: Added index_name parameter
+                    rag_engine.delete_document(st.session_state.username, d['source'], st.session_state.active_index)
+                    tracker.upload_user_db(st.session_state.username)
+                    st.rerun()
+    else:
+        st.info("Database Empty (No cached files found).")
 # === TAB 3: QUIZ MODE ===
 with tab3:
     st.divider()
+    # 2. START BUTTON
     if not qs["active"]:
         if st.button("🚀 Generate New Question", type="primary"):
                 # MODE B: DOCUMENTS
                 else:
                     valid_question_found = False
                     attempts = 0
                                 300, model_choice, st.session_state.get("user_openai_key")
                             )
                             if "SKIP" not in question_text and len(question_text) > 10:
                                 valid_question_found = True
                                 qs["active"] = True
     if qs["active"]:
         st.markdown(f"### {qs['generated_question_text']}")
         if "document" in qs.get("question_data", {}).get("type", ""):
             st.caption(f"Source: *{qs['question_data']['source_file']}*")
             with st.spinner("Grading..."):
                 data = qs["question_data"]
                 if data["type"] == "acronym":
                     prompt = quiz.construct_acronym_grading_prompt(
                         data["term"], data["correct_definition"], user_ans
                         qs["generated_question_text"], user_ans, data["context_text"]
                     )
                 msgs = [{"role": "user", "content": prompt}]
                 grade, _ = query_model_universal(
                     msgs, 500, model_choice, st.session_state.get("user_openai_key")
                 qs["feedback"] = grade
                 if "GRADE:** PASS" in grade or "GRADE:** Pass" in grade:
                     qs["streak"] += 1
                 elif "GRADE:** FAIL" in grade:
                 st.rerun()
+    # 4. FEEDBACK AREA (MERGED & FIXED)
     if qs["feedback"]:
+        st.divider()
         if "PASS" in qs["feedback"]:
             st.success("✅ CORRECT")
         else:
+            if "FAIL" in qs["feedback"]:
+                st.error("❌ INCORRECT")
+            else:
+                st.warning("⚠️ PARTIAL / COMMENTARY")
         st.markdown(qs["feedback"])
+        # Display Correct Answer based on type
+        data = qs["question_data"]
+        if data["type"] == "acronym":
+             st.info(f"**Official Definition:** {data['correct_definition']}")
+        elif data["type"] == "document":
             with st.expander("Show Source Text (Answer Key)"):
+                st.info(data["context_text"])
         if st.button("Next Question ➡️"):
             qs["active"] = False
             qs["question_data"] = None
             qs["feedback"] = None
+            st.rerun()