Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,145 +1,92 @@
|
|
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import re
|
| 3 |
-
import zipfile
|
| 4 |
-
import gradio as gr
|
| 5 |
-
from langchain_openai import ChatOpenAI
|
| 6 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
| 7 |
-
from langchain_chroma import Chroma
|
| 8 |
-
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
|
| 9 |
-
from langchain.chains import LLMChain
|
| 10 |
-
|
| 11 |
-
# Unzip vector DB if not already extracted
if not os.path.exists("geometry_chroma"):
    with zipfile.ZipFile("geometry_chroma.zip", 'r') as zip_ref:
        zip_ref.extractall(".")

# Load vector DB
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma(persist_directory="geometry_chroma", embedding_function=embedding_model)
retriever = vectordb.as_retriever()

# Set OpenAI key (use Secrets or .env later).
# BUG FIX: os.environ values must be str — the original
# `os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")` raised
# TypeError at import time whenever the variable was unset. Guard instead.
_openai_key = os.getenv("OPENAI_API_KEY")
if _openai_key:
    os.environ["OPENAI_API_KEY"] = _openai_key
else:
    print("⚠️ OPENAI_API_KEY is not set; OpenAI calls will fail until it is configured.")

llm = ChatOpenAI(model_name="gpt-4.1", temperature=0.2)
|
| 25 |
-
|
| 26 |
-
# ✅ Prompt templates: one entry per UI "Prompt Type".
# All templates receive the retrieved SOL text as {context} and the
# user's topic as {query}.
templates = {
    # Five Q/A flashcards on the requested topic.
    "flashcard": PromptTemplate(
        input_variables=["context", "query"],
        template="""
{context}

Create 5 flashcards based on the topic: "{query}"
Each flashcard should include:
- A clear question
- A short answer
Focus on high school geometry understanding.
"""
    ),
    # Concept explanation + real-world example + class activity.
    "lesson plan": PromptTemplate(
        input_variables=["context", "query"],
        template="""
Given the following retrieved SOL text:
{context}

Generate a Geometry lesson plan based on: "{query}"
Include:
1. Simple explanation of the concept.
2. Real-world example.
3. Engaging class activity.
Be concise and curriculum-aligned for high school.
"""
    ),
    # Printable student handout: summary, worked example, practice.
    "worksheet": PromptTemplate(
        input_variables=["context", "query"],
        template="""
{context}

Create a student worksheet for: "{query}"
Include:
- Concept summary
- A worked example
- 3 practice problems
"""
    ),
    # Lesson plan variant focused on geometric proofs.
    "proofs": PromptTemplate(
        input_variables=["context", "query"],
        template="""
{context}

Generate a proof-focused geometry lesson plan for: "{query}"
Include:
- Student-friendly explanation
- Real-world connection
- One short class activity
"""
    ),
    # SOL-code lookup: answer must be copied verbatim from the context.
    "general question": ChatPromptTemplate.from_messages([
        HumanMessagePromptTemplate.from_template(
            """
You are a Virginia Geometry SOL assistant.

From the following SOL context:
{context}

Identify the SOL standard (e.g., G.RLT.1) that best matches this query: "{query}"

Respond with:
1. The exact SOL code (e.g., G.RLT.1)
2. The exact description line from the SOL guide

Do not summarize. Only copy from the context.
"""
        )
    ])
}
|
| 100 |
-
|
| 101 |
-
def generate_prompt_output(prompt_type, query, retriever=None, llm=None):
    """Retrieve SOL context for *query* and run the selected prompt template.

    Args:
        prompt_type: key into the module-level ``templates`` dict.
        query: user-entered topic; may contain an explicit SOL code
            (e.g. "G.RLT.1"), in which case retrieval is filtered to that
            standard instead of doing a similarity search.
        retriever: vector-store retriever; defaults to the module-level one.
        llm: chat model; defaults to the module-level one.

    Returns:
        The generated text, or an "❌ Error: ..." string on failure (the
        Gradio UI displays either directly).
    """
    try:
        # BUG FIX: the Gradio click handler only supplies (prompt_type, query),
        # so retriever/llm default to the module-level instances. globals() is
        # needed because the parameters shadow the module names.
        if retriever is None:
            retriever = globals()["retriever"]
        if llm is None:
            llm = globals()["llm"]

        # Explicit SOL code in the query, e.g. "G.RLT.1"?
        # (Top-level `import re` covers this; the original re-imported locally.)
        sol_match = re.search(r"\bG\.[A-Z]+\.\d+\b", query)
        matched_code = sol_match.group(0) if sol_match else None

        if matched_code:
            # NOTE(review): reaches into Chroma's private `_collection` API to
            # do an exact metadata match — works today but may break across
            # langchain/chromadb upgrades; confirm on dependency bumps.
            all_docs = retriever.vectorstore._collection.get(include=['documents', 'metadatas'])
            filtered = [
                doc_text
                for doc_text, metadata in zip(all_docs['documents'], all_docs['metadatas'])
                if metadata.get('standard') == matched_code
            ]
            context = "\n\n".join(filtered)
        else:
            # No explicit code: fall back to semantic similarity search.
            docs = retriever.get_relevant_documents(query)
            context = "\n\n".join(doc.page_content for doc in docs)

        chain = LLMChain(llm=llm, prompt=templates[prompt_type])
        return chain.run({"context": context, "query": query}).strip()

    except Exception as e:
        # Surface the error in the output textbox instead of crashing the app.
        return f"❌ Error: {str(e)}"
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
# ✅ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📐 Geometry Teaching Assistant")

    with gr.Row():
        query = gr.Textbox(label="Enter a geometry topic")
        prompt_type = gr.Dropdown(
            ["general question", "lesson plan", "worksheet", "proofs", "flashcard"],
            value="general question",
            label="Prompt Type"
        )

    output = gr.Textbox(label="Generated Output", lines=12, interactive=True)
    btn = gr.Button("Generate")

    # BUG FIX: generate_prompt_output takes (prompt_type, query, retriever, llm)
    # but Gradio only passes the listed inputs; the original wiring therefore
    # raised a missing-argument TypeError on every click. Bind the module-level
    # retriever/llm here so the handler arity matches the UI inputs.
    btn.click(
        fn=lambda pt, q: generate_prompt_output(pt, q, retriever, llm),
        inputs=[prompt_type, query],
        outputs=output,
    )
    # NOTE(review): no demo.launch() is visible in this file — confirm the
    # hosting environment auto-launches `demo`, otherwise add a launch call.
|
| 144 |
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import gradio as gr
|
| 3 |
import os
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
from sentence_transformers import SentenceTransformer
|
| 6 |
+
import faiss
|
| 7 |
+
import numpy as np
|
| 8 |
+
import json
|
| 9 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
# --- Load necessary components for the RAG system ---
# These paths are relative to the Space's root directory.
FAISS_INDEX_PATH = "sol_faiss_index.bin"
DOCUMENT_IDS_PATH = "sol_document_ids.json"
# Pre-processed SOL documents (the `documents` list saved as JSON) — uploaded
# to the Space alongside the index.
SOL_DOCUMENTS_PATH = "sol_documents.json"

# Load SentenceTransformer embedding model. In a Space the weights are
# downloaded to the cache on first use; internet access must be enabled.
try:
    model = SentenceTransformer('all-mpnet-base-v2')
except Exception as e:
    print(f"Error loading SentenceTransformer model: {e}")
    print("Attempting to load from local cache or download on first use.")
    # BUG FIX: the original except fell through WITHOUT assigning `model`,
    # so any later `if not model` check raised NameError instead of the
    # intended graceful "not initialized" message.
    model = None

# Load FAISS index.
try:
    index = faiss.read_index(FAISS_INDEX_PATH)
except Exception as e:
    print(f"Error loading FAISS index: {e}")
    index = None  # placeholder if loading fails

# Load document IDs (row i of the index maps to document_ids[i]).
try:
    with open(DOCUMENT_IDS_PATH, "r") as f:
        document_ids = json.load(f)
except Exception as e:
    print(f"Error loading document IDs: {e}")
    document_ids = []  # placeholder if loading fails

# Load the SOL document contents.
try:
    with open(SOL_DOCUMENTS_PATH, "r") as f:
        documents = json.load(f)
except Exception as e:
    print(f"Error loading sol documents: {e}")
    documents = []  # placeholder

# Load LLM for generation. 'google/gemma-2b-it' is a good option; set up
# secrets/environment variables first if a gated or paid model is used.
try:
    llm_pipeline = pipeline("text-generation", model="google/gemma-2b-it")
except Exception as e:
    print(f"Error loading LLM pipeline: {e}")
    llm_pipeline = None  # placeholder
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def retrieve_and_generate_app(query, top_k=3):
|
| 73 |
+
if not model or not index or not document_ids or not documents or not llm_pipeline:
|
| 74 |
+
return "System not fully initialized. Please check logs for missing components."
|
| 75 |
+
|
| 76 |
+
# 1. Query Embedding
|
| 77 |
+
query_embedding = model.encode([query])
|
| 78 |
+
|
| 79 |
+
# 2. Retrieval using FAISS
|
| 80 |
+
D, I = index.search(query_embedding, top_k)
|
| 81 |
+
|
| 82 |
+
retrieved_docs = []
|
| 83 |
+
for i in I[0]:
|
| 84 |
+
sol_id = document_ids[i]
|
| 85 |
+
# Find the full content of the retrieved SOL
|
| 86 |
+
# This relies on the 'documents' list being correctly loaded and matching by ID
|
| 87 |
+
retrieved_content = next((doc["content"] for doc in documents if doc["id"] == sol_id), "Content not found.")
|
| 88 |
+
retrieved_docs.append({"id": sol_id, "content": retrieved_content})
|
| 89 |
+
|
| 90 |
+
context = "\n\n".join([f"SOL {doc['id']}: {doc['content']}" for doc in retrieved_docs])
|
| 91 |
+
|
| 92 |
+
prompt = f"""
|