Spaces:

JustTheStatsHuman
/

Togmal-demo

Configuration error

HeTalksInMaths committed 21 days ago

Commit

4663c58

1 Parent(s): 29ce16b

Add progressive database expansion feature

- Initial build: 5K questions (~3-5 min, fast first launch)
- Expand button: Add 5K more on demand (~2-3 min per click)
- Users can expand to full 12K questions progressively
- Database stats show current size and remaining questions
- Perfect UX: quick start + optional full expansion

Files changed (1) hide show

app.py +116 -13

app.py CHANGED Viewed

@@ -95,21 +95,11 @@ else:
     logger.info(f"✓ Loaded existing database with {current_count:,} questions")
 def analyze_prompt(prompt: str, k: int = 5) -> str:
-    """
-    Analyze a prompt and return difficulty assessment.
-    Args:
-        prompt: The user's prompt/question
-        k: Number of similar questions to retrieve
-    Returns:
-        Formatted analysis results
-    """
     if not prompt.strip():
         return "Please enter a prompt to analyze."
     try:
-        # Query the vector database
         result = db.query_similar_questions(prompt, k=k)
         # Format results
@@ -130,7 +120,6 @@ def analyze_prompt(prompt: str, k: int = 5) -> str:
             output.append(f"   - Similarity: {q['similarity']:.3f}")
             output.append("")
-        # Get current database size
         total_questions = db.collection.count()
         output.append(f"*Analyzed using {k} most similar questions from {total_questions:,} benchmark questions*")
@@ -139,11 +128,113 @@ def analyze_prompt(prompt: str, k: int = 5) -> str:
     except Exception as e:
         return f"Error analyzing prompt: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="ToGMAL Prompt Difficulty Analyzer") as demo:
     gr.Markdown("# 🧠 ToGMAL Prompt Difficulty Analyzer")
     gr.Markdown("Enter any prompt to see how difficult it is for current LLMs based on real benchmark data.")
     with gr.Row():
         with gr.Column():
             prompt_input = gr.Textbox(
@@ -158,7 +249,7 @@ with gr.Blocks(title="ToGMAL Prompt Difficulty Analyzer") as demo:
                 step=1,
                 label="Number of similar questions to show"
             )
-            submit_btn = gr.Button("Analyze Difficulty")
         with gr.Column():
             result_output = gr.Markdown(label="Analysis Results")
@@ -189,6 +280,18 @@ with gr.Blocks(title="ToGMAL Prompt Difficulty Analyzer") as demo:
         inputs=[prompt_input, k_slider],
         outputs=result_output
     )
 if __name__ == "__main__":
     demo.launch(share=True, server_port=7861)

     logger.info(f"✓ Loaded existing database with {current_count:,} questions")
 def analyze_prompt(prompt: str, k: int = 5) -> str:
+    """Analyze a prompt and return difficulty assessment."""
     if not prompt.strip():
         return "Please enter a prompt to analyze."
     try:
         result = db.query_similar_questions(prompt, k=k)
         # Format results
             output.append(f"   - Similarity: {q['similarity']:.3f}")
             output.append("")
         total_questions = db.collection.count()
         output.append(f"*Analyzed using {k} most similar questions from {total_questions:,} benchmark questions*")
     except Exception as e:
         return f"Error analyzing prompt: {str(e)}"
+def expand_database(batch_size: int = 5000) -> str:
+    """Expand the database by adding another batch of questions.
+
+    Uses the current collection size as the offset into the MMLU-Pro test
+    split and indexes up to ``batch_size`` questions starting there.
+    NOTE(review): this assumes the collection already holds exactly the first
+    N rows of that split, in order -- confirm against the initial build.
+
+    Args:
+        batch_size: Maximum number of questions to add in this call. Must be
+            positive.
+
+    Returns:
+        A Markdown status message: updated stats on success, a completion
+        notice when nothing is left to index, or an error description.
+    """
+    try:
+        # Reject a non-positive batch up front; otherwise nothing would be
+        # indexed and the function would still report success.
+        if batch_size <= 0:
+            raise ValueError(f"batch_size must be positive, got {batch_size}")
+        # Imported at call time, as in the rest of this function's dependencies.
+        from datasets import load_dataset
+        from benchmark_vector_db import BenchmarkQuestion
+        current_count = db.collection.count()
+        logger.info("Loading MMLU-Pro test dataset...")
+        test_dataset = load_dataset("TIGER-Lab/MMLU-Pro", split="test")
+        total_available = len(test_dataset)
+        # Simple offset approach: everything below current_count is treated
+        # as already indexed.
+        if current_count >= total_available:
+            return f"✅ Database is complete! All {total_available:,} questions indexed."
+        start_idx = current_count
+        end_idx = min(start_idx + batch_size, total_available)
+        logger.info(f"Expanding database: adding questions {start_idx} to {end_idx}...")
+        batch_questions = []
+        for idx in range(start_idx, end_idx):
+            item = test_dataset[idx]
+            batch_questions.append(
+                BenchmarkQuestion(
+                    question_id=f"mmlu_pro_test_{idx}",
+                    source_benchmark="MMLU_Pro",
+                    domain=item.get('category', 'unknown').lower(),
+                    question_text=item['question'],
+                    correct_answer=item['answer'],
+                    choices=item.get('options', []),
+                    # NOTE(review): fixed values with num_models_tested=0 look
+                    # like placeholders rather than measured results -- confirm.
+                    success_rate=0.45,
+                    difficulty_score=0.55,
+                    difficulty_label="Hard",
+                    num_models_tested=0,
+                )
+            )
+        added = len(batch_questions)
+        logger.info(f"Indexing {added} new questions...")
+        db.index_questions(batch_questions)
+        new_count = db.collection.count()
+        # Clamp so the stats never show a negative remainder.
+        still_remaining = max(0, total_available - new_count)
+        parts = [
+            f"✅ Successfully added {added} questions!\n\n",
+            "**Database Stats:**\n",
+            f"- Total Questions: {new_count:,}\n",
+            f"- Just Added: {added}\n",
+            f"- Remaining: {still_remaining:,}\n\n",
+        ]
+        if still_remaining > 0:
+            parts.append(
+                f"Click 'Expand Database' again to add {min(batch_size, still_remaining)} more questions."
+            )
+        else:
+            parts.append(f"🎉 Database is now complete with all {total_available:,} questions!")
+        return "".join(parts)
+    except Exception as e:
+        # logger.exception records the traceback along with the message.
+        logger.exception(f"Expansion failed: {e}")
+        return f"❌ Error expanding database: {str(e)}"
+def get_database_info(total_available: int = 12032, batch_size: int = 5000) -> str:
+    """Get current database statistics.
+
+    Args:
+        total_available: Number of questions assumed to be available in
+            total (the MMLU-Pro test split has ~12K).
+        batch_size: Number of questions one expansion adds; used only for
+            the hint text.
+
+    Returns:
+        A Markdown status block with the current, available and remaining
+        question counts, or an error description if the count lookup fails.
+    """
+    try:
+        current_count = db.collection.count()
+        remaining = total_available - current_count
+        lines = [
+            "### 📊 Database Status\n\n",
+            f"**Current Size:** {current_count:,} questions\n",
+            f"**Available:** {total_available:,} questions\n",
+            f"**Remaining:** {max(0, remaining):,} questions\n\n",
+        ]
+        if remaining > 0:
+            # Advertise only what can actually still be added, so the last
+            # partial batch is not announced as a full one.
+            next_batch = min(batch_size, remaining)
+            lines.append(
+                f"💡 Click 'Expand Database' to add {next_batch:,} more questions (takes ~2-3 min)"
+            )
+        else:
+            lines.append("✅ Database is complete!")
+        return "".join(lines)
+    except Exception as e:
+        return f"Error getting database info: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="ToGMAL Prompt Difficulty Analyzer") as demo:
     gr.Markdown("# 🧠 ToGMAL Prompt Difficulty Analyzer")
     gr.Markdown("Enter any prompt to see how difficult it is for current LLMs based on real benchmark data.")
+    # Database expansion section
+    with gr.Accordion("📊 Database Management", open=False):
+        db_info = gr.Markdown(get_database_info())
+        with gr.Row():
+            expand_btn = gr.Button("🚀 Expand Database (+5K questions)", variant="secondary")
+            refresh_btn = gr.Button("🔄 Refresh Stats", variant="secondary")
+        expand_output = gr.Markdown()
     with gr.Row():
         with gr.Column():
             prompt_input = gr.Textbox(
                 step=1,
                 label="Number of similar questions to show"
             )
+            submit_btn = gr.Button("Analyze Difficulty", variant="primary")
         with gr.Column():
             result_output = gr.Markdown(label="Analysis Results")
         inputs=[prompt_input, k_slider],
         outputs=result_output
     )
+    expand_btn.click(
+        fn=expand_database,
+        inputs=[],
+        outputs=expand_output
+    )
+    refresh_btn.click(
+        fn=get_database_info,
+        inputs=[],
+        outputs=db_info
+    )
 if __name__ == "__main__":
     demo.launch(share=True, server_port=7861)